1 /*-------------------------------------------------------------------------
2  *
3  * nodeModifyTable.c
4  *	  routines to handle ModifyTable nodes.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeModifyTable.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  *		ExecInitModifyTable - initialize the ModifyTable node
17  *		ExecModifyTable		- retrieve the next tuple from the node
18  *		ExecEndModifyTable	- shut down the ModifyTable node
19  *		ExecReScanModifyTable - rescan the ModifyTable node
20  *
21  *	 NOTES
22  *		Each ModifyTable node contains a list of one or more subplans,
23  *		much like an Append node.  There is one subplan per result relation.
24  *		The key reason for this is that in an inherited UPDATE command, each
25  *		result relation could have a different schema (more or different
26  *		columns) requiring a different plan tree to produce it.  In an
27  *		inherited DELETE, all the subplans should produce the same output
28  *		rowtype, but we might still find that different plans are appropriate
29  *		for different child relations.
30  *
31  *		If the query specifies RETURNING, then the ModifyTable returns a
32  *		RETURNING tuple after completing each row insert, update, or delete.
33  *		It must be called again to continue the operation.  Without RETURNING,
34  *		we just loop within the node until all the work is done, then
35  *		return NULL.  This avoids useless call/return overhead.
36  */
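
/*
 * Illustrative example of the RETURNING behavior described above: for a
 * statement such as
 *		INSERT INTO foo (x) VALUES (1), (2) RETURNING *;
 * (foo being any suitable target table), ExecModifyTable hands back one
 * RETURNING tuple per call and is therefore re-entered once per inserted
 * row, whereas without a RETURNING clause it performs all the insertions
 * in a single call and then returns NULL.
 */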
37 
38 #include "postgres.h"
39 
40 #include "access/htup_details.h"
41 #include "access/xact.h"
42 #include "commands/trigger.h"
43 #include "executor/execPartition.h"
44 #include "executor/executor.h"
45 #include "executor/nodeModifyTable.h"
46 #include "foreign/fdwapi.h"
47 #include "miscadmin.h"
48 #include "nodes/nodeFuncs.h"
49 #include "storage/bufmgr.h"
50 #include "storage/lmgr.h"
51 #include "utils/builtins.h"
52 #include "utils/memutils.h"
53 #include "utils/rel.h"
54 #include "utils/tqual.h"
55 
56 
57 static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
58 					 ResultRelInfo *resultRelInfo,
59 					 ItemPointer conflictTid,
60 					 TupleTableSlot *planSlot,
61 					 TupleTableSlot *excludedSlot,
62 					 EState *estate,
63 					 bool canSetTag,
64 					 TupleTableSlot **returning);
65 static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
66 						EState *estate,
67 						PartitionTupleRouting *proute,
68 						ResultRelInfo *targetRelInfo,
69 						TupleTableSlot *slot);
70 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
71 static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate);
72 static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
73 static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
74 						int whichplan);
75 
76 /*
77  * Verify that the tuples to be produced by INSERT or UPDATE match the
78  * target relation's rowtype
79  *
80  * We do this to guard against stale plans.  If plan invalidation is
81  * functioning properly then we should never get a failure here, but better
82  * safe than sorry.  Note that this is called after we have obtained lock
83  * on the target rel, so the rowtype can't change underneath us.
84  *
85  * The plan output is represented by its targetlist, because that makes
86  * handling the dropped-column case easier.
87  */
88 static void
89 ExecCheckPlanOutput(Relation resultRel, List *targetList)
90 {
91 	TupleDesc	resultDesc = RelationGetDescr(resultRel);
92 	int			attno = 0;
93 	ListCell   *lc;
94 
95 	foreach(lc, targetList)
96 	{
97 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
98 		Form_pg_attribute attr;
99 
100 		if (tle->resjunk)
101 			continue;			/* ignore junk tlist items */
102 
103 		if (attno >= resultDesc->natts)
104 			ereport(ERROR,
105 					(errcode(ERRCODE_DATATYPE_MISMATCH),
106 					 errmsg("table row type and query-specified row type do not match"),
107 					 errdetail("Query has too many columns.")));
108 		attr = TupleDescAttr(resultDesc, attno);
109 		attno++;
110 
111 		if (!attr->attisdropped)
112 		{
113 			/* Normal case: demand type match */
114 			if (exprType((Node *) tle->expr) != attr->atttypid)
115 				ereport(ERROR,
116 						(errcode(ERRCODE_DATATYPE_MISMATCH),
117 						 errmsg("table row type and query-specified row type do not match"),
118 						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
119 								   format_type_be(attr->atttypid),
120 								   attno,
121 								   format_type_be(exprType((Node *) tle->expr)))));
122 		}
123 		else
124 		{
125 			/*
126 			 * For a dropped column, we can't check atttypid (it's likely 0).
127 			 * In any case the planner has most likely inserted an INT4 null.
128 			 * What we insist on is just *some* NULL constant.
129 			 */
130 			if (!IsA(tle->expr, Const) ||
131 				!((Const *) tle->expr)->constisnull)
132 				ereport(ERROR,
133 						(errcode(ERRCODE_DATATYPE_MISMATCH),
134 						 errmsg("table row type and query-specified row type do not match"),
135 						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
136 								   attno)));
137 		}
138 	}
139 	if (attno != resultDesc->natts)
140 		ereport(ERROR,
141 				(errcode(ERRCODE_DATATYPE_MISMATCH),
142 				 errmsg("table row type and query-specified row type do not match"),
143 				 errdetail("Query has too few columns.")));
144 }
145 
146 /*
147  * ExecProcessReturning --- evaluate a RETURNING list
148  *
149  * projectReturning: the projection to evaluate
150  * resultRelOid: result relation's OID
151  * tupleSlot: slot holding tuple actually inserted/updated/deleted
152  * planSlot: slot holding tuple returned by top subplan node
153  *
154  * In cross-partition UPDATE cases, projectReturning and planSlot are as
155  * for the source partition, and tupleSlot must conform to that.  But
156  * resultRelOid is for the destination partition.
157  *
158  * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
159  * scan tuple.
160  *
161  * Returns a slot holding the result tuple
162  */
163 static TupleTableSlot *
164 ExecProcessReturning(ProjectionInfo *projectReturning,
165 					 Oid resultRelOid,
166 					 TupleTableSlot *tupleSlot,
167 					 TupleTableSlot *planSlot)
168 {
169 	ExprContext *econtext = projectReturning->pi_exprContext;
170 
171 	/*
172 	 * Reset per-tuple memory context to free any expression evaluation
173 	 * storage allocated in the previous cycle.
174 	 */
175 	ResetExprContext(econtext);
176 
177 	/* Make tuple and any needed join variables available to ExecProject */
178 	if (tupleSlot)
179 		econtext->ecxt_scantuple = tupleSlot;
180 	else
181 	{
182 		HeapTuple	tuple;
183 
184 		/*
185 		 * RETURNING expressions might reference the tableoid column, so be
186 		 * sure we expose the desired OID, ie that of the real target
187 		 * relation.
188 		 */
189 		Assert(!TupIsNull(econtext->ecxt_scantuple));
190 		tuple = ExecMaterializeSlot(econtext->ecxt_scantuple);
191 		tuple->t_tableOid = resultRelOid;
192 	}
193 	econtext->ecxt_outertuple = planSlot;
194 
195 	/* Compute the RETURNING expressions */
196 	return ExecProject(projectReturning);
197 }
198 
199 /*
200  * ExecCheckHeapTupleVisible -- verify heap tuple is visible
201  *
202  * It would not be consistent with guarantees of the higher isolation levels to
203  * proceed with avoiding insertion (taking speculative insertion's alternative
204  * path) on the basis of another tuple that is not visible to the MVCC snapshot.
205  * Check for the need to raise a serialization failure, and do so as necessary.
206  */
207 static void
208 ExecCheckHeapTupleVisible(EState *estate,
209 						  HeapTuple tuple,
210 						  Buffer buffer)
211 {
212 	if (!IsolationUsesXactSnapshot())
213 		return;
214 
215 	/*
216 	 * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
217 	 * Caller should be holding pin, but not lock.
218 	 */
219 	LockBuffer(buffer, BUFFER_LOCK_SHARE);
220 	if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
221 	{
222 		/*
223 		 * We should not raise a serialization failure if the conflict is
224 		 * against a tuple inserted by our own transaction, even if it's not
225 		 * visible to our snapshot.  (This would happen, for example, if
226 		 * conflicting keys are proposed for insertion in a single command.)
227 		 */
228 		if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
229 			ereport(ERROR,
230 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
231 					 errmsg("could not serialize access due to concurrent update")));
232 	}
233 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
234 }
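
/*
 * Illustrative scenario for the check above: under REPEATABLE READ or
 * SERIALIZABLE, an INSERT ... ON CONFLICT DO NOTHING may find a conflicting
 * row that was committed after our snapshot was taken.  Skipping the insert
 * on the basis of a row our snapshot cannot see would break the isolation
 * guarantees, so unless the conflicting row was inserted by our own
 * transaction we raise a serialization failure instead.
 */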
235 
236 /*
237  * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
238  */
239 static void
240 ExecCheckTIDVisible(EState *estate,
241 					ResultRelInfo *relinfo,
242 					ItemPointer tid)
243 {
244 	Relation	rel = relinfo->ri_RelationDesc;
245 	Buffer		buffer;
246 	HeapTupleData tuple;
247 
248 	/* Redundantly check isolation level */
249 	if (!IsolationUsesXactSnapshot())
250 		return;
251 
252 	tuple.t_self = *tid;
253 	if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
254 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
255 	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
256 	ReleaseBuffer(buffer);
257 }
258 
259 /* ----------------------------------------------------------------
260  *		ExecInsert
261  *
262  *		For INSERT, we have to insert the tuple into the target relation
263  *		and insert appropriate tuples into the index relations.
264  *
265  *		slot contains the new tuple value to be stored.
266  *		planSlot is the output of the ModifyTable's subplan; we use it
267  *		to access "junk" columns that are not going to be stored.
268  *		In a cross-partition UPDATE, srcSlot is the slot that held the
269  *		updated tuple for the source relation; otherwise it's NULL.
270  *
271  *		returningRelInfo is the resultRelInfo for the source relation of a
272  *		cross-partition UPDATE; otherwise it's the current result relation.
273  *		We use it to process RETURNING lists, for reasons explained below.
274  *
275  *		Returns RETURNING result if any, otherwise NULL.
276  * ----------------------------------------------------------------
277  */
278 static TupleTableSlot *
279 ExecInsert(ModifyTableState *mtstate,
280 		   TupleTableSlot *slot,
281 		   TupleTableSlot *planSlot,
282 		   TupleTableSlot *srcSlot,
283 		   ResultRelInfo *returningRelInfo,
284 		   EState *estate,
285 		   bool canSetTag)
286 {
287 	HeapTuple	tuple;
288 	ResultRelInfo *resultRelInfo;
289 	Relation	resultRelationDesc;
290 	Oid			newId;
291 	List	   *recheckIndexes = NIL;
292 	TupleTableSlot *result = NULL;
293 	TransitionCaptureState *ar_insert_trig_tcs;
294 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
295 	OnConflictAction onconflict = node->onConflictAction;
296 
297 	/*
298 	 * get the heap tuple out of the tuple table slot, making sure we have a
299 	 * writable copy
300 	 */
301 	tuple = ExecMaterializeSlot(slot);
302 
303 	/*
304 	 * get information on the (current) result relation
305 	 */
306 	resultRelInfo = estate->es_result_relation_info;
307 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
308 
309 	/*
310 	 * If the result relation has OIDs, force the tuple's OID to zero so that
311 	 * heap_insert will assign a fresh OID.  Usually the OID already will be
312 	 * zero at this point, but there are corner cases where the plan tree can
313 	 * return a tuple extracted literally from some table with the same
314 	 * rowtype.
315 	 *
316 	 * XXX if we ever wanted to allow users to assign their own OIDs to new
317 	 * rows, this'd be the place to do it.  For the moment, we make a point of
318 	 * doing this before calling triggers, so that a user-supplied trigger
319 	 * could hack the OID if desired.
320 	 */
321 	if (resultRelationDesc->rd_rel->relhasoids)
322 		HeapTupleSetOid(tuple, InvalidOid);
323 
324 	/*
325 	 * BEFORE ROW INSERT Triggers.
326 	 *
327 	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
328 	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
329 	 * violations before firing these triggers, because they can change the
330 	 * values to insert.  Also, they can run arbitrary user-defined code with
331 	 * side-effects that we can't cancel by just not inserting the tuple.
332 	 */
333 	if (resultRelInfo->ri_TrigDesc &&
334 		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
335 	{
336 		slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
337 
338 		if (slot == NULL)		/* "do nothing" */
339 			return NULL;
340 
341 		/* trigger might have changed tuple */
342 		tuple = ExecMaterializeSlot(slot);
343 	}
344 
345 	/* INSTEAD OF ROW INSERT Triggers */
346 	if (resultRelInfo->ri_TrigDesc &&
347 		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
348 	{
349 		slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);
350 
351 		if (slot == NULL)		/* "do nothing" */
352 			return NULL;
353 
354 		/* trigger might have changed tuple */
355 		tuple = ExecMaterializeSlot(slot);
356 
357 		newId = InvalidOid;
358 	}
359 	else if (resultRelInfo->ri_FdwRoutine)
360 	{
361 		/*
362 		 * insert into foreign table: let the FDW do it
363 		 */
364 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
365 															   resultRelInfo,
366 															   slot,
367 															   planSlot);
368 
369 		if (slot == NULL)		/* "do nothing" */
370 			return NULL;
371 
372 		/* FDW might have changed tuple */
373 		tuple = ExecMaterializeSlot(slot);
374 
375 		/*
376 		 * AFTER ROW Triggers or RETURNING expressions might reference the
377 		 * tableoid column, so initialize t_tableOid before evaluating them.
378 		 */
379 		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
380 
381 		newId = InvalidOid;
382 	}
383 	else
384 	{
385 		WCOKind		wco_kind;
386 
387 		/*
388 		 * Constraints might reference the tableoid column, so initialize
389 		 * t_tableOid before evaluating them.
390 		 */
391 		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
392 
393 		/*
394 		 * Check any RLS WITH CHECK policies.
395 		 *
396 		 * Normally we should check INSERT policies. But if the insert is the
397 		 * result of a partition key update that moved the tuple to a new
398 		 * partition, we should instead check UPDATE policies, because we are
399 		 * executing policies defined on the target table, and not those
400 		 * defined on the child partitions.
401 		 */
402 		wco_kind = (mtstate->operation == CMD_UPDATE) ?
403 			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
404 
405 		/*
406 		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
407 		 * we are looking for at this point.
408 		 */
409 		if (resultRelInfo->ri_WithCheckOptions != NIL)
410 			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
411 
412 		/*
413 		 * Check the constraints of the tuple.
414 		 */
415 		if (resultRelationDesc->rd_att->constr)
416 			ExecConstraints(resultRelInfo, slot, estate);
417 
418 		/*
419 		 * Also check the tuple against the partition constraint, if there is
420 		 * one; except that if we got here via tuple routing, we only need to
421 		 * recheck it if a BR trigger on the partition might have changed it.
422 		 */
423 		if (resultRelInfo->ri_PartitionCheck &&
424 			(resultRelInfo->ri_RootResultRelInfo == NULL ||
425 			 (resultRelInfo->ri_TrigDesc &&
426 			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
427 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
428 
429 		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
430 		{
431 			/* Perform a speculative insertion. */
432 			uint32		specToken;
433 			ItemPointerData conflictTid;
434 			bool		specConflict;
435 			List	   *arbiterIndexes;
436 
437 			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
438 
439 			/*
440 			 * Do a non-conclusive check for conflicts first.
441 			 *
442 			 * We're not holding any locks yet, so this doesn't guarantee that
443 			 * the later insert won't conflict.  But it avoids leaving behind
444 			 * a lot of canceled speculative insertions, if you run a lot of
445 			 * INSERT ON CONFLICT statements that do conflict.
446 			 *
447 			 * We loop back here if we find a conflict below, either during
448 			 * the pre-check, or when we re-check after inserting the tuple
449 			 * speculatively.
450 			 */
451 	vlock:
452 			specConflict = false;
453 			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
454 										   arbiterIndexes))
455 			{
456 				/* committed conflict tuple found */
457 				if (onconflict == ONCONFLICT_UPDATE)
458 				{
459 					/*
460 					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
461 					 * part.  Be prepared to retry if the UPDATE fails because
462 					 * of another concurrent UPDATE/DELETE to the conflict
463 					 * tuple.
464 					 */
465 					TupleTableSlot *returning = NULL;
466 
467 					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
468 											 &conflictTid, planSlot, slot,
469 											 estate, canSetTag, &returning))
470 					{
471 						InstrCountTuples2(&mtstate->ps, 1);
472 						return returning;
473 					}
474 					else
475 						goto vlock;
476 				}
477 				else
478 				{
479 					/*
480 					 * In case of ON CONFLICT DO NOTHING, do nothing. However,
481 					 * verify that the tuple is visible to the executor's MVCC
482 					 * snapshot at higher isolation levels.
483 					 */
484 					Assert(onconflict == ONCONFLICT_NOTHING);
485 					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
486 					InstrCountTuples2(&mtstate->ps, 1);
487 					return NULL;
488 				}
489 			}
490 
491 			/*
492 			 * Before we start insertion proper, acquire our "speculative
493 			 * insertion lock".  Others can use that to wait for us to decide
494 			 * if we're going to go ahead with the insertion, instead of
495 			 * waiting for the whole transaction to complete.
496 			 */
497 			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
498 			HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
499 
500 			/* insert the tuple, with the speculative token */
501 			newId = heap_insert(resultRelationDesc, tuple,
502 								estate->es_output_cid,
503 								HEAP_INSERT_SPECULATIVE,
504 								NULL);
505 
506 			/* insert index entries for tuple */
507 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
508 												   estate, true, &specConflict,
509 												   arbiterIndexes);
510 
511 			/* adjust the tuple's state accordingly */
512 			if (!specConflict)
513 				heap_finish_speculative(resultRelationDesc, tuple);
514 			else
515 				heap_abort_speculative(resultRelationDesc, tuple);
516 
517 			/*
518 			 * Wake up anyone waiting for our decision.  They will re-check
519 			 * the tuple, see that it's no longer speculative, and wait on our
520 			 * XID as if this was a regularly inserted tuple all along.  Or if
521 			 * we killed the tuple, they will see it's dead, and proceed as if
522 			 * the tuple never existed.
523 			 */
524 			SpeculativeInsertionLockRelease(GetCurrentTransactionId());
525 
526 			/*
527 			 * If there was a conflict, start from the beginning.  We'll do
528 			 * the pre-check again, which will now find the conflicting tuple
529 			 * (unless it aborts before we get there).
530 			 */
531 			if (specConflict)
532 			{
533 				list_free(recheckIndexes);
534 				goto vlock;
535 			}
536 
537 			/* Since there was no insertion conflict, we're done */
538 		}
539 		else
540 		{
541 			/*
542 			 * insert the tuple normally.
543 			 *
544 			 * Note: heap_insert returns the tid (location) of the new tuple
545 			 * in the t_self field.
546 			 */
547 			newId = heap_insert(resultRelationDesc, tuple,
548 								estate->es_output_cid,
549 								0, NULL);
550 
551 			/* insert index entries for tuple */
552 			if (resultRelInfo->ri_NumIndices > 0)
553 				recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
554 													   estate, false, NULL,
555 													   NIL);
556 		}
557 	}
558 
559 	if (canSetTag)
560 	{
561 		(estate->es_processed)++;
562 		estate->es_lastoid = newId;
563 		setLastTid(&(tuple->t_self));
564 	}
565 
566 	/*
567 	 * If this insert is the result of a partition key update that moved the
568 	 * tuple to a new partition, put this row into the transition NEW TABLE,
569 	 * if there is one. We need to do this separately for DELETE and INSERT
570 	 * because they happen on different tables.
571 	 */
572 	ar_insert_trig_tcs = mtstate->mt_transition_capture;
573 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
574 		&& mtstate->mt_transition_capture->tcs_update_new_table)
575 	{
576 		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
577 							 NULL,
578 							 tuple,
579 							 NULL,
580 							 mtstate->mt_transition_capture);
581 
582 		/*
583 		 * We've already captured the NEW TABLE row, so make sure any AR
584 		 * INSERT trigger fired below doesn't capture it again.
585 		 */
586 		ar_insert_trig_tcs = NULL;
587 	}
588 
589 	/* AFTER ROW INSERT Triggers */
590 	ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
591 						 ar_insert_trig_tcs);
592 
593 	list_free(recheckIndexes);
594 
595 	/*
596 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
597 	 * required to do this after testing all constraints and uniqueness
598 	 * violations per the SQL spec, so we do it after actually inserting the
599 	 * record into the heap and all indexes.
600 	 *
601 	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
602 	 * tuple will never be seen if it violates the WITH CHECK OPTION.
603 	 *
604 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
605 	 * are looking for at this point.
606 	 */
607 	if (resultRelInfo->ri_WithCheckOptions != NIL)
608 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
609 
610 	/* Process RETURNING if present */
611 	if (returningRelInfo->ri_projectReturning)
612 	{
613 		/*
614 		 * In a cross-partition UPDATE with RETURNING, we have to use the
615 		 * source partition's RETURNING list, because that matches the output
616 		 * of the planSlot, while the destination partition might have
617 		 * different resjunk columns.  This means we have to map the
618 		 * destination tuple back to the source's format so we can apply that
619 		 * RETURNING list.  This is expensive, but it should be an uncommon
620 		 * corner case, so we won't spend much effort on making it fast.
621 		 *
622 		 * We assume that we can use srcSlot to hold the re-converted tuple.
623 		 * Note that in the common case where the child partitions both match
624 		 * the root's format, previous optimizations will have resulted in
625 		 * slot and srcSlot being identical, cueing us that there's nothing to
626 		 * do here.
627 		 */
628 		if (returningRelInfo != resultRelInfo && slot != srcSlot)
629 		{
630 			Relation	srcRelationDesc = returningRelInfo->ri_RelationDesc;
631 			TupleConversionMap *map;
632 
633 			map = convert_tuples_by_name(RelationGetDescr(resultRelationDesc),
634 										 RelationGetDescr(srcRelationDesc),
635 										 gettext_noop("could not convert row type"));
636 			if (map)
637 			{
638 				HeapTuple	origTuple = ExecMaterializeSlot(slot);
639 				HeapTuple	newTuple;
640 
641 				newTuple = do_convert_tuple(origTuple, map);
642 
643 				/* do_convert_tuple doesn't copy system columns, so do that */
644 				newTuple->t_self = newTuple->t_data->t_ctid =
645 					origTuple->t_self;
646 				newTuple->t_tableOid = origTuple->t_tableOid;
647 
648 				HeapTupleHeaderSetXmin(newTuple->t_data,
649 									   HeapTupleHeaderGetRawXmin(origTuple->t_data));
650 				HeapTupleHeaderSetCmin(newTuple->t_data,
651 									   HeapTupleHeaderGetRawCommandId(origTuple->t_data));
652 				HeapTupleHeaderSetXmax(newTuple->t_data,
653 									   InvalidTransactionId);
654 
655 				if (RelationGetDescr(resultRelationDesc)->tdhasoid)
656 				{
657 					Assert(RelationGetDescr(srcRelationDesc)->tdhasoid);
658 					HeapTupleSetOid(newTuple, HeapTupleGetOid(origTuple));
659 				}
660 
661 				slot = ExecStoreTuple(newTuple, srcSlot, InvalidBuffer, true);
662 
663 				free_conversion_map(map);
664 			}
665 		}
666 
667 		result = ExecProcessReturning(returningRelInfo->ri_projectReturning,
668 									  RelationGetRelid(resultRelationDesc),
669 									  slot, planSlot);
670 	}
671 
672 	return result;
673 }
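
/*
 * Illustrative example of the speculative-insertion path above: for a
 * statement such as
 *		INSERT INTO t (k, v) VALUES (1, 'x')
 *		ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v;
 * (t and its unique index on k being hypothetical), ExecInsert first runs
 * the non-conclusive pre-check against the arbiter index; if a committed
 * conflicting row is found it calls ExecOnConflictUpdate, and if that
 * reports a concurrent update or delete of the conflicting row we jump
 * back to "vlock" and retry from the pre-check.
 */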
674 
675 /* ----------------------------------------------------------------
676  *		ExecDelete
677  *
678  *		DELETE is like UPDATE, except that we delete the tuple and no
679  *		index modifications are needed.
680  *
681  *		When deleting from a table, tupleid identifies the tuple to
682  *		delete and oldtuple is NULL.  When deleting from a view,
683  *		oldtuple is passed to the INSTEAD OF triggers and identifies
684  *		what to delete, and tupleid is invalid.  When deleting from a
685  *		foreign table, tupleid is invalid; the FDW has to figure out
686  *		which row to delete using data from the planSlot.  oldtuple is
687  *		passed to foreign table triggers; it is NULL when the foreign
688  *		table has no relevant triggers.  We use tupleDeleted to indicate
689  *		whether the tuple was actually deleted; callers can use it to
690  *		decide whether to continue the operation.  When this DELETE is
691  *		part of a partition-key UPDATE, the slot returned by
692  *		EvalPlanQual() is passed back via the output parameter epqslot.
693  *
694  *		Returns RETURNING result if any, otherwise NULL.
695  * ----------------------------------------------------------------
696  */
697 static TupleTableSlot *
698 ExecDelete(ModifyTableState *mtstate,
699 		   ItemPointer tupleid,
700 		   HeapTuple oldtuple,
701 		   TupleTableSlot *planSlot,
702 		   EPQState *epqstate,
703 		   EState *estate,
704 		   bool processReturning,
705 		   bool canSetTag,
706 		   bool changingPart,
707 		   bool *tupleDeleted,
708 		   TupleTableSlot **epqslot)
709 {
710 	ResultRelInfo *resultRelInfo;
711 	Relation	resultRelationDesc;
712 	HTSU_Result result;
713 	HeapUpdateFailureData hufd;
714 	TupleTableSlot *slot = NULL;
715 	TransitionCaptureState *ar_delete_trig_tcs;
716 
717 	if (tupleDeleted)
718 		*tupleDeleted = false;
719 
720 	/*
721 	 * get information on the (current) result relation
722 	 */
723 	resultRelInfo = estate->es_result_relation_info;
724 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
725 
726 	/* BEFORE ROW DELETE Triggers */
727 	if (resultRelInfo->ri_TrigDesc &&
728 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
729 	{
730 		bool		dodelete;
731 
732 		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
733 										tupleid, oldtuple, epqslot);
734 
735 		if (!dodelete)			/* "do nothing" */
736 			return NULL;
737 	}
738 
739 	/* INSTEAD OF ROW DELETE Triggers */
740 	if (resultRelInfo->ri_TrigDesc &&
741 		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
742 	{
743 		bool		dodelete;
744 
745 		Assert(oldtuple != NULL);
746 		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);
747 
748 		if (!dodelete)			/* "do nothing" */
749 			return NULL;
750 	}
751 	else if (resultRelInfo->ri_FdwRoutine)
752 	{
753 		HeapTuple	tuple;
754 
755 		/*
756 		 * delete from foreign table: let the FDW do it
757 		 *
758 		 * We offer the trigger tuple slot as a place to store RETURNING data,
759 		 * although the FDW can return some other slot if it wants.  Set up
760 		 * the slot's tupdesc so the FDW doesn't need to do that for itself.
761 		 */
762 		slot = estate->es_trig_tuple_slot;
763 		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
764 			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
765 
766 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
767 															   resultRelInfo,
768 															   slot,
769 															   planSlot);
770 
771 		if (slot == NULL)		/* "do nothing" */
772 			return NULL;
773 
774 		/*
775 		 * RETURNING expressions might reference the tableoid column, so
776 		 * initialize t_tableOid before evaluating them.
777 		 */
778 		if (slot->tts_isempty)
779 			ExecStoreAllNullTuple(slot);
780 		tuple = ExecMaterializeSlot(slot);
781 		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
782 	}
783 	else
784 	{
785 		/*
786 		 * delete the tuple
787 		 *
788 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
789 		 * that the row to be deleted is visible to that snapshot, and throw a
790 		 * can't-serialize error if not. This is a special-case behavior
791 		 * needed for referential integrity updates in transaction-snapshot
792 		 * mode transactions.
793 		 */
794 ldelete:;
795 		result = heap_delete(resultRelationDesc, tupleid,
796 							 estate->es_output_cid,
797 							 estate->es_crosscheck_snapshot,
798 							 true /* wait for commit */ ,
799 							 &hufd,
800 							 changingPart);
801 		switch (result)
802 		{
803 			case HeapTupleSelfUpdated:
804 
805 				/*
806 				 * The target tuple was already updated or deleted by the
807 				 * current command, or by a later command in the current
808 				 * transaction.  The former case is possible in a join DELETE
809 				 * where multiple tuples join to the same target tuple. This
810 				 * is somewhat questionable, but Postgres has always allowed
811 				 * it: we just ignore additional deletion attempts.
812 				 *
813 				 * The latter case arises if the tuple is modified by a
814 				 * command in a BEFORE trigger, or perhaps by a command in a
815 				 * volatile function used in the query.  In such situations we
816 				 * should not ignore the deletion, but it is equally unsafe to
817 				 * proceed.  We don't want to discard the original DELETE
818 				 * while keeping the triggered actions based on its deletion;
819 				 * and it would be no better to allow the original DELETE
820 				 * while discarding updates that it triggered.  The row update
821 				 * carries some information that might be important according
822 				 * to business rules; so throwing an error is the only safe
823 				 * course.
824 				 *
825 				 * If a trigger actually intends this type of interaction, it
826 				 * can re-execute the DELETE and then return NULL to cancel
827 				 * the outer delete.
828 				 */
829 				if (hufd.cmax != estate->es_output_cid)
830 					ereport(ERROR,
831 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
832 							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
833 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
834 
835 				/* Else, already deleted by self; nothing to do */
836 				return NULL;
837 
838 			case HeapTupleMayBeUpdated:
839 				break;
840 
841 			case HeapTupleUpdated:
842 				if (IsolationUsesXactSnapshot())
843 					ereport(ERROR,
844 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
845 							 errmsg("could not serialize access due to concurrent update")));
846 				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
847 					ereport(ERROR,
848 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
849 							 errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
850 
851 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
852 				{
853 					TupleTableSlot *my_epqslot;
854 
855 					my_epqslot = EvalPlanQual(estate,
856 											  epqstate,
857 											  resultRelationDesc,
858 											  resultRelInfo->ri_RangeTableIndex,
859 											  LockTupleExclusive,
860 											  &hufd.ctid,
861 											  hufd.xmax);
862 					if (!TupIsNull(my_epqslot))
863 					{
864 						*tupleid = hufd.ctid;
865 
866 						/*
867 						 * If requested, skip delete and pass back the updated
868 						 * row.
869 						 */
870 						if (epqslot)
871 						{
872 							*epqslot = my_epqslot;
873 							return NULL;
874 						}
875 						else
876 							goto ldelete;
877 					}
878 				}
879 				/* tuple already deleted; nothing to do */
880 				return NULL;
881 
882 			default:
883 				elog(ERROR, "unrecognized heap_delete status: %u", result);
884 				return NULL;
885 		}
886 
887 		/*
888 		 * Note: Normally one would think that we have to delete index tuples
889 		 * associated with the heap tuple now...
890 		 *
891 		 * ... but in POSTGRES, we have no need to do this because VACUUM will
892 		 * take care of it later.  We can't delete index tuples immediately
893 		 * anyway, since the tuple is still visible to other transactions.
894 		 */
895 	}
896 
897 	if (canSetTag)
898 		(estate->es_processed)++;
899 
900 	/* Tell caller that the delete actually happened. */
901 	if (tupleDeleted)
902 		*tupleDeleted = true;
903 
904 	/*
905 	 * If this delete is the result of a partition key update that moved the
906 	 * tuple to a new partition, put this row into the transition OLD TABLE,
907 	 * if there is one. We need to do this separately for DELETE and INSERT
908 	 * because they happen on different tables.
909 	 */
910 	ar_delete_trig_tcs = mtstate->mt_transition_capture;
911 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
912 		&& mtstate->mt_transition_capture->tcs_update_old_table)
913 	{
914 		ExecARUpdateTriggers(estate, resultRelInfo,
915 							 tupleid,
916 							 oldtuple,
917 							 NULL,
918 							 NULL,
919 							 mtstate->mt_transition_capture);
920 
921 		/*
922 		 * We've already captured the OLD TABLE row, so make sure any AR
923 		 * DELETE trigger fired below doesn't capture it again.
924 		 */
925 		ar_delete_trig_tcs = NULL;
926 	}
927 
928 	/* AFTER ROW DELETE Triggers */
929 	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
930 						 ar_delete_trig_tcs);
931 
932 	/* Process RETURNING if present and if requested */
933 	if (processReturning && resultRelInfo->ri_projectReturning)
934 	{
935 		/*
936 		 * We have to put the target tuple into a slot, which means first we
937 		 * gotta fetch it.  We can use the trigger tuple slot.
938 		 */
939 		TupleTableSlot *rslot;
940 		HeapTupleData deltuple;
941 		Buffer		delbuffer;
942 
943 		if (resultRelInfo->ri_FdwRoutine)
944 		{
945 			/* FDW must have provided a slot containing the deleted row */
946 			Assert(!TupIsNull(slot));
947 			delbuffer = InvalidBuffer;
948 		}
949 		else
950 		{
951 			slot = estate->es_trig_tuple_slot;
952 			if (oldtuple != NULL)
953 			{
954 				deltuple = *oldtuple;
955 				delbuffer = InvalidBuffer;
956 			}
957 			else
958 			{
959 				deltuple.t_self = *tupleid;
960 				if (!heap_fetch(resultRelationDesc, SnapshotAny,
961 								&deltuple, &delbuffer, false, NULL))
962 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
963 			}
964 
965 			if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
966 				ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
967 			ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);
968 		}
969 
970 		rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
971 									 RelationGetRelid(resultRelationDesc),
972 									 slot, planSlot);
973 
974 		/*
975 		 * Before releasing the target tuple again, make sure rslot has a
976 		 * local copy of any pass-by-reference values.
977 		 */
978 		ExecMaterializeSlot(rslot);
979 
980 		ExecClearTuple(slot);
981 		if (BufferIsValid(delbuffer))
982 			ReleaseBuffer(delbuffer);
983 
984 		return rslot;
985 	}
986 
987 	return NULL;
988 }
989 
990 /* ----------------------------------------------------------------
991  *		ExecUpdate
992  *
993  *		note: we can't run UPDATE queries with transactions
994  *		off because UPDATEs are actually INSERTs and our
995  *		scan will mistakenly loop forever, updating the tuple
996  *		it just inserted.  This should be fixed, but until it
997  *		is, we don't want to get stuck in an infinite loop
998  *		that corrupts your database.
999  *
1000  *		When updating a table, tupleid identifies the tuple to
1001  *		update and oldtuple is NULL.  When updating a view, oldtuple
1002  *		is passed to the INSTEAD OF triggers and identifies what to
1003  *		update, and tupleid is invalid.  When updating a foreign table,
1004  *		tupleid is invalid; the FDW has to figure out which row to
1005  *		update using data from the planSlot.  oldtuple is passed to
1006  *		foreign table triggers; it is NULL when the foreign table has
1007  *		no relevant triggers.
1008  *
1009  *		Returns RETURNING result if any, otherwise NULL.
1010  * ----------------------------------------------------------------
1011  */
1012 static TupleTableSlot *
1013 ExecUpdate(ModifyTableState *mtstate,
1014 		   ItemPointer tupleid,
1015 		   HeapTuple oldtuple,
1016 		   TupleTableSlot *slot,
1017 		   TupleTableSlot *planSlot,
1018 		   EPQState *epqstate,
1019 		   EState *estate,
1020 		   bool canSetTag)
1021 {
1022 	HeapTuple	tuple;
1023 	ResultRelInfo *resultRelInfo;
1024 	Relation	resultRelationDesc;
1025 	HTSU_Result result;
1026 	HeapUpdateFailureData hufd;
1027 	List	   *recheckIndexes = NIL;
1028 	TupleConversionMap *saved_tcs_map = NULL;
1029 
1030 	/*
1031 	 * abort the operation if not running transactions
1032 	 */
1033 	if (IsBootstrapProcessingMode())
1034 		elog(ERROR, "cannot UPDATE during bootstrap");
1035 
1036 	/*
1037 	 * get the heap tuple out of the tuple table slot, making sure we have a
1038 	 * writable copy
1039 	 */
1040 	tuple = ExecMaterializeSlot(slot);
1041 
1042 	/*
1043 	 * get information on the (current) result relation
1044 	 */
1045 	resultRelInfo = estate->es_result_relation_info;
1046 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1047 
1048 	/* BEFORE ROW UPDATE Triggers */
1049 	if (resultRelInfo->ri_TrigDesc &&
1050 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
1051 	{
1052 		slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
1053 									tupleid, oldtuple, slot);
1054 
1055 		if (slot == NULL)		/* "do nothing" */
1056 			return NULL;
1057 
1058 		/* trigger might have changed tuple */
1059 		tuple = ExecMaterializeSlot(slot);
1060 	}
1061 
1062 	/* INSTEAD OF ROW UPDATE Triggers */
1063 	if (resultRelInfo->ri_TrigDesc &&
1064 		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
1065 	{
1066 		slot = ExecIRUpdateTriggers(estate, resultRelInfo,
1067 									oldtuple, slot);
1068 
1069 		if (slot == NULL)		/* "do nothing" */
1070 			return NULL;
1071 
1072 		/* trigger might have changed tuple */
1073 		tuple = ExecMaterializeSlot(slot);
1074 	}
1075 	else if (resultRelInfo->ri_FdwRoutine)
1076 	{
1077 		/*
1078 		 * update in foreign table: let the FDW do it
1079 		 */
1080 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
1081 															   resultRelInfo,
1082 															   slot,
1083 															   planSlot);
1084 
1085 		if (slot == NULL)		/* "do nothing" */
1086 			return NULL;
1087 
1088 		/* FDW might have changed tuple */
1089 		tuple = ExecMaterializeSlot(slot);
1090 
1091 		/*
1092 		 * AFTER ROW Triggers or RETURNING expressions might reference the
1093 		 * tableoid column, so initialize t_tableOid before evaluating them.
1094 		 */
1095 		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
1096 	}
1097 	else
1098 	{
1099 		LockTupleMode lockmode;
1100 		bool		partition_constraint_failed;
1101 
1102 		/*
1103 		 * Constraints might reference the tableoid column, so initialize
1104 		 * t_tableOid before evaluating them.
1105 		 */
1106 		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
1107 
1108 		/*
1109 		 * Check any RLS UPDATE WITH CHECK policies
1110 		 *
1111 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
1112 		 * must loop back here and recheck any RLS policies and constraints.
1113 		 * (We don't need to redo triggers, however.  If there are any BEFORE
1114 		 * triggers then trigger.c will have done heap_lock_tuple to lock the
1115 		 * correct tuple, so there's no need to do them again.)
1116 		 */
1117 lreplace:;
1118 
1119 		/*
1120 		 * If partition constraint fails, this row might get moved to another
1121 		 * partition, in which case we should check the RLS CHECK policy just
1122 		 * before inserting into the new partition, rather than doing it here.
1123 		 * This is because a trigger on that partition might again change the
1124 		 * row.  So skip the WCO checks if the partition constraint fails.
1125 		 */
1126 		partition_constraint_failed =
1127 			resultRelInfo->ri_PartitionCheck &&
1128 			!ExecPartitionCheck(resultRelInfo, slot, estate, false);
1129 
1130 		if (!partition_constraint_failed &&
1131 			resultRelInfo->ri_WithCheckOptions != NIL)
1132 		{
1133 			/*
1134 			 * ExecWithCheckOptions() will skip any WCOs which are not of the
1135 			 * kind we are looking for at this point.
1136 			 */
1137 			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
1138 								 resultRelInfo, slot, estate);
1139 		}
1140 
1141 		/*
1142 		 * If a partition check failed, try to move the row into the right
1143 		 * partition.
1144 		 */
1145 		if (partition_constraint_failed)
1146 		{
1147 			bool		tuple_deleted;
1148 			TupleTableSlot *ret_slot;
1149 			TupleTableSlot *orig_slot = slot;
1150 			TupleTableSlot *epqslot = NULL;
1151 			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1152 			int			map_index;
1153 			TupleConversionMap *tupconv_map;
1154 
1155 			/*
1156 			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
1157 			 * original row to migrate to a different partition.  Maybe this
1158 			 * can be implemented some day, but it seems a fringe feature with
1159 			 * little redeeming value.
1160 			 */
1161 			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1162 				ereport(ERROR,
1163 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1164 						 errmsg("invalid ON UPDATE specification"),
1165 						 errdetail("The result tuple would appear in a different partition than the original tuple.")));
1166 
1167 			/*
1168 			 * When an UPDATE is run on a leaf partition, we will not have
1169 			 * partition tuple routing set up. In that case, fail with a
1170 			 * partition constraint violation error.
1171 			 */
1172 			if (proute == NULL)
1173 				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1174 
1175 			/*
1176 			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
1177 			 * processing. We want to return rows from INSERT.
1178 			 */
1179 			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
1180 					   estate, false, false /* canSetTag */ ,
1181 					   true /* changingPart */ , &tuple_deleted, &epqslot);
1182 
1183 			/*
1184 			 * If for some reason the DELETE didn't happen (e.g. a trigger
1185 			 * prevented it, or it was already deleted by self, or it was
1186 			 * concurrently deleted by another transaction), then we should
1187 			 * skip the insert as well; otherwise, an UPDATE could cause an
1188 			 * increase in the total number of rows across all partitions,
1189 			 * which is clearly wrong.
1190 			 *
1191 			 * For a normal UPDATE, the case where the tuple has been the
1192 			 * subject of a concurrent UPDATE or DELETE would be handled by
1193 			 * the EvalPlanQual machinery, but for an UPDATE that we've
1194 			 * translated into a DELETE from this partition and an INSERT into
1195 			 * some other partition, that's not available, because CTID chains
1196 			 * can't span relation boundaries.  We mimic the semantics to a
1197 			 * limited extent by skipping the INSERT if the DELETE fails to
1198 			 * find a tuple. This ensures that two concurrent attempts to
1199 			 * UPDATE the same tuple at the same time can't turn one tuple
1200 			 * into two, and that an UPDATE of a just-deleted tuple can't
1201 			 * resurrect it.
1202 			 */
1203 			if (!tuple_deleted)
1204 			{
1205 				/*
1206 				 * epqslot will be typically NULL.  But when ExecDelete()
1207 				 * finds that another transaction has concurrently updated the
1208 				 * same row, it re-fetches the row, skips the delete, and
1209 				 * epqslot is set to the re-fetched tuple slot. In that case,
1210 				 * we need to do all the checks again.
1211 				 */
1212 				if (TupIsNull(epqslot))
1213 					return NULL;
1214 				else
1215 				{
1216 					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1217 					tuple = ExecMaterializeSlot(slot);
1218 					goto lreplace;
1219 				}
1220 			}
1221 
1222 			/*
1223 			 * Updates set the transition capture map only when a new subplan
1224 			 * is chosen.  But for inserts, it is set for each row. So after
1225 			 * INSERT, we need to revert back to the map created for UPDATE;
1226 			 * otherwise the next UPDATE will incorrectly use the one created
1227 			 * for INSERT.  So first save the one created for UPDATE.
1228 			 */
1229 			if (mtstate->mt_transition_capture)
1230 				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
1231 
1232 			/*
1233 			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
1234 			 * should convert the tuple into root's tuple descriptor, since
1235 			 * ExecInsert() starts the search from root.  The tuple conversion
1236 			 * map list is in the order of mtstate->resultRelInfo[], so to
1237 			 * retrieve the one for this resultRel, we need to know the
1238 			 * position of the resultRel in mtstate->resultRelInfo[].
1239 			 */
1240 			map_index = resultRelInfo - mtstate->resultRelInfo;
1241 			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
1242 			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
1243 			tuple = ConvertPartitionTupleSlot(tupconv_map,
1244 											  tuple,
1245 											  proute->root_tuple_slot,
1246 											  &slot);
1247 
1248 			/*
1249 			 * Prepare for tuple routing, making it look like we're inserting
1250 			 * into the root.
1251 			 */
1252 			Assert(mtstate->rootResultRelInfo != NULL);
1253 			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
1254 										   mtstate->rootResultRelInfo, slot);
1255 
1256 			ret_slot = ExecInsert(mtstate, slot, planSlot,
1257 								  orig_slot, resultRelInfo,
1258 								  estate, canSetTag);
1259 
1260 			/* Revert ExecPrepareTupleRouting's node change. */
1261 			estate->es_result_relation_info = resultRelInfo;
1262 			if (mtstate->mt_transition_capture)
1263 			{
1264 				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1265 				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
1266 			}
1267 
1268 			return ret_slot;
1269 		}
1270 
1271 		/*
1272 		 * Check the constraints of the tuple.  We've already checked the
1273 		 * partition constraint above; however, we must still ensure the tuple
1274 		 * passes all other constraints, so we will call ExecConstraints() and
1275 		 * have it validate all remaining checks.
1276 		 */
1277 		if (resultRelationDesc->rd_att->constr)
1278 			ExecConstraints(resultRelInfo, slot, estate);
1279 
1280 		/*
1281 		 * replace the heap tuple
1282 		 *
1283 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1284 		 * that the row to be updated is visible to that snapshot, and throw a
1285 		 * can't-serialize error if not. This is a special-case behavior
1286 		 * needed for referential integrity updates in transaction-snapshot
1287 		 * mode transactions.
1288 		 */
1289 		result = heap_update(resultRelationDesc, tupleid, tuple,
1290 							 estate->es_output_cid,
1291 							 estate->es_crosscheck_snapshot,
1292 							 true /* wait for commit */ ,
1293 							 &hufd, &lockmode);
1294 		switch (result)
1295 		{
1296 			case HeapTupleSelfUpdated:
1297 
1298 				/*
1299 				 * The target tuple was already updated or deleted by the
1300 				 * current command, or by a later command in the current
1301 				 * transaction.  The former case is possible in a join UPDATE
1302 				 * where multiple tuples join to the same target tuple. This
1303 				 * is pretty questionable, but Postgres has always allowed it:
1304 				 * we just execute the first update action and ignore
1305 				 * additional update attempts.
1306 				 *
1307 				 * The latter case arises if the tuple is modified by a
1308 				 * command in a BEFORE trigger, or perhaps by a command in a
1309 				 * volatile function used in the query.  In such situations we
1310 				 * should not ignore the update, but it is equally unsafe to
1311 				 * proceed.  We don't want to discard the original UPDATE
1312 				 * while keeping the triggered actions based on it; and we
1313 				 * have no principled way to merge this update with the
1314 				 * previous ones.  So throwing an error is the only safe
1315 				 * course.
1316 				 *
1317 				 * If a trigger actually intends this type of interaction, it
1318 				 * can re-execute the UPDATE (assuming it can figure out how)
1319 				 * and then return NULL to cancel the outer update.
1320 				 */
1321 				if (hufd.cmax != estate->es_output_cid)
1322 					ereport(ERROR,
1323 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1324 							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1325 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1326 
1327 				/* Else, already updated by self; nothing to do */
1328 				return NULL;
1329 
1330 			case HeapTupleMayBeUpdated:
1331 				break;
1332 
1333 			case HeapTupleUpdated:
1334 				if (IsolationUsesXactSnapshot())
1335 					ereport(ERROR,
1336 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1337 							 errmsg("could not serialize access due to concurrent update")));
1338 				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
1339 					ereport(ERROR,
1340 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1341 							 errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
1342 
1343 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
1344 				{
1345 					TupleTableSlot *epqslot;
1346 
1347 					epqslot = EvalPlanQual(estate,
1348 										   epqstate,
1349 										   resultRelationDesc,
1350 										   resultRelInfo->ri_RangeTableIndex,
1351 										   lockmode,
1352 										   &hufd.ctid,
1353 										   hufd.xmax);
1354 					if (!TupIsNull(epqslot))
1355 					{
1356 						*tupleid = hufd.ctid;
1357 						slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1358 						tuple = ExecMaterializeSlot(slot);
1359 						goto lreplace;
1360 					}
1361 				}
1362 				/* tuple already deleted; nothing to do */
1363 				return NULL;
1364 
1365 			default:
1366 				elog(ERROR, "unrecognized heap_update status: %u", result);
1367 				return NULL;
1368 		}
1369 
1370 		/*
1371 		 * Note: instead of having to update the old index tuples associated
1372 		 * with the heap tuple, all we do is form and insert new index tuples.
1373 		 * This is because UPDATEs are actually DELETEs and INSERTs, and index
1374 		 * tuple deletion is done later by VACUUM (see notes in ExecDelete).
1375 		 * All we do here is insert new index tuples.  -cim 9/27/89
1376 		 */
1377 
1378 		/*
1379 		 * insert index entries for tuple
1380 		 *
1381 		 * Note: heap_update returns the tid (location) of the new tuple in
1382 		 * the t_self field.
1383 		 *
1384 		 * If it's a HOT update, we mustn't insert new index entries.
1385 		 */
1386 		if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
1387 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
1388 												   estate, false, NULL, NIL);
1389 	}
1390 
1391 	if (canSetTag)
1392 		(estate->es_processed)++;
1393 
1394 	/* AFTER ROW UPDATE Triggers */
1395 	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple,
1396 						 recheckIndexes,
1397 						 mtstate->operation == CMD_INSERT ?
1398 						 mtstate->mt_oc_transition_capture :
1399 						 mtstate->mt_transition_capture);
1400 
1401 	list_free(recheckIndexes);
1402 
1403 	/*
1404 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
1405 	 * required to do this after testing all constraints and uniqueness
1406 	 * violations per the SQL spec, so we do it after actually updating the
1407 	 * record in the heap and all indexes.
1408 	 *
1409 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
1410 	 * are looking for at this point.
1411 	 */
1412 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1413 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1414 
1415 	/* Process RETURNING if present */
1416 	if (resultRelInfo->ri_projectReturning)
1417 		return ExecProcessReturning(resultRelInfo->ri_projectReturning,
1418 									RelationGetRelid(resultRelationDesc),
1419 									slot, planSlot);
1420 
1421 	return NULL;
1422 }
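
/*
 * Illustrative example of the row-movement path above: for a table "parted"
 * partitioned on a column k (names hypothetical), an
 *		UPDATE parted SET k = <value belonging to another partition> ...;
 * fails the source partition's constraint check in ExecUpdate, which then
 * deletes the row from the source partition via ExecDelete and re-inserts
 * the converted tuple through tuple routing via ExecInsert; if the DELETE
 * turns out not to have removed a tuple, the INSERT is skipped (or the
 * whole step is redone on the re-fetched row).
 */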
1423 
1424 /*
1425  * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
1426  *
1427  * Try to lock tuple for update as part of speculative insertion.  If
1428  * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
1429  * (but still lock row, even though it may not satisfy estate's
1430  * snapshot).
1431  *
1432  * Returns true if we're done (with or without an update), or false if
1433  * the caller must retry the INSERT from scratch.
1434  */
1435 static bool
1436 ExecOnConflictUpdate(ModifyTableState *mtstate,
1437 					 ResultRelInfo *resultRelInfo,
1438 					 ItemPointer conflictTid,
1439 					 TupleTableSlot *planSlot,
1440 					 TupleTableSlot *excludedSlot,
1441 					 EState *estate,
1442 					 bool canSetTag,
1443 					 TupleTableSlot **returning)
1444 {
1445 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
1446 	Relation	relation = resultRelInfo->ri_RelationDesc;
1447 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
1448 	HeapTupleData tuple;
1449 	HeapUpdateFailureData hufd;
1450 	LockTupleMode lockmode;
1451 	HTSU_Result test;
1452 	Buffer		buffer;
1453 
1454 	/* Determine lock mode to use */
1455 	lockmode = ExecUpdateLockMode(estate, resultRelInfo);
1456 
1457 	/*
1458 	 * Lock tuple for update.  Don't follow updates when tuple cannot be
1459 	 * locked without doing so.  A row locking conflict here means our
1460 	 * previous conclusion that the tuple is conclusively committed is not
1461 	 * true anymore.
1462 	 */
1463 	tuple.t_self = *conflictTid;
1464 	test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
1465 						   lockmode, LockWaitBlock, false, &buffer,
1466 						   &hufd);
1467 	switch (test)
1468 	{
1469 		case HeapTupleMayBeUpdated:
1470 			/* success! */
1471 			break;
1472 
1473 		case HeapTupleInvisible:
1474 
1475 			/*
1476 			 * This can occur when a just inserted tuple is updated again in
1477 			 * the same command. E.g. because multiple rows with the same
1478 			 * conflicting key values are inserted.
1479 			 *
1480 			 * This is somewhat similar to the ExecUpdate()
1481 			 * HeapTupleSelfUpdated case.  We do not want to proceed because
1482 			 * it would lead to the same row being updated a second time in
1483 			 * some unspecified order, and in contrast to plain UPDATEs
1484 			 * there's no historical behavior to break.
1485 			 *
1486 			 * It is the user's responsibility to prevent this situation from
1487 			 * occurring.  These problems are why SQL-2003 similarly specifies
1488 			 * that for SQL MERGE, an exception must be raised in the event of
1489 			 * an attempt to update the same row twice.
1490 			 */
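			/*
			 * Illustrative example (hypothetical table and column names): a
			 * statement such as
			 *
			 *	INSERT INTO tab (key, val) VALUES (1, 'a'), (1, 'b')
			 *	  ON CONFLICT (key) DO UPDATE SET val = EXCLUDED.val;
			 *
			 * inserts the first row, the second row then conflicts with it,
			 * and the DO UPDATE would modify that just-inserted row a second
			 * time within the same command, raising the error below.
			 */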
1491 			if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
1492 				ereport(ERROR,
1493 						(errcode(ERRCODE_CARDINALITY_VIOLATION),
1494 						 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
1495 						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
1496 
1497 			/* This shouldn't happen */
1498 			elog(ERROR, "attempted to lock invisible tuple");
1499 			break;
1500 
1501 		case HeapTupleSelfUpdated:
1502 
1503 			/*
1504 			 * This state should never be reached. As a dirty snapshot is used
1505 			 * to find conflicting tuples, speculative insertion wouldn't have
1506 			 * seen this row to conflict with.
1507 			 */
1508 			elog(ERROR, "unexpected self-updated tuple");
1509 			break;
1510 
1511 		case HeapTupleUpdated:
1512 			if (IsolationUsesXactSnapshot())
1513 				ereport(ERROR,
1514 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1515 						 errmsg("could not serialize access due to concurrent update")));
1516 
1517 			/*
1518 			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
1519 			 * a partitioned table, we shouldn't reach a case where the tuple
1520 			 * to be locked has been moved to another partition due to a
1521 			 * concurrent update of the partition key.
1522 			 */
1523 			Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
1524 
1525 			/*
1526 			 * Tell caller to try again from the very start.
1527 			 *
1528 			 * It does not make sense to use the usual EvalPlanQual() style
1529 			 * loop here, as the new version of the row might not conflict
1530 			 * anymore, or the conflicting tuple has actually been deleted.
1531 			 */
1532 			ReleaseBuffer(buffer);
1533 			return false;
1534 
1535 		default:
1536 			elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
1537 	}
1538 
1539 	/*
1540 	 * Success, the tuple is locked.
1541 	 *
1542 	 * Reset per-tuple memory context to free any expression evaluation
1543 	 * storage allocated in the previous cycle.
1544 	 */
1545 	ResetExprContext(econtext);
1546 
1547 	/*
1548 	 * Verify that the tuple is visible to our MVCC snapshot if the current
1549 	 * isolation level mandates that.
1550 	 *
1551 	 * It's not sufficient to rely on the check within ExecUpdate() as, e.g.,
1552 	 * an ON CONFLICT ... WHERE clause may prevent us from reaching that check.
1553 	 *
1554 	 * This means we only ever continue when a new command in the current
1555 	 * transaction could see the row, even though in READ COMMITTED mode the
1556 	 * tuple will not be visible according to the current statement's
1557 	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
1558 	 * versions.
1559 	 */
1560 	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
1561 
1562 	/* Store target's existing tuple in the state's dedicated slot */
1563 	ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
1564 
1565 	/*
1566 	 * Make tuple and any needed join variables available to ExecQual and
1567 	 * ExecProject.  The EXCLUDED tuple is installed in ecxt_innertuple, while
1568 	 * the target's existing tuple is installed in the scantuple.  EXCLUDED
1569 	 * has been made to reference INNER_VAR in setrefs.c, but there is no
1570 	 * other redirection.
1571 	 */
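	/*
	 * For instance (hypothetical names), in
	 *
	 *	INSERT INTO tab AS t ... ON CONFLICT (key) DO UPDATE
	 *	  SET val = EXCLUDED.val WHERE t.val IS DISTINCT FROM EXCLUDED.val
	 *
	 * references to "t" are evaluated against the scantuple installed here,
	 * while references to "EXCLUDED" are evaluated against the inner tuple.
	 */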
1572 	econtext->ecxt_scantuple = mtstate->mt_existing;
1573 	econtext->ecxt_innertuple = excludedSlot;
1574 	econtext->ecxt_outertuple = NULL;
1575 
1576 	if (!ExecQual(onConflictSetWhere, econtext))
1577 	{
1578 		ReleaseBuffer(buffer);
1579 		InstrCountFiltered1(&mtstate->ps, 1);
1580 		return true;			/* done with the tuple */
1581 	}
1582 
1583 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1584 	{
1585 		/*
1586 		 * Check target's existing tuple against UPDATE-applicable USING
1587 		 * security barrier quals (if any), enforced here as RLS checks/WCOs.
1588 		 *
1589 		 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
1590 		 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
1591 		 * but that's almost the extent of its special handling for ON
1592 		 * CONFLICT DO UPDATE.
1593 		 *
1594 		 * The rewriter will also have associated UPDATE applicable straight
1595 		 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
1596 		 * follows.  INSERTs and UPDATEs naturally have mutually exclusive WCO
1597 		 * kinds, so there is no danger of spurious over-enforcement in the
1598 		 * INSERT or UPDATE path.
1599 		 */
1600 		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
1601 							 mtstate->mt_existing,
1602 							 mtstate->ps.state);
1603 	}
1604 
1605 	/* Project the new tuple version */
1606 	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
1607 
1608 	/*
1609 	 * Note that it is possible that the target tuple has been modified in
1610 	 * this session, after the above heap_lock_tuple. We choose to not error
1611 	 * out in that case, in line with ExecUpdate's treatment of similar cases.
1612 	 * This can happen if an UPDATE is triggered from within ExecQual(),
1613 	 * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
1614 	 * wCTE in the ON CONFLICT's SET.
1615 	 */
1616 
1617 	/* Execute UPDATE with projection */
1618 	*returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
1619 							mtstate->mt_conflproj, planSlot,
1620 							&mtstate->mt_epqstate, mtstate->ps.state,
1621 							canSetTag);
1622 
1623 	ReleaseBuffer(buffer);
1624 	return true;
1625 }
1626 
1627 
1628 /*
1629  * Process BEFORE EACH STATEMENT triggers
1630  */
1631 static void
1632 fireBSTriggers(ModifyTableState *node)
1633 {
1634 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1635 	ResultRelInfo *resultRelInfo = node->resultRelInfo;
1636 
1637 	/*
1638 	 * If the node modifies a partitioned table, we must fire its triggers.
1639 	 * Note that in that case, node->resultRelInfo points to the first leaf
1640 	 * partition, not the root table.
1641 	 */
1642 	if (node->rootResultRelInfo != NULL)
1643 		resultRelInfo = node->rootResultRelInfo;
1644 
1645 	switch (node->operation)
1646 	{
1647 		case CMD_INSERT:
1648 			ExecBSInsertTriggers(node->ps.state, resultRelInfo);
1649 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1650 				ExecBSUpdateTriggers(node->ps.state,
1651 									 resultRelInfo);
1652 			break;
1653 		case CMD_UPDATE:
1654 			ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
1655 			break;
1656 		case CMD_DELETE:
1657 			ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
1658 			break;
1659 		default:
1660 			elog(ERROR, "unknown operation");
1661 			break;
1662 	}
1663 }
1664 
1665 /*
1666  * Return the target rel ResultRelInfo.
1667  *
1668  * This relation is the same as:
1669  * - the relation for which we will fire AFTER STATEMENT triggers.
1670  * - the relation into whose tuple format all captured transition tuples must
1671  *   be converted.
1672  * - the root partitioned table.
1673  */
1674 static ResultRelInfo *
1675 getTargetResultRelInfo(ModifyTableState *node)
1676 {
1677 	/*
1678 	 * Note that if the node modifies a partitioned table, node->resultRelInfo
1679 	 * points to the first leaf partition, not the root table.
1680 	 */
1681 	if (node->rootResultRelInfo != NULL)
1682 		return node->rootResultRelInfo;
1683 	else
1684 		return node->resultRelInfo;
1685 }
1686 
1687 /*
1688  * Process AFTER EACH STATEMENT triggers
1689  */
1690 static void
1691 fireASTriggers(ModifyTableState *node)
1692 {
1693 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1694 	ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);
1695 
1696 	switch (node->operation)
1697 	{
1698 		case CMD_INSERT:
1699 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1700 				ExecASUpdateTriggers(node->ps.state,
1701 									 resultRelInfo,
1702 									 node->mt_oc_transition_capture);
1703 			ExecASInsertTriggers(node->ps.state, resultRelInfo,
1704 								 node->mt_transition_capture);
1705 			break;
1706 		case CMD_UPDATE:
1707 			ExecASUpdateTriggers(node->ps.state, resultRelInfo,
1708 								 node->mt_transition_capture);
1709 			break;
1710 		case CMD_DELETE:
1711 			ExecASDeleteTriggers(node->ps.state, resultRelInfo,
1712 								 node->mt_transition_capture);
1713 			break;
1714 		default:
1715 			elog(ERROR, "unknown operation");
1716 			break;
1717 	}
1718 }
1719 
1720 /*
1721  * Set up the state needed for collecting transition tuples for AFTER
1722  * triggers.
1723  */
1724 static void
1725 ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
1726 {
1727 	ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
1728 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
1729 
1730 	/* Check for transition tables on the directly targeted relation. */
1731 	mtstate->mt_transition_capture =
1732 		MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1733 								   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1734 								   mtstate->operation);
1735 	if (plan->operation == CMD_INSERT &&
1736 		plan->onConflictAction == ONCONFLICT_UPDATE)
1737 		mtstate->mt_oc_transition_capture =
1738 			MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1739 									   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1740 									   CMD_UPDATE);
1741 
1742 	/*
1743 	 * If we found that we need to collect transition tuples then we may also
1744 	 * need tuple conversion maps for any children that have TupleDescs that
1745 	 * aren't compatible with the tuplestores.  (We can share these maps
1746 	 * between the regular and ON CONFLICT cases.)
1747 	 */
1748 	if (mtstate->mt_transition_capture != NULL ||
1749 		mtstate->mt_oc_transition_capture != NULL)
1750 	{
1751 		ExecSetupChildParentMapForTcs(mtstate);
1752 
1753 		/*
1754 		 * Install the conversion map for the first plan for UPDATE and DELETE
1755 		 * operations.  It will be advanced each time we switch to the next
1756 		 * plan.  (INSERT operations set it every time, so we need not update
1757 		 * mtstate->mt_oc_transition_capture here.)
1758 		 */
1759 		if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
1760 			mtstate->mt_transition_capture->tcs_map =
1761 				tupconv_map_for_subplan(mtstate, 0);
1762 	}
1763 }
1764 
1765 /*
1766  * ExecPrepareTupleRouting --- prepare for routing one tuple
1767  *
1768  * Determine the partition in which the tuple in slot is to be inserted,
1769  * and modify mtstate and estate to prepare for it.
1770  *
1771  * Caller must revert the estate changes after executing the insertion!
1772  * In mtstate, transition capture changes may also need to be reverted.
1773  *
1774  * Returns a slot holding the tuple of the partition rowtype.
1775  */
1776 static TupleTableSlot *
1777 ExecPrepareTupleRouting(ModifyTableState *mtstate,
1778 						EState *estate,
1779 						PartitionTupleRouting *proute,
1780 						ResultRelInfo *targetRelInfo,
1781 						TupleTableSlot *slot)
1782 {
1783 	ModifyTable *node;
1784 	int			partidx;
1785 	ResultRelInfo *partrel;
1786 	HeapTuple	tuple;
1787 
1788 	/*
1789 	 * Determine the target partition.  If ExecFindPartition does not find a
1790 	 * partition after all, it doesn't return here; otherwise, the returned
1791 	 * value is to be used as an index into the arrays for the ResultRelInfo
1792 	 * and TupleConversionMap for the partition.
1793 	 */
1794 	partidx = ExecFindPartition(targetRelInfo,
1795 								proute->partition_dispatch_info,
1796 								slot,
1797 								estate);
1798 	Assert(partidx >= 0 && partidx < proute->num_partitions);
1799 
1800 	/*
1801 	 * Get the ResultRelInfo corresponding to the selected partition; if not
1802 	 * yet there, initialize it.
1803 	 */
1804 	partrel = proute->partitions[partidx];
1805 	if (partrel == NULL)
1806 		partrel = ExecInitPartitionInfo(mtstate, targetRelInfo,
1807 										proute, estate,
1808 										partidx);
1809 
1810 	/*
1811 	 * Check whether the partition is routable, if we haven't done so yet
1812 	 *
1813 	 * Note: an UPDATE of a partition key invokes an INSERT that moves the
1814 	 * tuple to a new partition.  This check would be applied to a subplan
1815 	 * partition of such an UPDATE that is chosen as the partition to route
1816 	 * the tuple to.  The reason we do this check here rather than in
1817 	 * ExecSetupPartitionTupleRouting is to avoid aborting such an UPDATE
1818 	 * unnecessarily due to non-routable subplan partitions that may not be
1819 	 * chosen for update tuple movement after all.
1820 	 */
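	/*
	 * For example (hypothetical), an
	 *
	 *	UPDATE parted SET partkey = partkey + 10 WHERE ...
	 *
	 * may move rows out of some partitions that appear as subplans.  A
	 * partition that never actually receives a routed tuple is never checked
	 * here, so e.g. a foreign-table partition whose FDW cannot accept routed
	 * inserts does not make the whole UPDATE fail needlessly.
	 */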
1821 	if (!partrel->ri_PartitionReadyForRouting)
1822 	{
1823 		/* Verify the partition is a valid target for INSERT. */
1824 		CheckValidResultRel(partrel, CMD_INSERT);
1825 
1826 		/* Set up information needed for routing tuples to the partition. */
1827 		ExecInitRoutingInfo(mtstate, estate, proute, partrel, partidx);
1828 	}
1829 
1830 	/*
1831 	 * Make it look like we are inserting into the partition.
1832 	 */
1833 	estate->es_result_relation_info = partrel;
1834 
1835 	/* Get the heap tuple out of the given slot. */
1836 	tuple = ExecMaterializeSlot(slot);
1837 
1838 	/*
1839 	 * If we're capturing transition tuples, we might need to convert from the
1840 	 * partition rowtype to parent rowtype.
1841 	 */
1842 	if (mtstate->mt_transition_capture != NULL)
1843 	{
1844 		if (partrel->ri_TrigDesc &&
1845 			partrel->ri_TrigDesc->trig_insert_before_row)
1846 		{
1847 			/*
1848 			 * If there are any BEFORE triggers on the partition, we'll have
1849 			 * to be ready to convert their result back to tuplestore format.
1850 			 */
1851 			mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1852 			mtstate->mt_transition_capture->tcs_map =
1853 				TupConvMapForLeaf(proute, targetRelInfo, partidx);
1854 		}
1855 		else
1856 		{
1857 			/*
1858 			 * Otherwise, just remember the original unconverted tuple, to
1859 			 * avoid a needless round trip conversion.
1860 			 */
1861 			mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
1862 			mtstate->mt_transition_capture->tcs_map = NULL;
1863 		}
1864 	}
1865 	if (mtstate->mt_oc_transition_capture != NULL)
1866 	{
1867 		mtstate->mt_oc_transition_capture->tcs_map =
1868 			TupConvMapForLeaf(proute, targetRelInfo, partidx);
1869 	}
1870 
1871 	/*
1872 	 * Convert the tuple, if necessary.
1873 	 */
1874 	ConvertPartitionTupleSlot(proute->parent_child_tupconv_maps[partidx],
1875 							  tuple,
1876 							  proute->partition_tuple_slot,
1877 							  &slot);
1878 
1879 	/* Initialize information needed to handle ON CONFLICT DO UPDATE. */
1880 	Assert(mtstate != NULL);
1881 	node = (ModifyTable *) mtstate->ps.plan;
1882 	if (node->onConflictAction == ONCONFLICT_UPDATE)
1883 	{
1884 		Assert(mtstate->mt_existing != NULL);
1885 		ExecSetSlotDescriptor(mtstate->mt_existing,
1886 							  RelationGetDescr(partrel->ri_RelationDesc));
1887 		Assert(mtstate->mt_conflproj != NULL);
1888 		ExecSetSlotDescriptor(mtstate->mt_conflproj,
1889 							  partrel->ri_onConflict->oc_ProjTupdesc);
1890 	}
1891 
1892 	return slot;
1893 }
1894 
1895 /*
1896  * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
1897  *
1898  * This map array is required to convert the tuple from the subplan result rel
1899  * to the target table descriptor. This requirement arises for two independent
1900  * scenarios:
1901  * 1. For update-tuple-routing.
1902  * 2. For capturing tuples in transition tables.
1903  */
1904 static void
1905 ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
1906 {
1907 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
1908 	ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
1909 	TupleDesc	outdesc;
1910 	int			numResultRelInfos = mtstate->mt_nplans;
1911 	int			i;
1912 
1913 	/*
1914 	 * First check if there is already a per-subplan array allocated. Even if
1915 	 * there is already a per-leaf map array, we won't require a per-subplan
1916 	 * one, since we will use the subplan offset array to convert the subplan
1917 	 * index to per-leaf index.
1918 	 */
1919 	if (mtstate->mt_per_subplan_tupconv_maps ||
1920 		(mtstate->mt_partition_tuple_routing &&
1921 		 mtstate->mt_partition_tuple_routing->child_parent_tupconv_maps))
1922 		return;
1923 
1924 	/*
1925 	 * Build array of conversion maps from each child's TupleDesc to the one
1926 	 * used in the target relation.  The map pointers may be NULL when no
1927 	 * conversion is necessary, which is hopefully a common case.
1928 	 */
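	/*
	 * (Illustrative: a child whose columns were declared in a different
	 * order than the root's, or that carries dropped-column slots the root
	 * lacks, gets a non-NULL map; children whose descriptors already match
	 * get a NULL map and need no conversion.)
	 */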
1929 
1930 	/* Get tuple descriptor of the target rel. */
1931 	outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
1932 
1933 	mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
1934 		palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
1935 
1936 	for (i = 0; i < numResultRelInfos; ++i)
1937 	{
1938 		mtstate->mt_per_subplan_tupconv_maps[i] =
1939 			convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
1940 								   outdesc,
1941 								   gettext_noop("could not convert row type"));
1942 	}
1943 }
1944 
1945 /*
1946  * Initialize the child-to-root tuple conversion map array required for
1947  * capturing transition tuples.
1948  *
1949  * The map array can be indexed either by subplan index or by leaf-partition
1950  * index.  For transition tables, we need a subplan-indexed access to the map,
1951  * and where tuple-routing is present, we also require a leaf-indexed access.
1952  */
1953 static void
1954 ExecSetupChildParentMapForTcs(ModifyTableState *mtstate)
1955 {
1956 	PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1957 
1958 	/*
1959 	 * If partition tuple routing is set up, we will require partition-indexed
1960 	 * access. In that case, create the map array indexed by partition; we
1961 	 * will still be able to access the maps using a subplan index by
1962 	 * converting the subplan index to a partition index using
1963 	 * subplan_partition_offsets. If tuple routing is not set up, it means we
1964 	 * don't require partition-indexed access. In that case, create just a
1965 	 * subplan-indexed map.
1966 	 */
1967 	if (proute)
1968 	{
1969 		/*
1970 		 * If a partition-indexed map array is to be created, the subplan map
1971 		 * array has to be NULL.  If the subplan map array is already created,
1972 		 * we won't be able to access the map using a partition index.
1973 		 */
1974 		Assert(mtstate->mt_per_subplan_tupconv_maps == NULL);
1975 
1976 		ExecSetupChildParentMapForLeaf(proute);
1977 	}
1978 	else
1979 		ExecSetupChildParentMapForSubplan(mtstate);
1980 }
1981 
1982 /*
1983  * For a given subplan index, get the tuple conversion map.
1984  */
1985 static TupleConversionMap *
1986 tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
1987 {
1988 	/*
1989 	 * If a partition-indexed tuple conversion map array is allocated, we need
1990 	 * to first get the index into the partition array.  Exactly *one* of the
1991 	 * two arrays is allocated: if a partition-indexed array is required, we
1992 	 * don't need a subplan-indexed one, since we can translate a subplan
1993 	 * index into a partition index; and we create a subplan-indexed array
1994 	 * *only* if a partition-indexed array is not required.
1995 	 */
1996 	if (mtstate->mt_per_subplan_tupconv_maps == NULL)
1997 	{
1998 		int			leaf_index;
1999 		PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
2000 
2001 		/*
2002 		 * If subplan-indexed array is NULL, things should have been arranged
2003 		 * to convert the subplan index to partition index.
2004 		 */
2005 		Assert(proute && proute->subplan_partition_offsets != NULL &&
2006 			   whichplan < proute->num_subplan_partition_offsets);
2007 
2008 		leaf_index = proute->subplan_partition_offsets[whichplan];
2009 
2010 		return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate),
2011 								 leaf_index);
2012 	}
2013 	else
2014 	{
2015 		Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
2016 		return mtstate->mt_per_subplan_tupconv_maps[whichplan];
2017 	}
2018 }
2019 
2020 /* ----------------------------------------------------------------
2021  *	   ExecModifyTable
2022  *
2023  *		Perform table modifications as required, and return RETURNING results
2024  *		if needed.
2025  * ----------------------------------------------------------------
2026  */
2027 static TupleTableSlot *
2028 ExecModifyTable(PlanState *pstate)
2029 {
2030 	ModifyTableState *node = castNode(ModifyTableState, pstate);
2031 	PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
2032 	EState	   *estate = node->ps.state;
2033 	CmdType		operation = node->operation;
2034 	ResultRelInfo *saved_resultRelInfo;
2035 	ResultRelInfo *resultRelInfo;
2036 	PlanState  *subplanstate;
2037 	JunkFilter *junkfilter;
2038 	TupleTableSlot *slot;
2039 	TupleTableSlot *planSlot;
2040 	ItemPointer tupleid;
2041 	ItemPointerData tuple_ctid;
2042 	HeapTupleData oldtupdata;
2043 	HeapTuple	oldtuple;
2044 
2045 	CHECK_FOR_INTERRUPTS();
2046 
2047 	/*
2048 	 * This should NOT get called during EvalPlanQual; we should have passed a
2049 	 * subplan tree to EvalPlanQual, instead.  Use a runtime test not just
2050 	 * Assert because this condition is easy to miss in testing.  (Note:
2051 	 * although ModifyTable should not get executed within an EvalPlanQual
2052 	 * operation, we do have to allow it to be initialized and shut down in
2053 	 * case it is within a CTE subplan.  Hence this test must be here, not in
2054 	 * ExecInitModifyTable.)
2055 	 */
2056 	if (estate->es_epqTuple != NULL)
2057 		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
2058 
2059 	/*
2060 	 * If we've already completed processing, don't try to do more.  We need
2061 	 * this test because ExecPostprocessPlan might call us an extra time, and
2062 	 * our subplan's nodes aren't necessarily robust against being called
2063 	 * extra times.
2064 	 */
2065 	if (node->mt_done)
2066 		return NULL;
2067 
2068 	/*
2069 	 * On first call, fire BEFORE STATEMENT triggers before proceeding.
2070 	 */
2071 	if (node->fireBSTriggers)
2072 	{
2073 		fireBSTriggers(node);
2074 		node->fireBSTriggers = false;
2075 	}
2076 
2077 	/* Preload local variables */
2078 	resultRelInfo = node->resultRelInfo + node->mt_whichplan;
2079 	subplanstate = node->mt_plans[node->mt_whichplan];
2080 	junkfilter = resultRelInfo->ri_junkFilter;
2081 
2082 	/*
2083 	 * es_result_relation_info must point to the currently active result
2084 	 * relation while we are within this ModifyTable node.  Even though
2085 	 * ModifyTable nodes can't be nested statically, they can be nested
2086 	 * dynamically (since our subplan could include a reference to a modifying
2087 	 * CTE).  So we have to save and restore the caller's value.
2088 	 */
2089 	saved_resultRelInfo = estate->es_result_relation_info;
2090 
2091 	estate->es_result_relation_info = resultRelInfo;
2092 
2093 	/*
2094 	 * Fetch rows from subplan(s), and execute the required table modification
2095 	 * for each row.
2096 	 */
2097 	for (;;)
2098 	{
2099 		/*
2100 		 * Reset the per-output-tuple exprcontext.  This is needed because
2101 		 * triggers expect to use that context as workspace.  It's a bit ugly
2102 		 * to do this below the top level of the plan, however.  We might need
2103 		 * to rethink this later.
2104 		 */
2105 		ResetPerTupleExprContext(estate);
2106 
2107 		planSlot = ExecProcNode(subplanstate);
2108 
2109 		if (TupIsNull(planSlot))
2110 		{
2111 			/* advance to next subplan if any */
2112 			node->mt_whichplan++;
2113 			if (node->mt_whichplan < node->mt_nplans)
2114 			{
2115 				resultRelInfo++;
2116 				subplanstate = node->mt_plans[node->mt_whichplan];
2117 				junkfilter = resultRelInfo->ri_junkFilter;
2118 				estate->es_result_relation_info = resultRelInfo;
2119 				EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
2120 									node->mt_arowmarks[node->mt_whichplan]);
2121 				/* Prepare to convert transition tuples from this child. */
2122 				if (node->mt_transition_capture != NULL)
2123 				{
2124 					node->mt_transition_capture->tcs_map =
2125 						tupconv_map_for_subplan(node, node->mt_whichplan);
2126 				}
2127 				if (node->mt_oc_transition_capture != NULL)
2128 				{
2129 					node->mt_oc_transition_capture->tcs_map =
2130 						tupconv_map_for_subplan(node, node->mt_whichplan);
2131 				}
2132 				continue;
2133 			}
2134 			else
2135 				break;
2136 		}
2137 
2138 		/*
2139 		 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
2140 		 * here is compute the RETURNING expressions.
2141 		 */
2142 		if (resultRelInfo->ri_usesFdwDirectModify)
2143 		{
2144 			Assert(resultRelInfo->ri_projectReturning);
2145 
2146 			/*
2147 			 * A scan slot containing the data that was actually inserted,
2148 			 * updated or deleted has already been made available to
2149 			 * ExecProcessReturning by IterateDirectModify, so no need to
2150 			 * provide it here.
2151 			 */
2152 			slot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
2153 										RelationGetRelid(resultRelInfo->ri_RelationDesc),
2154 										NULL, planSlot);
2155 
2156 			estate->es_result_relation_info = saved_resultRelInfo;
2157 			return slot;
2158 		}
2159 
2160 		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
2161 		slot = planSlot;
2162 
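		/*
		 * For UPDATE/DELETE, the target row is identified either by its ctid
		 * or by a whole-row junk attribute extracted below; start with
		 * neither identified.
		 */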
2163 		tupleid = NULL;
2164 		oldtuple = NULL;
2165 		if (junkfilter != NULL)
2166 		{
2167 			/*
2168 			 * extract the 'ctid' or 'wholerow' junk attribute.
2169 			 */
2170 			if (operation == CMD_UPDATE || operation == CMD_DELETE)
2171 			{
2172 				char		relkind;
2173 				Datum		datum;
2174 				bool		isNull;
2175 
2176 				relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2177 				if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
2178 				{
2179 					datum = ExecGetJunkAttribute(slot,
2180 												 junkfilter->jf_junkAttNo,
2181 												 &isNull);
2182 					/* shouldn't ever get a null result... */
2183 					if (isNull)
2184 						elog(ERROR, "ctid is NULL");
2185 
2186 					tupleid = (ItemPointer) DatumGetPointer(datum);
2187 					tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
2188 					tupleid = &tuple_ctid;
2189 				}
2190 
2191 				/*
2192 				 * Use the wholerow attribute, when available, to reconstruct
2193 				 * the old relation tuple.
2194 				 *
2195 				 * Foreign table updates have a wholerow attribute when the
2196 				 * relation has a row-level trigger.  Note that the wholerow
2197 				 * attribute does not carry system columns.  Foreign table
2198 				 * triggers miss seeing those, except that we know enough here
2199 				 * to set t_tableOid.  Quite separately from this, the FDW may
2200 				 * fetch its own junk attrs to identify the row.
2201 				 *
2202 				 * Other relevant relkinds, currently limited to views, always
2203 				 * have a wholerow attribute.
2204 				 */
2205 				else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
2206 				{
2207 					datum = ExecGetJunkAttribute(slot,
2208 												 junkfilter->jf_junkAttNo,
2209 												 &isNull);
2210 					/* shouldn't ever get a null result... */
2211 					if (isNull)
2212 						elog(ERROR, "wholerow is NULL");
2213 
2214 					oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
2215 					oldtupdata.t_len =
2216 						HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
2217 					ItemPointerSetInvalid(&(oldtupdata.t_self));
2218 					/* Historically, view triggers see invalid t_tableOid. */
2219 					oldtupdata.t_tableOid =
2220 						(relkind == RELKIND_VIEW) ? InvalidOid :
2221 						RelationGetRelid(resultRelInfo->ri_RelationDesc);
2222 
2223 					oldtuple = &oldtupdata;
2224 				}
2225 				else
2226 					Assert(relkind == RELKIND_FOREIGN_TABLE);
2227 			}
2228 
2229 			/*
2230 			 * apply the junkfilter if needed.
2231 			 */
2232 			if (operation != CMD_DELETE)
2233 				slot = ExecFilterJunk(junkfilter, slot);
2234 		}
2235 
2236 		switch (operation)
2237 		{
2238 			case CMD_INSERT:
2239 				/* Prepare for tuple routing if needed. */
2240 				if (proute)
2241 					slot = ExecPrepareTupleRouting(node, estate, proute,
2242 												   resultRelInfo, slot);
2243 				slot = ExecInsert(node, slot, planSlot,
2244 								  NULL, estate->es_result_relation_info,
2245 								  estate, node->canSetTag);
2246 				/* Revert ExecPrepareTupleRouting's state change. */
2247 				if (proute)
2248 					estate->es_result_relation_info = resultRelInfo;
2249 				break;
2250 			case CMD_UPDATE:
2251 				slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
2252 								  &node->mt_epqstate, estate, node->canSetTag);
2253 				break;
2254 			case CMD_DELETE:
2255 				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
2256 								  &node->mt_epqstate, estate,
2257 								  true, node->canSetTag,
2258 								  false /* changingPart */ , NULL, NULL);
2259 				break;
2260 			default:
2261 				elog(ERROR, "unknown operation");
2262 				break;
2263 		}
2264 
2265 		/*
2266 		 * If we got a RETURNING result, return it to caller.  We'll continue
2267 		 * the work on next call.
2268 		 */
2269 		if (slot)
2270 		{
2271 			estate->es_result_relation_info = saved_resultRelInfo;
2272 			return slot;
2273 		}
2274 	}
2275 
2276 	/* Restore es_result_relation_info before exiting */
2277 	estate->es_result_relation_info = saved_resultRelInfo;
2278 
2279 	/*
2280 	 * We're done, but fire AFTER STATEMENT triggers before exiting.
2281 	 */
2282 	fireASTriggers(node);
2283 
2284 	node->mt_done = true;
2285 
2286 	return NULL;
2287 }
2288 
2289 /* ----------------------------------------------------------------
2290  *		ExecInitModifyTable
2291  * ----------------------------------------------------------------
2292  */
2293 ModifyTableState *
2294 ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
2295 {
2296 	ModifyTableState *mtstate;
2297 	CmdType		operation = node->operation;
2298 	int			nplans = list_length(node->plans);
2299 	ResultRelInfo *saved_resultRelInfo;
2300 	ResultRelInfo *resultRelInfo;
2301 	Plan	   *subplan;
2302 	ListCell   *l;
2303 	int			i;
2304 	Relation	rel;
2305 	bool		update_tuple_routing_needed = node->partColsUpdated;
2306 
2307 	/* check for unsupported flags */
2308 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
2309 
2310 	/*
2311 	 * create state structure
2312 	 */
2313 	mtstate = makeNode(ModifyTableState);
2314 	mtstate->ps.plan = (Plan *) node;
2315 	mtstate->ps.state = estate;
2316 	mtstate->ps.ExecProcNode = ExecModifyTable;
2317 
2318 	mtstate->operation = operation;
2319 	mtstate->canSetTag = node->canSetTag;
2320 	mtstate->mt_done = false;
2321 
2322 	mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
2323 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
2324 
2325 	/* If modifying a partitioned table, initialize the root table info */
2326 	if (node->rootResultRelIndex >= 0)
2327 		mtstate->rootResultRelInfo = estate->es_root_result_relations +
2328 			node->rootResultRelIndex;
2329 
2330 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
2331 	mtstate->mt_nplans = nplans;
2332 
2333 	/* set up epqstate with dummy subplan data for the moment */
2334 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
2335 	mtstate->fireBSTriggers = true;
2336 
2337 	/*
2338 	 * call ExecInitNode on each of the plans to be executed and save the
2339 	 * results into the array "mt_plans".  This is also a convenient place to
2340 	 * verify that the proposed target relations are valid and open their
2341 	 * indexes for insertion of new index entries.  Note we *must* set
2342 	 * estate->es_result_relation_info correctly while we initialize each
2343 	 * sub-plan; ExecContextForcesOids depends on that!
2344 	 */
2345 	saved_resultRelInfo = estate->es_result_relation_info;
2346 
2347 	resultRelInfo = mtstate->resultRelInfo;
2348 	i = 0;
2349 	foreach(l, node->plans)
2350 	{
2351 		subplan = (Plan *) lfirst(l);
2352 
2353 		/* Initialize the usesFdwDirectModify flag */
2354 		resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
2355 															  node->fdwDirectModifyPlans);
2356 
2357 		/*
2358 		 * Verify result relation is a valid target for the current operation
2359 		 */
2360 		CheckValidResultRel(resultRelInfo, operation);
2361 
2362 		/*
2363 		 * If there are indices on the result relation, open them and save
2364 		 * descriptors in the result relation info, so that we can add new
2365 		 * index entries for the tuples we add/update.  We need not do this
2366 		 * for a DELETE, however, since deletion doesn't affect indexes. Also,
2367 		 * inside an EvalPlanQual operation, the indexes might be open
2368 		 * already, since we share the resultrel state with the original
2369 		 * query.
2370 		 */
2371 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
2372 			operation != CMD_DELETE &&
2373 			resultRelInfo->ri_IndexRelationDescs == NULL)
2374 			ExecOpenIndices(resultRelInfo,
2375 							node->onConflictAction != ONCONFLICT_NONE);
2376 
2377 		/*
2378 		 * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
2379 		 * trigger itself might modify the partition-key values. So arrange
2380 		 * for tuple routing.
2381 		 */
2382 		if (resultRelInfo->ri_TrigDesc &&
2383 			resultRelInfo->ri_TrigDesc->trig_update_before_row &&
2384 			operation == CMD_UPDATE)
2385 			update_tuple_routing_needed = true;
2386 
2387 		/* Now init the plan for this result rel */
2388 		estate->es_result_relation_info = resultRelInfo;
2389 		mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
2390 
2391 		/* Also let FDWs init themselves for foreign-table result rels */
2392 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2393 			resultRelInfo->ri_FdwRoutine != NULL &&
2394 			resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
2395 		{
2396 			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
2397 
2398 			resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
2399 															 resultRelInfo,
2400 															 fdw_private,
2401 															 i,
2402 															 eflags);
2403 		}
2404 
2405 		resultRelInfo++;
2406 		i++;
2407 	}
2408 
2409 	estate->es_result_relation_info = saved_resultRelInfo;
2410 
2411 	/* Get the target relation */
2412 	resultRelInfo = getTargetResultRelInfo(mtstate);
2413 	rel = resultRelInfo->ri_RelationDesc;
2414 
2415 	/*
2416 	 * If it's not a partitioned table after all, UPDATE tuple routing should
2417 	 * not be attempted.
2418 	 */
2419 	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2420 		update_tuple_routing_needed = false;
2421 
2422 	/*
2423 	 * Build state for tuple routing if it's an INSERT or if it's an UPDATE of
2424 	 * partition key.
2425 	 */
2426 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
2427 		(operation == CMD_INSERT || update_tuple_routing_needed))
2428 		mtstate->mt_partition_tuple_routing =
2429 			ExecSetupPartitionTupleRouting(mtstate, resultRelInfo);
2430 
2431 	/*
2432 	 * Build state for collecting transition tuples.  This requires having a
2433 	 * valid trigger query context, so skip it in explain-only mode.
2434 	 */
2435 	if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
2436 		ExecSetupTransitionCaptureState(mtstate, estate);
2437 
2438 	/*
2439 	 * Construct mapping from each of the per-subplan partition attnos to the
2440 	 * root attno.  This is required when, during update row movement, the
2441 	 * tuple descriptor of a source partition does not match that of the root
2442 	 * partitioned table.  In such a case we need to convert tuples to the root
2443 	 * tuple descriptor, because the search for destination partition starts
2444 	 * from the root.  Skip this setup if it's not a partition key update.
2445 	 */
2446 	if (update_tuple_routing_needed)
2447 		ExecSetupChildParentMapForSubplan(mtstate);
2448 
2449 	/*
2450 	 * Initialize any WITH CHECK OPTION constraints if needed.
2451 	 */
2452 	resultRelInfo = mtstate->resultRelInfo;
2453 	i = 0;
2454 	foreach(l, node->withCheckOptionLists)
2455 	{
2456 		List	   *wcoList = (List *) lfirst(l);
2457 		List	   *wcoExprs = NIL;
2458 		ListCell   *ll;
2459 
2460 		foreach(ll, wcoList)
2461 		{
2462 			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
2463 			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
2464 											   &mtstate->ps);
2465 
2466 			wcoExprs = lappend(wcoExprs, wcoExpr);
2467 		}
2468 
2469 		resultRelInfo->ri_WithCheckOptions = wcoList;
2470 		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
2471 		resultRelInfo++;
2472 		i++;
2473 	}
2474 
2475 	/*
2476 	 * Initialize RETURNING projections if needed.
2477 	 */
2478 	if (node->returningLists)
2479 	{
2480 		TupleTableSlot *slot;
2481 		ExprContext *econtext;
2482 
2483 		/*
2484 		 * Initialize result tuple slot and assign its rowtype using the first
2485 		 * RETURNING list.  We assume the rest will look the same.
2486 		 */
2487 		mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
2488 
2489 		/* Set up a slot for the output of the RETURNING projection(s) */
2490 		ExecInitResultTupleSlotTL(estate, &mtstate->ps);
2491 		slot = mtstate->ps.ps_ResultTupleSlot;
2492 
2493 		/* Need an econtext too */
2494 		if (mtstate->ps.ps_ExprContext == NULL)
2495 			ExecAssignExprContext(estate, &mtstate->ps);
2496 		econtext = mtstate->ps.ps_ExprContext;
2497 
2498 		/*
2499 		 * Build a projection for each result rel.
2500 		 */
2501 		resultRelInfo = mtstate->resultRelInfo;
2502 		foreach(l, node->returningLists)
2503 		{
2504 			List	   *rlist = (List *) lfirst(l);
2505 
2506 			resultRelInfo->ri_returningList = rlist;
2507 			resultRelInfo->ri_projectReturning =
2508 				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
2509 										resultRelInfo->ri_RelationDesc->rd_att);
2510 			resultRelInfo++;
2511 		}
2512 	}
2513 	else
2514 	{
2515 		/*
2516 		 * We still must construct a dummy result tuple type, because InitPlan
2517 		 * expects one (maybe should change that?).
2518 		 */
2519 		mtstate->ps.plan->targetlist = NIL;
2520 		ExecInitResultTupleSlotTL(estate, &mtstate->ps);
2521 
2522 		mtstate->ps.ps_ExprContext = NULL;
2523 	}
2524 
2525 	/* Set the list of arbiter indexes if needed for ON CONFLICT */
2526 	resultRelInfo = mtstate->resultRelInfo;
2527 	if (node->onConflictAction != ONCONFLICT_NONE)
2528 		resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
2529 
2530 	/*
2531 	 * If needed, initialize the target list, projection and qual for ON
2532 	 * CONFLICT DO UPDATE.
2533 	 */
2534 	if (node->onConflictAction == ONCONFLICT_UPDATE)
2535 	{
2536 		OnConflictSetState *onconfl = makeNode(OnConflictSetState);
2537 		ExprContext *econtext;
2538 		TupleDesc	relationDesc;
2539 
2540 		/* insert may only have one plan, inheritance is not expanded */
2541 		Assert(nplans == 1);
2542 
2543 		/* already exists if created by RETURNING processing above */
2544 		if (mtstate->ps.ps_ExprContext == NULL)
2545 			ExecAssignExprContext(estate, &mtstate->ps);
2546 
2547 		econtext = mtstate->ps.ps_ExprContext;
2548 		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
2549 
2550 		/*
2551 		 * Initialize slot for the existing tuple.  If we'll be performing
2552 		 * tuple routing, the tuple descriptor to use for this will be
2553 		 * determined based on which relation the update is actually applied
2554 		 * to, so we don't set its tuple descriptor here.
2555 		 */
2556 		mtstate->mt_existing =
2557 			ExecInitExtraTupleSlot(mtstate->ps.state,
2558 								   mtstate->mt_partition_tuple_routing ?
2559 								   NULL : relationDesc);
2560 
2561 		/* carried forward solely for the benefit of explain */
2562 		mtstate->mt_excludedtlist = node->exclRelTlist;
2563 
2564 		/* create state for DO UPDATE SET operation */
2565 		resultRelInfo->ri_onConflict = onconfl;
2566 
2567 		/*
2568 		 * Create the tuple slot for the UPDATE SET projection.
2569 		 *
2570 		 * Just like mt_existing above, we leave it without a tuple descriptor
2571 		 * in the case of partition tuple routing, so that it can be
2572 		 * changed by ExecPrepareTupleRouting.  In that case, we still save
2573 		 * the tupdesc in the parent's state: it can be reused by partitions
2574 		 * with an identical descriptor to the parent.
2575 		 */
2576 		mtstate->mt_conflproj =
2577 			ExecInitExtraTupleSlot(mtstate->ps.state,
2578 								   mtstate->mt_partition_tuple_routing ?
2579 								   NULL : relationDesc);
2580 		onconfl->oc_ProjTupdesc = relationDesc;
2581 
2582 		/*
2583 		 * The onConflictSet tlist should already have been adjusted to emit
2584 		 * the table's exact column list.  It could also contain resjunk
2585 		 * columns, which should be evaluated but not included in the
2586 		 * projection result.
2587 		 */
2588 		ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2589 							node->onConflictSet);
2590 
2591 		/* build UPDATE SET projection state */
2592 		onconfl->oc_ProjInfo =
2593 			ExecBuildProjectionInfoExt(node->onConflictSet, econtext,
2594 									   mtstate->mt_conflproj, false,
2595 									   &mtstate->ps,
2596 									   relationDesc);
2597 
2598 		/* initialize state to evaluate the WHERE clause, if any */
2599 		if (node->onConflictWhere)
2600 		{
2601 			ExprState  *qualexpr;
2602 
2603 			qualexpr = ExecInitQual((List *) node->onConflictWhere,
2604 									&mtstate->ps);
2605 			onconfl->oc_WhereClause = qualexpr;
2606 		}
2607 	}
2608 
2609 	/*
2610 	 * If we have any secondary relations in an UPDATE or DELETE, they need to
2611 	 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
2612 	 * EvalPlanQual mechanism needs to be told about them.  Locate the
2613 	 * relevant ExecRowMarks.
2614 	 */
2615 	foreach(l, node->rowMarks)
2616 	{
2617 		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
2618 		ExecRowMark *erm;
2619 
2620 		/* ignore "parent" rowmarks; they are irrelevant at runtime */
2621 		if (rc->isParent)
2622 			continue;
2623 
2624 		/* find ExecRowMark (same for all subplans) */
2625 		erm = ExecFindRowMark(estate, rc->rti, false);
2626 
2627 		/* build ExecAuxRowMark for each subplan */
2628 		for (i = 0; i < nplans; i++)
2629 		{
2630 			ExecAuxRowMark *aerm;
2631 
2632 			subplan = mtstate->mt_plans[i]->plan;
2633 			aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
2634 			mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
2635 		}
2636 	}
2637 
2638 	/* select first subplan */
2639 	mtstate->mt_whichplan = 0;
2640 	subplan = (Plan *) linitial(node->plans);
2641 	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
2642 						mtstate->mt_arowmarks[0]);
2643 
2644 	/*
2645 	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
2646 	 * if there are any junk attrs in the tlist.  UPDATE and DELETE always
2647 	 * need a filter, since there's always at least one junk attribute present
2648 	 * --- no need to look first.  Typically, this will be a 'ctid' or
2649 	 * 'wholerow' attribute, but in the case of a foreign data wrapper it
2650 	 * might be a set of junk attributes sufficient to identify the remote
2651 	 * row.
2652 	 *
2653 	 * If there are multiple result relations, each one needs its own junk
2654 	 * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
2655 	 * can't be fooled by some needing a filter and some not.
2656 	 *
2657 	 * This section of code is also a convenient place to verify that the
2658 	 * output of an INSERT or UPDATE matches the target table(s).
2659 	 */
2660 	{
2661 		bool		junk_filter_needed = false;
2662 
2663 		switch (operation)
2664 		{
2665 			case CMD_INSERT:
2666 				foreach(l, subplan->targetlist)
2667 				{
2668 					TargetEntry *tle = (TargetEntry *) lfirst(l);
2669 
2670 					if (tle->resjunk)
2671 					{
2672 						junk_filter_needed = true;
2673 						break;
2674 					}
2675 				}
2676 				break;
2677 			case CMD_UPDATE:
2678 			case CMD_DELETE:
2679 				junk_filter_needed = true;
2680 				break;
2681 			default:
2682 				elog(ERROR, "unknown operation");
2683 				break;
2684 		}
2685 
2686 		if (junk_filter_needed)
2687 		{
2688 			resultRelInfo = mtstate->resultRelInfo;
2689 			for (i = 0; i < nplans; i++)
2690 			{
2691 				JunkFilter *j;
2692 
2693 				subplan = mtstate->mt_plans[i]->plan;
2694 				if (operation == CMD_INSERT || operation == CMD_UPDATE)
2695 					ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2696 										subplan->targetlist);
2697 
2698 				j = ExecInitJunkFilter(subplan->targetlist,
2699 									   resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
2700 									   ExecInitExtraTupleSlot(estate, NULL));
2701 
2702 				if (operation == CMD_UPDATE || operation == CMD_DELETE)
2703 				{
2704 					/* For UPDATE/DELETE, find the appropriate junk attr now */
2705 					char		relkind;
2706 
2707 					relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2708 					if (relkind == RELKIND_RELATION ||
2709 						relkind == RELKIND_MATVIEW ||
2710 						relkind == RELKIND_PARTITIONED_TABLE)
2711 					{
2712 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
2713 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2714 							elog(ERROR, "could not find junk ctid column");
2715 					}
2716 					else if (relkind == RELKIND_FOREIGN_TABLE)
2717 					{
2718 						/*
2719 						 * When there is a row-level trigger, there should be
2720 						 * a wholerow attribute.
2721 						 */
2722 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2723 					}
2724 					else
2725 					{
2726 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2727 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2728 							elog(ERROR, "could not find junk wholerow column");
2729 					}
2730 				}
2731 
2732 				resultRelInfo->ri_junkFilter = j;
2733 				resultRelInfo++;
2734 			}
2735 		}
2736 		else
2737 		{
2738 			if (operation == CMD_INSERT)
2739 				ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
2740 									subplan->targetlist);
2741 		}
2742 	}
2743 
2744 	/*
2745 	 * Set up a tuple table slot for use for trigger output tuples. In a plan
2746 	 * containing multiple ModifyTable nodes, all can share one such slot, so
2747 	 * we keep it in the estate.
2748 	 */
2749 	if (estate->es_trig_tuple_slot == NULL)
2750 		estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL);
2751 
2752 	/*
2753 	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
2754 	 * to estate->es_auxmodifytables so that it will be run to completion by
2755 	 * ExecPostprocessPlan.  (It'd actually work fine to add the primary
2756 	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
2757 	 * lappend: we need later-initialized ModifyTable nodes to be shut down
2758 	 * before earlier ones.  This ensures that we don't throw away RETURNING
2759 	 * rows that need to be seen by a later CTE subplan.
2760 	 */
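	/*
	 * A hypothetical example of such a plan:
	 *
	 *	WITH moved AS (DELETE FROM src RETURNING *)
	 *	INSERT INTO dst SELECT * FROM moved;
	 *
	 * The ModifyTable node for the DELETE is not the canSetTag node, so it
	 * is registered here and run to completion by ExecPostprocessPlan.
	 */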
2761 	if (!mtstate->canSetTag)
2762 		estate->es_auxmodifytables = lcons(mtstate,
2763 										   estate->es_auxmodifytables);
2764 
2765 	return mtstate;
2766 }
2767 
2768 /* ----------------------------------------------------------------
2769  *		ExecEndModifyTable
2770  *
2771  *		Shuts down the plan.
2772  *
2773  *		Returns nothing of interest.
2774  * ----------------------------------------------------------------
2775  */
2776 void
2777 ExecEndModifyTable(ModifyTableState *node)
2778 {
2779 	int			i;
2780 
2781 	/*
2782 	 * Allow any FDWs to shut down
2783 	 */
2784 	for (i = 0; i < node->mt_nplans; i++)
2785 	{
2786 		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
2787 
2788 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2789 			resultRelInfo->ri_FdwRoutine != NULL &&
2790 			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
2791 			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
2792 														   resultRelInfo);
2793 	}
2794 
2795 	/* Close all the partitioned tables, leaf partitions, and their indices */
2796 	if (node->mt_partition_tuple_routing)
2797 		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
2798 
2799 	/*
2800 	 * Free the exprcontext
2801 	 */
2802 	ExecFreeExprContext(&node->ps);
2803 
2804 	/*
2805 	 * clean out the tuple table
2806 	 */
2807 	ExecClearTuple(node->ps.ps_ResultTupleSlot);
2808 
2809 	/*
2810 	 * Terminate EPQ execution if active
2811 	 */
2812 	EvalPlanQualEnd(&node->mt_epqstate);
2813 
2814 	/*
2815 	 * shut down subplans
2816 	 */
2817 	for (i = 0; i < node->mt_nplans; i++)
2818 		ExecEndNode(node->mt_plans[i]);
2819 }
2820 
2821 void
2822 ExecReScanModifyTable(ModifyTableState *node)
2823 {
2824 	/*
2825 	 * Currently, we don't need to support rescan on ModifyTable nodes. The
2826 	 * semantics of that would be a bit debatable anyway.
2827 	 */
2828 	elog(ERROR, "ExecReScanModifyTable is not implemented");
2829 }
2830