/*-------------------------------------------------------------------------
 *
 * nodeModifyTable.c
 *	  routines to handle ModifyTable nodes.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeModifyTable.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitModifyTable - initialize the ModifyTable node
 *		ExecModifyTable - retrieve the next tuple from the node
 *		ExecEndModifyTable - shut down the ModifyTable node
 *		ExecReScanModifyTable - rescan the ModifyTable node
 *
 *	 NOTES
 *		Each ModifyTable node contains a list of one or more subplans,
 *		much like an Append node.  There is one subplan per result relation.
 *		The key reason for this is that in an inherited UPDATE command, each
 *		result relation could have a different schema (more or different
 *		columns) requiring a different plan tree to produce it.  In an
 *		inherited DELETE, all the subplans should produce the same output
 *		rowtype, but we might still find that different plans are appropriate
 *		for different child relations.
 *
 *		If the query specifies RETURNING, then the ModifyTable returns a
 *		RETURNING tuple after completing each row insert, update, or delete.
 *		It must be called again to continue the operation.  Without RETURNING,
 *		we just loop within the node until all the work is done, then
 *		return NULL.  This avoids useless call/return overhead.
 */
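
/*
 * Example of the calling convention above (illustrative SQL only; the
 * table name is hypothetical):
 *
 *		INSERT INTO t VALUES (1), (2) RETURNING *;
 *
 * makes ExecModifyTable() return one tuple per call, once per inserted
 * row, whereas the same statement without RETURNING performs both
 * inserts in a single call and returns NULL.
 */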

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/rel.h"


static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
								 ResultRelInfo *resultRelInfo,
								 ItemPointer conflictTid,
								 TupleTableSlot *planSlot,
								 TupleTableSlot *excludedSlot,
								 EState *estate,
								 bool canSetTag,
								 TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
											   EState *estate,
											   PartitionTupleRouting *proute,
											   ResultRelInfo *targetRelInfo,
											   TupleTableSlot *slot);
static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
												   int whichplan);

/*
 * Verify that the tuples to be produced by INSERT or UPDATE match the
 * target relation's rowtype
 *
 * We do this to guard against stale plans.  If plan invalidation is
 * functioning properly then we should never get a failure here, but better
 * safe than sorry.  Note that this is called after we have obtained lock
 * on the target rel, so the rowtype can't change underneath us.
 *
 * The plan output is represented by its targetlist, because that makes
 * handling the dropped-column case easier.
 */
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
	TupleDesc	resultDesc = RelationGetDescr(resultRel);
	int			attno = 0;
	ListCell   *lc;

	foreach(lc, targetList)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);
		Form_pg_attribute attr;

		if (tle->resjunk)
			continue;			/* ignore junk tlist items */

		if (attno >= resultDesc->natts)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table row type and query-specified row type do not match"),
					 errdetail("Query has too many columns.")));
		attr = TupleDescAttr(resultDesc, attno);
		attno++;

		if (!attr->attisdropped)
		{
			/* Normal case: demand type match */
			if (exprType((Node *) tle->expr) != attr->atttypid)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
								   format_type_be(attr->atttypid),
								   attno,
								   format_type_be(exprType((Node *) tle->expr)))));
		}
		else
		{
			/*
			 * For a dropped column, we can't check atttypid (it's likely 0).
			 * In any case the planner has most likely inserted an INT4 null.
			 * What we insist on is just *some* NULL constant.
			 */
			if (!IsA(tle->expr, Const) ||
				!((Const *) tle->expr)->constisnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
								   attno)));
		}
	}
	if (attno != resultDesc->natts)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("table row type and query-specified row type do not match"),
				 errdetail("Query has too few columns.")));
}

/*
 * ExecProcessReturning --- evaluate a RETURNING list
 *
 * projectReturning: the projection to evaluate
 * resultRelOid: result relation's OID
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top subplan node
 *
 * In cross-partition UPDATE cases, projectReturning and planSlot are as
 * for the source partition, and tupleSlot must conform to that.  But
 * resultRelOid is for the destination partition.
 *
 * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
 * scan tuple.
 *
 * Returns a slot holding the result tuple
 */
static TupleTableSlot *
ExecProcessReturning(ProjectionInfo *projectReturning,
					 Oid resultRelOid,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot)
{
	ExprContext *econtext = projectReturning->pi_exprContext;

	/* Make tuple and any needed join variables available to ExecProject */
	if (tupleSlot)
		econtext->ecxt_scantuple = tupleSlot;
	else
		Assert(econtext->ecxt_scantuple);
	econtext->ecxt_outertuple = planSlot;

	/*
	 * RETURNING expressions might reference the tableoid column, so be sure
	 * we expose the desired OID, ie that of the real target relation.
	 */
	econtext->ecxt_scantuple->tts_tableOid = resultRelOid;

	/* Compute the RETURNING expressions */
	return ExecProject(projectReturning);
}
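
/*
 * For instance (illustrative SQL; the table name is hypothetical),
 *
 *		UPDATE t SET v = v + 1 RETURNING tableoid::regclass, *;
 *
 * relies on the tts_tableOid assignment above: when the row lives in a
 * partition of t, tableoid reports that partition's OID, not the parent's.
 */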

/*
 * ExecCheckTupleVisible -- verify tuple is visible
 *
 * It would not be consistent with the guarantees of the higher isolation
 * levels to proceed with avoiding insertion (taking speculative insertion's
 * alternative path) on the basis of another tuple that is not visible to the
 * current MVCC snapshot.  Check for the need to raise a serialization
 * failure, and do so as necessary.
 */
static void
ExecCheckTupleVisible(EState *estate,
					  Relation rel,
					  TupleTableSlot *slot)
{
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
	{
		Datum		xminDatum;
		TransactionId xmin;
		bool		isnull;

		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
		Assert(!isnull);
		xmin = DatumGetTransactionId(xminDatum);

		/*
		 * We should not raise a serialization failure if the conflict is
		 * against a tuple inserted by our own transaction, even if it's not
		 * visible to our snapshot.  (This would happen, for example, if
		 * conflicting keys are proposed for insertion in a single command.)
		 */
		if (!TransactionIdIsCurrentTransactionId(xmin))
			ereport(ERROR,
					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
					 errmsg("could not serialize access due to concurrent update")));
	}
}
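
/*
 * A scenario that reaches the error above (illustrative SQL; names are
 * hypothetical), given t(k int PRIMARY KEY): session 1 runs
 *
 *		BEGIN ISOLATION LEVEL REPEATABLE READ;
 *		SELECT 1;	-- first query acquires the snapshot
 *
 * then session 2 commits INSERT INTO t VALUES (1), and session 1 runs
 *
 *		INSERT INTO t VALUES (1) ON CONFLICT (k) DO NOTHING;
 *
 * Quietly skipping the insert would act on a row invisible to session 1's
 * snapshot, so a serialization failure is raised instead.
 */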

/*
 * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
 */
static void
ExecCheckTIDVisible(EState *estate,
					ResultRelInfo *relinfo,
					ItemPointer tid,
					TupleTableSlot *tempSlot)
{
	Relation	rel = relinfo->ri_RelationDesc;

	/* Redundantly check isolation level */
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
	ExecCheckTupleVisible(estate, rel, tempSlot);
	ExecClearTuple(tempSlot);
}

/*
 * Compute stored generated columns for a tuple
 */
void
ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot, CmdType cmdtype)
{
	ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	int			natts = tupdesc->natts;
	MemoryContext oldContext;
	Datum	   *values;
	bool	   *nulls;

	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);

	/*
	 * If first time through for this result relation, build expression
	 * node trees for rel's stored generation expressions.  Keep them in the
	 * per-query memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_GeneratedExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

		resultRelInfo->ri_GeneratedExprs =
			(ExprState **) palloc(natts * sizeof(ExprState *));
		resultRelInfo->ri_NumGeneratedNeeded = 0;

		for (int i = 0; i < natts; i++)
		{
			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
			{
				Expr	   *expr;

				/*
				 * If it's an update and the current column was not marked as
				 * being updated, then we can skip the computation.  But if
				 * there is a BEFORE ROW UPDATE trigger, we cannot skip
				 * because the trigger might affect additional columns.
				 */
				if (cmdtype == CMD_UPDATE &&
					!(rel->trigdesc && rel->trigdesc->trig_update_before_row) &&
					!bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
								   ExecGetExtraUpdatedCols(resultRelInfo, estate)))
				{
					resultRelInfo->ri_GeneratedExprs[i] = NULL;
					continue;
				}

				expr = (Expr *) build_column_default(rel, i + 1);
				if (expr == NULL)
					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
						 i + 1, RelationGetRelationName(rel));

				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
				resultRelInfo->ri_NumGeneratedNeeded++;
			}
		}

		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * If no generated columns have been affected by this change, then skip
	 * the rest.
	 */
	if (resultRelInfo->ri_NumGeneratedNeeded == 0)
		return;

	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	values = palloc(sizeof(*values) * natts);
	nulls = palloc(sizeof(*nulls) * natts);

	slot_getallattrs(slot);
	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);

	for (int i = 0; i < natts; i++)
	{
		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);

		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED &&
			resultRelInfo->ri_GeneratedExprs[i])
		{
			ExprContext *econtext;
			Datum		val;
			bool		isnull;

			econtext = GetPerTupleExprContext(estate);
			econtext->ecxt_scantuple = slot;

			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);

			/*
			 * We must make a copy of val as we have no guarantees about where
			 * memory for a pass-by-reference Datum is located.
			 */
			if (!isnull)
				val = datumCopy(val, attr->attbyval, attr->attlen);

			values[i] = val;
			nulls[i] = isnull;
		}
		else
		{
			if (!nulls[i])
				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
		}
	}

	ExecClearTuple(slot);
	memcpy(slot->tts_values, values, sizeof(*values) * natts);
	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
	ExecStoreVirtualTuple(slot);
	ExecMaterializeSlot(slot);

	MemoryContextSwitchTo(oldContext);
}
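
/*
 * A table that exercises this path (illustrative SQL; names hypothetical):
 *
 *		CREATE TABLE item (
 *			price	numeric,
 *			qty		int,
 *			total	numeric GENERATED ALWAYS AS (price * qty) STORED
 *		);
 *		UPDATE item SET qty = 3;			-- recomputes total
 *		UPDATE item SET price = price;		-- also recomputes total
 *
 * Note the skip logic above: an UPDATE that touches neither price nor qty
 * leaves total's expression uncompiled, unless a BEFORE ROW UPDATE trigger
 * exists, since such a trigger could change price or qty underneath us.
 */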

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		For INSERT, we have to insert the tuple into the target relation
 *		and insert appropriate tuples into the index relations.
 *
 *		slot contains the new tuple value to be stored.
 *		planSlot is the output of the ModifyTable's subplan; we use it
 *		to access "junk" columns that are not going to be stored.
 *		In a cross-partition UPDATE, srcSlot is the slot that held the
 *		updated tuple for the source relation; otherwise it's NULL.
 *
 *		returningRelInfo is the resultRelInfo for the source relation of a
 *		cross-partition UPDATE; otherwise it's the current result relation.
 *		We use it to process RETURNING lists, for reasons explained below.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   TupleTableSlot *srcSlot,
		   ResultRelInfo *returningRelInfo,
		   EState *estate,
		   bool canSetTag)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	List	   *recheckIndexes = NIL;
	TupleTableSlot *result = NULL;
	TransitionCaptureState *ar_insert_trig_tcs;
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	OnConflictAction onconflict = node->onConflictAction;

	ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/*
	 * BEFORE ROW INSERT Triggers.
	 *
	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
	 * violations before firing these triggers, because they can change the
	 * values to insert.  Also, they can run arbitrary user-defined code with
	 * side-effects that we can't cancel by just not inserting the tuple.
	 */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
	{
		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
	{
		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * GENERATED expressions might reference the tableoid column, so
		 * (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot, CMD_INSERT);

		/*
		 * insert into foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.  (This covers the case where the FDW replaced the slot.)
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
	}
	else
	{
		WCOKind		wco_kind;

		/*
		 * Constraints and GENERATED expressions might reference the tableoid
		 * column, so (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot, CMD_INSERT);

		/*
		 * Check any RLS WITH CHECK policies.
		 *
		 * Normally we should check INSERT policies.  But if the insert is the
		 * result of a partition key update that moved the tuple to a new
		 * partition, we should instead check UPDATE policies, because we are
		 * executing policies defined on the target table, and not those
		 * defined on the child partitions.
		 */
		wco_kind = (mtstate->operation == CMD_UPDATE) ?
			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;

		/*
		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
		 * we are looking for at this point.
		 */
		if (resultRelInfo->ri_WithCheckOptions != NIL)
			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);

		/*
		 * Check the constraints of the tuple.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * Also check the tuple against the partition constraint, if there is
		 * one; except that if we got here via tuple-routing, we don't need to
		 * if there's no BR trigger defined on the partition.
		 */
		if (resultRelInfo->ri_PartitionCheck &&
			(resultRelInfo->ri_RootResultRelInfo == NULL ||
			 (resultRelInfo->ri_TrigDesc &&
			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
			ExecPartitionCheck(resultRelInfo, slot, estate, true);

		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
		{
			/* Perform a speculative insertion. */
			uint32		specToken;
			ItemPointerData conflictTid;
			bool		specConflict;
			List	   *arbiterIndexes;

			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;

			/*
			 * Do a non-conclusive check for conflicts first.
			 *
			 * We're not holding any locks yet, so this doesn't guarantee that
			 * the later insert won't conflict.  But it avoids leaving behind
			 * a lot of canceled speculative insertions, if you run a lot of
			 * INSERT ON CONFLICT statements that do conflict.
			 *
			 * We loop back here if we find a conflict below, either during
			 * the pre-check, or when we re-check after inserting the tuple
			 * speculatively.
			 */
	vlock:
			specConflict = false;
			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
										   arbiterIndexes))
			{
				/* committed conflict tuple found */
				if (onconflict == ONCONFLICT_UPDATE)
				{
					/*
					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
					 * part.  Be prepared to retry if the UPDATE fails because
					 * of another concurrent UPDATE/DELETE to the conflict
					 * tuple.
					 */
					TupleTableSlot *returning = NULL;

					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
											 &conflictTid, planSlot, slot,
											 estate, canSetTag, &returning))
					{
						InstrCountTuples2(&mtstate->ps, 1);
						return returning;
					}
					else
						goto vlock;
				}
				else
				{
					/*
					 * In case of ON CONFLICT DO NOTHING, do nothing.  However,
					 * verify that the tuple is visible to the executor's MVCC
					 * snapshot at higher isolation levels.
					 *
					 * Using ExecGetReturningSlot() to store the tuple for the
					 * recheck isn't that pretty, but we can't trivially use
					 * the input slot, because it might not be of a compatible
					 * type.  As there's no conflicting usage of
					 * ExecGetReturningSlot() in the DO NOTHING case...
					 */
					Assert(onconflict == ONCONFLICT_NOTHING);
					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
										ExecGetReturningSlot(estate, resultRelInfo));
					InstrCountTuples2(&mtstate->ps, 1);
					return NULL;
				}
			}

			/*
			 * Before we start insertion proper, acquire our "speculative
			 * insertion lock".  Others can use that to wait for us to decide
			 * if we're going to go ahead with the insertion, instead of
			 * waiting for the whole transaction to complete.
			 */
			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

			/* insert the tuple, with the speculative token */
			table_tuple_insert_speculative(resultRelationDesc, slot,
										   estate->es_output_cid,
										   0,
										   NULL,
										   specToken);

			/* insert index entries for tuple */
			recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
												   &specConflict,
												   arbiterIndexes);

			/* adjust the tuple's state accordingly */
			table_tuple_complete_speculative(resultRelationDesc, slot,
											 specToken, !specConflict);

			/*
			 * Wake up anyone waiting for our decision.  They will re-check
			 * the tuple, see that it's no longer speculative, and wait on our
			 * XID as if this was a regularly inserted tuple all along.  Or if
			 * we killed the tuple, they will see it's dead, and proceed as if
			 * the tuple never existed.
			 */
			SpeculativeInsertionLockRelease(GetCurrentTransactionId());

			/*
			 * If there was a conflict, start from the beginning.  We'll do
			 * the pre-check again, which will now find the conflicting tuple
			 * (unless it aborts before we get there).
			 */
			if (specConflict)
			{
				list_free(recheckIndexes);
				goto vlock;
			}

			/* Since there was no insertion conflict, we're done */
		}
		else
		{
			/* insert the tuple normally */
			table_tuple_insert(resultRelationDesc, slot,
							   estate->es_output_cid,
							   0, NULL);

			/* insert index entries for tuple */
			if (resultRelInfo->ri_NumIndices > 0)
				recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
													   NIL);
		}
	}
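
	/*
	 * To make the two arms above concrete (illustrative SQL; table and
	 * column names are hypothetical): given t(k int PRIMARY KEY, v int),
	 *
	 *		INSERT INTO t VALUES (1, 10)
	 *			ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v;
	 *
	 * takes the speculative path, with the primary key as arbiter index,
	 * while a plain INSERT INTO t VALUES (1, 10) takes the ordinary
	 * table_tuple_insert() path.
	 */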

	if (canSetTag)
	{
		(estate->es_processed)++;
		setLastTid(&slot->tts_tid);
	}

	/*
	 * If this insert is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition NEW TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_insert_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_new_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
							 NULL,
							 slot,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the NEW TABLE row, so make sure any AR
		 * INSERT trigger fired below doesn't capture it again.
		 */
		ar_insert_trig_tcs = NULL;
	}

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
						 ar_insert_trig_tcs);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually inserting the
	 * record into the heap and all indexes.
	 *
	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
	 * tuple will never be seen if it violates the WITH CHECK OPTION.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (returningRelInfo->ri_projectReturning)
	{
		/*
		 * In a cross-partition UPDATE with RETURNING, we have to use the
		 * source partition's RETURNING list, because that matches the output
		 * of the planSlot, while the destination partition might have
		 * different resjunk columns.  This means we have to map the
		 * destination tuple back to the source's format so we can apply that
		 * RETURNING list.  This is expensive, but it should be an uncommon
		 * corner case, so we won't spend much effort on making it fast.
		 *
		 * We assume that we can use srcSlot to hold the re-converted tuple.
		 * Note that in the common case where the child partitions both match
		 * the root's format, previous optimizations will have resulted in
		 * slot and srcSlot being identical, cueing us that there's nothing to
		 * do here.
		 */
		if (returningRelInfo != resultRelInfo && slot != srcSlot)
		{
			Relation	srcRelationDesc = returningRelInfo->ri_RelationDesc;
			AttrMap    *map;

			map = build_attrmap_by_name_if_req(RelationGetDescr(resultRelationDesc),
											   RelationGetDescr(srcRelationDesc));
			if (map)
			{
				TupleTableSlot *origSlot = slot;

				slot = execute_attr_map_slot(map, slot, srcSlot);
				slot->tts_tid = origSlot->tts_tid;
				slot->tts_tableOid = origSlot->tts_tableOid;
				free_attrmap(map);
			}
		}

		result = ExecProcessReturning(returningRelInfo->ri_projectReturning,
									  RelationGetRelid(resultRelationDesc),
									  slot, planSlot);
	}

	return result;
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *
 *		When deleting from a table, tupleid identifies the tuple to
 *		delete and oldtuple is NULL.  When deleting from a view,
 *		oldtuple is passed to the INSTEAD OF triggers and identifies
 *		what to delete, and tupleid is invalid.  When deleting from a
 *		foreign table, tupleid is invalid; the FDW has to figure out
 *		which row to delete using data from the planSlot.  oldtuple is
 *		passed to foreign table triggers; it is NULL when the foreign
 *		table has no relevant triggers.  We use tupleDeleted to indicate
 *		whether the tuple was actually deleted; callers can use it to
 *		decide whether to continue the operation.  When this DELETE is
 *		part of a partition-key UPDATE, the slot returned by
 *		EvalPlanQual() is passed back using the output parameter epqslot.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecDelete(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool processReturning,
		   bool canSetTag,
		   bool changingPart,
		   bool *tupleDeleted,
		   TupleTableSlot **epqreturnslot)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	TupleTableSlot *slot = NULL;
	TransitionCaptureState *ar_delete_trig_tcs;

	if (tupleDeleted)
		*tupleDeleted = false;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
										tupleid, oldtuple, epqreturnslot);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}

	/* INSTEAD OF ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
	{
		bool		dodelete;

		Assert(oldtuple != NULL);
		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * delete from foreign table: let the FDW do it
		 *
		 * We offer the returning slot as a place to store RETURNING data,
		 * although the FDW can return some other slot if it wants.
		 */
		slot = ExecGetReturningSlot(estate, resultRelInfo);
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * RETURNING expressions might reference the tableoid column, so
		 * (re)initialize tts_tableOid before evaluating them.
		 */
		if (TTS_EMPTY(slot))
			ExecStoreAllNullTuple(slot);

		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		/*
		 * delete the tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be deleted is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
ldelete:;
		result = table_tuple_delete(resultRelationDesc, tupleid,
									estate->es_output_cid,
									estate->es_snapshot,
									estate->es_crosscheck_snapshot,
									true /* wait for commit */ ,
									&tmfd,
									changingPart);

		switch (result)
		{
			case TM_SelfModified:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join DELETE
				 * where multiple tuples join to the same target tuple.  This
				 * is somewhat questionable, but Postgres has always allowed
				 * it: we just ignore additional deletion attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the deletion, but it is equally unsafe to
				 * proceed.  We don't want to discard the original DELETE
				 * while keeping the triggered actions based on its deletion;
				 * and it would be no better to allow the original DELETE
				 * while discarding updates that it triggered.  The row update
				 * carries some information that might be important according
				 * to business rules; so throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the DELETE and then return NULL to cancel
				 * the outer delete.
				 */
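
				/*
				 * Example of the former case (illustrative SQL; names are
				 * hypothetical):
				 *
				 *		DELETE FROM part USING ord
				 *			WHERE part.id = ord.part_id;
				 *
				 * If two ord rows reference the same part, that part's tuple
				 * joins twice; the second deletion attempt arrives here with
				 * tmfd.cmax equal to estate->es_output_cid and is ignored.
				 */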
				if (tmfd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already deleted by self; nothing to do */
				return NULL;

			case TM_Ok:
				break;

			case TM_Updated:
				{
					TupleTableSlot *inputslot;
					TupleTableSlot *epqslot;

					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/*
					 * Already know that we're going to need to do EPQ, so
					 * fetch tuple directly into the right slot.
					 */
					EvalPlanQualBegin(epqstate);
					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
												 resultRelInfo->ri_RangeTableIndex);

					result = table_tuple_lock(resultRelationDesc, tupleid,
											  estate->es_snapshot,
											  inputslot, estate->es_output_cid,
											  LockTupleExclusive, LockWaitBlock,
											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
											  &tmfd);

					switch (result)
					{
						case TM_Ok:
							Assert(tmfd.traversed);
							epqslot = EvalPlanQual(epqstate,
												   resultRelationDesc,
												   resultRelInfo->ri_RangeTableIndex,
												   inputslot);
							if (TupIsNull(epqslot))
								/* Tuple not passing quals anymore, exiting... */
								return NULL;

							/*
							 * If requested, skip delete and pass back the
							 * updated row.
							 */
							if (epqreturnslot)
							{
								*epqreturnslot = epqslot;
								return NULL;
							}
							else
								goto ldelete;

						case TM_SelfModified:

							/*
							 * This can be reached when following an update
							 * chain from a tuple updated by another session,
							 * reaching a tuple that was already updated in
							 * this transaction.  If previously updated by this
							 * command, ignore the delete, otherwise error
							 * out.
							 *
							 * See also TM_SelfModified response to
							 * table_tuple_delete() above.
							 */
							if (tmfd.cmax != estate->es_output_cid)
								ereport(ERROR,
										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
							return NULL;

						case TM_Deleted:
							/* tuple already deleted; nothing to do */
							return NULL;

						default:

							/*
							 * TM_Invisible should be impossible because we're
							 * waiting for updated row versions, and would
							 * already have errored out if the first version
							 * is invisible.
							 *
							 * TM_Updated should be impossible, because we're
							 * locking the latest version via
							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
							 */
							elog(ERROR, "unexpected table_tuple_lock status: %u",
								 result);
							return NULL;
					}

					Assert(false);
					break;
				}

			case TM_Deleted:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent delete")));
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized table_tuple_delete status: %u",
					 result);
				return NULL;
		}

		/*
		 * Note: Normally one would think that we have to delete index tuples
		 * associated with the heap tuple now...
		 *
		 * ... but in POSTGRES, we have no need to do this because VACUUM will
		 * take care of it later.  We can't delete index tuples immediately
		 * anyway, since the tuple is still visible to other transactions.
		 */
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* Tell caller that the delete actually happened. */
	if (tupleDeleted)
		*tupleDeleted = true;

	/*
	 * If this delete is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition OLD TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_delete_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_old_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo,
							 tupleid,
							 oldtuple,
							 NULL,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the OLD TABLE row, so make sure any AR
		 * DELETE trigger fired below doesn't capture it again.
		 */
		ar_delete_trig_tcs = NULL;
	}

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
						 ar_delete_trig_tcs);

	/* Process RETURNING if present and if requested */
	if (processReturning && resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.  We can use the trigger tuple slot.
		 */
		TupleTableSlot *rslot;

		if (resultRelInfo->ri_FdwRoutine)
		{
			/* FDW must have provided a slot containing the deleted row */
			Assert(!TupIsNull(slot));
		}
		else
		{
			slot = ExecGetReturningSlot(estate, resultRelInfo);
			if (oldtuple != NULL)
			{
				ExecForceStoreHeapTuple(oldtuple, slot, false);
			}
			else
			{
				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
												   SnapshotAny, slot))
					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
			}
		}

		rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
									 RelationGetRelid(resultRelationDesc),
									 slot, planSlot);

		/*
		 * Before releasing the target tuple again, make sure rslot has a
		 * local copy of any pass-by-reference values.
		 */
		ExecMaterializeSlot(rslot);

		ExecClearTuple(slot);

		return rslot;
	}

	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed, but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
 *
 *		When updating a table, tupleid identifies the tuple to
 *		update and oldtuple is NULL.  When updating a view, oldtuple
 *		is passed to the INSTEAD OF triggers and identifies what to
 *		update, and tupleid is invalid.  When updating a foreign table,
 *		tupleid is invalid; the FDW has to figure out which row to
 *		update using data from the planSlot.  oldtuple is passed to
 *		foreign table triggers; it is NULL when the foreign table has
 *		no relevant triggers.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecUpdate(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool canSetTag)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	List	   *recheckIndexes = NIL;
	TupleConversionMap *saved_tcs_map = NULL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_before_row)
	{
		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
								  tupleid, oldtuple, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
	{
		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
								  oldtuple, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * GENERATED expressions might reference the tableoid column, so
		 * (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot, CMD_UPDATE);

		/*
		 * update in foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.  (This covers the case where the FDW replaced the slot.)
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		LockTupleMode lockmode;
		bool		partition_constraint_failed;
		bool		update_indexes;

		/*
		 * Constraints and GENERATED expressions might reference the tableoid
		 * column, so (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot, CMD_UPDATE);

		/*
		 * Check any RLS UPDATE WITH CHECK policies
		 *
		 * If we generate a new candidate tuple after EvalPlanQual testing, we
		 * must loop back here and recheck any RLS policies and constraints.
		 * (We don't need to redo triggers, however.  If there are any BEFORE
		 * triggers then trigger.c will have done table_tuple_lock to lock the
		 * correct tuple, so there's no need to do them again.)
		 */
lreplace:;

		/* ensure slot is independent, consider e.g. EPQ */
		ExecMaterializeSlot(slot);

		/*
		 * If partition constraint fails, this row might get moved to another
		 * partition, in which case we should check the RLS CHECK policy just
		 * before inserting into the new partition, rather than doing it here.
		 * This is because a trigger on that partition might again change the
		 * row.  So skip the WCO checks if the partition constraint fails.
		 */
		partition_constraint_failed =
			resultRelInfo->ri_PartitionCheck &&
			!ExecPartitionCheck(resultRelInfo, slot, estate, false);

		if (!partition_constraint_failed &&
			resultRelInfo->ri_WithCheckOptions != NIL)
		{
			/*
			 * ExecWithCheckOptions() will skip any WCOs which are not of the
			 * kind we are looking for at this point.
			 */
			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
								 resultRelInfo, slot, estate);
		}

		/*
		 * If a partition check failed, try to move the row into the right
		 * partition.
		 */
		if (partition_constraint_failed)
		{
			bool		tuple_deleted;
			TupleTableSlot *ret_slot;
			TupleTableSlot *orig_slot = slot;
			TupleTableSlot *epqslot = NULL;
			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
			int			map_index;
			TupleConversionMap *tupconv_map;

			/*
			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
			 * original row to migrate to a different partition.  Maybe this
			 * can be implemented some day, but it seems a fringe feature with
			 * little redeeming value.
			 */
			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("invalid ON UPDATE specification"),
						 errdetail("The result tuple would appear in a different partition than the original tuple.")));

			/*
			 * When an UPDATE is run on a leaf partition, we will not have
			 * partition tuple routing set up.  In that case, fail with
			 * partition constraint violation error.
			 */
			if (proute == NULL)
				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);

			/*
			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
			 * processing.  We want to return rows from INSERT.
			 */
			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
					   estate, false, false /* canSetTag */ ,
					   true /* changingPart */ , &tuple_deleted, &epqslot);

			/*
			 * If the DELETE didn't happen for some reason (e.g. a trigger
			 * prevented it, it was already deleted by self, or it was
			 * concurrently deleted by another transaction), then we should
			 * skip the insert as well; otherwise, an UPDATE could cause an
			 * increase in the total number of rows across all partitions,
			 * which is clearly wrong.
			 *
			 * For a normal UPDATE, the case where the tuple has been the
			 * subject of a concurrent UPDATE or DELETE would be handled by
			 * the EvalPlanQual machinery, but for an UPDATE that we've
			 * translated into a DELETE from this partition and an INSERT into
			 * some other partition, that's not available, because CTID chains
			 * can't span relation boundaries.  We mimic the semantics to a
			 * limited extent by skipping the INSERT if the DELETE fails to
			 * find a tuple.  This ensures that two concurrent attempts to
			 * UPDATE the same tuple at the same time can't turn one tuple
			 * into two, and that an UPDATE of a just-deleted tuple can't
			 * resurrect it.
			 */
			if (!tuple_deleted)
			{
				/*
				 * epqslot will be typically NULL.  But when ExecDelete()
				 * finds that another transaction has concurrently updated the
				 * same row, it re-fetches the row, skips the delete, and
				 * epqslot is set to the re-fetched tuple slot.  In that case,
				 * we need to do all the checks again.
				 */
				if (TupIsNull(epqslot))
					return NULL;
				else
				{
					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
					goto lreplace;
				}
			}

			/*
			 * Updates set the transition capture map only when a new subplan
			 * is chosen.  But for inserts, it is set for each row.  So after
			 * INSERT, we need to revert back to the map created for UPDATE;
			 * otherwise the next UPDATE will incorrectly use the one created
			 * for INSERT.  So first save the one created for UPDATE.
			 */
			if (mtstate->mt_transition_capture)
				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;

			/*
			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
			 * should convert the tuple into root's tuple descriptor, since
			 * ExecInsert() starts the search from root.  The tuple conversion
			 * map list is in the order of mtstate->resultRelInfo[], so to
			 * retrieve the one for this resultRel, we need to know the
			 * position of the resultRel in mtstate->resultRelInfo[].
			 */
			map_index = resultRelInfo - mtstate->resultRelInfo;
			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
			if (tupconv_map != NULL)
				slot = execute_attr_map_slot(tupconv_map->attrMap,
											 slot,
											 mtstate->mt_root_tuple_slot);

			/*
			 * Prepare for tuple routing, making it look like we're inserting
			 * into the root.
			 */
			Assert(mtstate->rootResultRelInfo != NULL);
			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
										   mtstate->rootResultRelInfo, slot);

			ret_slot = ExecInsert(mtstate, slot, planSlot,
								  orig_slot, resultRelInfo,
								  estate, canSetTag);

			/* Revert ExecPrepareTupleRouting's node change. */
			estate->es_result_relation_info = resultRelInfo;
			if (mtstate->mt_transition_capture)
			{
				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
			}

			return ret_slot;
		}

		/*
		 * Check the constraints of the tuple.  We've already checked the
		 * partition constraint above; however, we must still ensure the tuple
		 * passes all other constraints, so we will call ExecConstraints() and
		 * have it validate all remaining checks.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * replace the heap tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be updated is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
		result = table_tuple_update(resultRelationDesc, tupleid, slot,
									estate->es_output_cid,
									estate->es_snapshot,
									estate->es_crosscheck_snapshot,
									true /* wait for commit */ ,
									&tmfd, &lockmode, &update_indexes);

		switch (result)
		{
			case TM_SelfModified:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join UPDATE
				 * where multiple tuples join to the same target tuple.  This
				 * is pretty questionable, but Postgres has always allowed it:
				 * we just execute the first update action and ignore
				 * additional update attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the update, but it is equally unsafe to
				 * proceed.  We don't want to discard the original UPDATE
				 * while keeping the triggered actions based on it; and we
				 * have no principled way to merge this update with the
				 * previous ones.  So throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the UPDATE (assuming it can figure out how)
				 * and then return NULL to cancel the outer update.
				 */
				if (tmfd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already updated by self; nothing to do */
				return NULL;

			case TM_Ok:
				break;

			case TM_Updated:
				{
					TupleTableSlot *inputslot;
					TupleTableSlot *epqslot;

					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/*
					 * Already know that we're going to need to do EPQ, so
					 * fetch tuple directly into the right slot.
					 */
					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
												 resultRelInfo->ri_RangeTableIndex);

					result = table_tuple_lock(resultRelationDesc, tupleid,
											  estate->es_snapshot,
											  inputslot, estate->es_output_cid,
											  lockmode, LockWaitBlock,
											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
											  &tmfd);

					switch (result)
					{
						case TM_Ok:
							Assert(tmfd.traversed);

							epqslot = EvalPlanQual(epqstate,
												   resultRelationDesc,
												   resultRelInfo->ri_RangeTableIndex,
												   inputslot);
							if (TupIsNull(epqslot))
								/* Tuple not passing quals anymore, exiting... */
								return NULL;

							slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
							goto lreplace;

						case TM_Deleted:
							/* tuple already deleted; nothing to do */
							return NULL;

						case TM_SelfModified:

							/*
							 * This can be reached when following an update
							 * chain from a tuple updated by another session,
							 * reaching a tuple that was already updated in
							 * this transaction.  If previously modified by
							 * this command, ignore the redundant update,
							 * otherwise error out.
							 *
							 * See also TM_SelfModified response to
							 * table_tuple_update() above.
							 */
							if (tmfd.cmax != estate->es_output_cid)
								ereport(ERROR,
										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
							return NULL;

						default:
							/* see table_tuple_lock call in ExecDelete() */
							elog(ERROR, "unexpected table_tuple_lock status: %u",
								 result);
							return NULL;
					}
				}

				break;

			case TM_Deleted:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent delete")));
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized table_tuple_update status: %u",
					 result);
				return NULL;
		}

		/* insert index entries for tuple if necessary */
		if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
			recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
						 recheckIndexes,
						 mtstate->operation == CMD_INSERT ?
						 mtstate->mt_oc_transition_capture :
						 mtstate->mt_transition_capture);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually updating the
	 * record in the heap and all indexes.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		return ExecProcessReturning(resultRelInfo->ri_projectReturning,
									RelationGetRelid(resultRelationDesc),
									slot, planSlot);

	return NULL;
}
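
/*
 * A statement that takes the row-movement path above (illustrative SQL;
 * names are hypothetical):
 *
 *		CREATE TABLE m (k int, v text) PARTITION BY RANGE (k);
 *		CREATE TABLE m1 PARTITION OF m FOR VALUES FROM (0) TO (10);
 *		CREATE TABLE m2 PARTITION OF m FOR VALUES FROM (10) TO (20);
 *		INSERT INTO m VALUES (5, 'x');
 *		UPDATE m SET k = 15 WHERE k = 5;
 *
 * The new row fails m1's partition constraint, so the UPDATE is executed
 * as a DELETE from m1 followed by a routed INSERT into m2, with the
 * concurrency and RETURNING caveats described above.
 */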

/*
 * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
 *
 * Try to lock tuple for update as part of speculative insertion.  If
 * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
 * (but still lock row, even though it may not satisfy estate's
 * snapshot).
 *
 * Returns true if we're done (with or without an update), or false if
 * the caller must retry the INSERT from scratch.
 */
static bool
ExecOnConflictUpdate(ModifyTableState *mtstate,
					 ResultRelInfo *resultRelInfo,
					 ItemPointer conflictTid,
					 TupleTableSlot *planSlot,
					 TupleTableSlot *excludedSlot,
					 EState *estate,
					 bool canSetTag,
					 TupleTableSlot **returning)
{
	ExprContext *econtext = mtstate->ps.ps_ExprContext;
	Relation	relation = resultRelInfo->ri_RelationDesc;
	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
	TM_FailureData tmfd;
	LockTupleMode lockmode;
	TM_Result	test;
	Datum		xminDatum;
	TransactionId xmin;
	bool		isnull;

	/* Determine lock mode to use */
	lockmode = ExecUpdateLockMode(estate, resultRelInfo);

	/*
	 * Lock tuple for update.  Don't follow updates when tuple cannot be
	 * locked without doing so.  A row locking conflict here means our
	 * previous conclusion that the tuple is conclusively committed is not
	 * true anymore.
	 */
	test = table_tuple_lock(relation, conflictTid,
							estate->es_snapshot,
							existing, estate->es_output_cid,
							lockmode, LockWaitBlock, 0,
							&tmfd);
	switch (test)
	{
		case TM_Ok:
			/* success! */
			break;

		case TM_Invisible:

			/*
			 * This can occur when a just inserted tuple is updated again in
			 * the same command.  E.g. because multiple rows with the same
			 * conflicting key values are inserted.
			 *
			 * This is somewhat similar to the ExecUpdate() TM_SelfModified
			 * case.  We do not want to proceed because it would lead to the
			 * same row being updated a second time in some unspecified order,
			 * and in contrast to plain UPDATEs there's no historical behavior
			 * to break.
			 *
			 * It is the user's responsibility to prevent this situation from
			 * occurring.  These problems are why SQL-2003 similarly specifies
			 * that for SQL MERGE, an exception must be raised in the event of
			 * an attempt to update the same row twice.
			 */
			xminDatum = slot_getsysattr(existing,
										MinTransactionIdAttributeNumber,
										&isnull);
			Assert(!isnull);
			xmin = DatumGetTransactionId(xminDatum);

			if (TransactionIdIsCurrentTransactionId(xmin))
				ereport(ERROR,
						(errcode(ERRCODE_CARDINALITY_VIOLATION),
						 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
1667
1668 /* This shouldn't happen */
1669 elog(ERROR, "attempted to lock invisible tuple");
1670 break;
1671
1672 case TM_SelfModified:
1673
1674 /*
1675 * This state should never be reached. As a dirty snapshot is used
1676 * to find conflicting tuples, speculative insertion wouldn't have
1677 * seen this row to conflict with.
1678 */
1679 elog(ERROR, "unexpected self-updated tuple");
1680 break;
1681
1682 case TM_Updated:
1683 if (IsolationUsesXactSnapshot())
1684 ereport(ERROR,
1685 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1686 errmsg("could not serialize access due to concurrent update")));
1687
1688 /*
1689 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
1690 * a partitioned table we shouldn't reach to a case where tuple to
1691 * be lock is moved to another partition due to concurrent update
1692 * of the partition key.
1693 */
1694 Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1695
1696 /*
1697 * Tell caller to try again from the very start.
1698 *
1699 * It does not make sense to use the usual EvalPlanQual() style
1700 * loop here, as the new version of the row might not conflict
1701 * anymore, or the conflicting tuple has actually been deleted.
1702 */
1703 ExecClearTuple(existing);
1704 return false;
1705
1706 case TM_Deleted:
1707 if (IsolationUsesXactSnapshot())
1708 ereport(ERROR,
1709 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1710 errmsg("could not serialize access due to concurrent delete")));
1711
1712 /* see TM_Updated case */
1713 Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1714 ExecClearTuple(existing);
1715 return false;
1716
1717 default:
1718 elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
1719 }
1720
1721 /* Success, the tuple is locked. */
1722
1723 /*
1724 * Verify that the tuple is visible to our MVCC snapshot if the current
1725 * isolation level mandates that.
1726 *
1727 * It's not sufficient to rely on the check within ExecUpdate() as e.g.
1728 * CONFLICT ... WHERE clause may prevent us from reaching that.
1729 *
1730 * This means we only ever continue when a new command in the current
1731 * transaction could see the row, even though in READ COMMITTED mode the
1732 * tuple will not be visible according to the current statement's
1733 * snapshot. This is in line with the way UPDATE deals with newer tuple
1734 * versions.
1735 */
1736 ExecCheckTupleVisible(estate, relation, existing);
1737
1738 /*
1739 * Make tuple and any needed join variables available to ExecQual and
1740 * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
1741 * the target's existing tuple is installed in the scantuple. EXCLUDED
1742 * has been made to reference INNER_VAR in setrefs.c, but there is no
1743 * other redirection.
1744 */
1745 econtext->ecxt_scantuple = existing;
1746 econtext->ecxt_innertuple = excludedSlot;
1747 econtext->ecxt_outertuple = NULL;
1748
1749 if (!ExecQual(onConflictSetWhere, econtext))
1750 {
1751 ExecClearTuple(existing); /* see return below */
1752 InstrCountFiltered1(&mtstate->ps, 1);
1753 return true; /* done with the tuple */
1754 }
1755
1756 if (resultRelInfo->ri_WithCheckOptions != NIL)
1757 {
1758 /*
1759 * Check target's existing tuple against UPDATE-applicable USING
1760 * security barrier quals (if any), enforced here as RLS checks/WCOs.
1761 *
1762 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
1763 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
1764 * but that's almost the extent of its special handling for ON
1765 * CONFLICT DO UPDATE.
1766 *
1767 * The rewriter will also have associated UPDATE applicable straight
1768 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
1769 * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
1770 * kinds, so there is no danger of spurious over-enforcement in the
1771 * INSERT or UPDATE path.
1772 */
1773 ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
1774 existing,
1775 mtstate->ps.state);
1776 }
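
    /*
     * As an illustration (hypothetical row-level-security setup): given
     *
     *     CREATE POLICY p ON counters FOR UPDATE USING (owner = current_user);
     *
     * the policy's USING qual is enforced just above, against the existing
     * conflicting row, as a WCO_RLS_CONFLICT_CHECK check; a conflict with a
     * row the user may not update raises an error rather than silently
     * skipping the row.
     */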

    /* Project the new tuple version */
    ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);

    /*
     * Note that it is possible that the target tuple has been modified in
     * this session, after the above table_tuple_lock.  We choose to not error
     * out in that case, in line with ExecUpdate's treatment of similar cases.
     * This can happen if an UPDATE is triggered from within ExecQual(),
     * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
     * wCTE in the ON CONFLICT's SET.
     */

    /* Execute UPDATE with projection */
    *returning = ExecUpdate(mtstate, conflictTid, NULL,
                            resultRelInfo->ri_onConflict->oc_ProjSlot,
                            planSlot,
                            &mtstate->mt_epqstate, mtstate->ps.state,
                            canSetTag);

    /*
     * Clear out existing tuple, as there might not be another conflict among
     * the next input rows.  Don't want to hold resources till the end of the
     * query.
     */
    ExecClearTuple(existing);
    return true;
}


/*
 * Process BEFORE EACH STATEMENT triggers
 */
static void
fireBSTriggers(ModifyTableState *node)
{
    ModifyTable *plan = (ModifyTable *) node->ps.plan;
    ResultRelInfo *resultRelInfo = node->resultRelInfo;

    /*
     * If the node modifies a partitioned table, we must fire its triggers.
     * Note that in that case, node->resultRelInfo points to the first leaf
     * partition, not the root table.
     */
    if (node->rootResultRelInfo != NULL)
        resultRelInfo = node->rootResultRelInfo;

    switch (node->operation)
    {
        case CMD_INSERT:
            ExecBSInsertTriggers(node->ps.state, resultRelInfo);
            if (plan->onConflictAction == ONCONFLICT_UPDATE)
                ExecBSUpdateTriggers(node->ps.state,
                                     resultRelInfo);
            break;
        case CMD_UPDATE:
            ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
            break;
        case CMD_DELETE:
            ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
            break;
        default:
            elog(ERROR, "unknown operation");
            break;
    }
}

/*
 * Return the target rel ResultRelInfo.
 *
 * This relation is the same as:
 * - the relation for which we will fire AFTER STATEMENT triggers.
 * - the relation into whose tuple format all captured transition tuples must
 *   be converted.
 * - the root partitioned table.
 */
static ResultRelInfo *
getTargetResultRelInfo(ModifyTableState *node)
{
    /*
     * Note that if the node modifies a partitioned table, node->resultRelInfo
     * points to the first leaf partition, not the root table.
     */
    if (node->rootResultRelInfo != NULL)
        return node->rootResultRelInfo;
    else
        return node->resultRelInfo;
}

/*
 * Process AFTER EACH STATEMENT triggers
 */
static void
fireASTriggers(ModifyTableState *node)
{
    ModifyTable *plan = (ModifyTable *) node->ps.plan;
    ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);

    switch (node->operation)
    {
        case CMD_INSERT:
            if (plan->onConflictAction == ONCONFLICT_UPDATE)
                ExecASUpdateTriggers(node->ps.state,
                                     resultRelInfo,
                                     node->mt_oc_transition_capture);
            ExecASInsertTriggers(node->ps.state, resultRelInfo,
                                 node->mt_transition_capture);
            break;
        case CMD_UPDATE:
            ExecASUpdateTriggers(node->ps.state, resultRelInfo,
                                 node->mt_transition_capture);
            break;
        case CMD_DELETE:
            ExecASDeleteTriggers(node->ps.state, resultRelInfo,
                                 node->mt_transition_capture);
            break;
        default:
            elog(ERROR, "unknown operation");
            break;
    }
}

/*
 * Set up the state needed for collecting transition tuples for AFTER
 * triggers.
 */
static void
ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
{
    ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
    ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);

    /* Check for transition tables on the directly targeted relation. */
    mtstate->mt_transition_capture =
        MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
                                   RelationGetRelid(targetRelInfo->ri_RelationDesc),
                                   mtstate->operation);
    if (plan->operation == CMD_INSERT &&
        plan->onConflictAction == ONCONFLICT_UPDATE)
        mtstate->mt_oc_transition_capture =
            MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
                                       RelationGetRelid(targetRelInfo->ri_RelationDesc),
                                       CMD_UPDATE);

    /*
     * If we found that we need to collect transition tuples then we may also
     * need tuple conversion maps for any children that have TupleDescs that
     * aren't compatible with the tuplestores.  (We can share these maps
     * between the regular and ON CONFLICT cases.)
     */
    if (mtstate->mt_transition_capture != NULL ||
        mtstate->mt_oc_transition_capture != NULL)
    {
        ExecSetupChildParentMapForSubplan(mtstate);

        /*
         * Install the conversion map for the first plan for UPDATE and DELETE
         * operations.  It will be advanced each time we switch to the next
         * plan.  (INSERT operations set it every time, so we need not update
         * mtstate->mt_oc_transition_capture here.)
         */
        if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
            mtstate->mt_transition_capture->tcs_map =
                tupconv_map_for_subplan(mtstate, 0);
    }
}
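
/*
 * Transition tuples come into play for statement-level triggers declared
 * with a REFERENCING clause, e.g. (hypothetical trigger):
 *
 *     CREATE TRIGGER audit AFTER UPDATE ON parent
 *         REFERENCING OLD TABLE AS old_rows NEW TABLE AS new_rows
 *         FOR EACH STATEMENT EXECUTE FUNCTION audit_fn();
 *
 * Rows captured from child tables or partitions must be converted to the
 * parent's rowtype before being stored in the transition tuplestores, which
 * is what the conversion maps set up above are for.
 */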

/*
 * ExecPrepareTupleRouting --- prepare for routing one tuple
 *
 * Determine the partition in which the tuple in slot is to be inserted,
 * and modify mtstate and estate to prepare for it.
 *
 * Caller must revert the estate changes after executing the insertion!
 * In mtstate, transition capture changes may also need to be reverted.
 *
 * Returns a slot holding the tuple of the partition rowtype.
 */
static TupleTableSlot *
ExecPrepareTupleRouting(ModifyTableState *mtstate,
                        EState *estate,
                        PartitionTupleRouting *proute,
                        ResultRelInfo *targetRelInfo,
                        TupleTableSlot *slot)
{
    ResultRelInfo *partrel;
    PartitionRoutingInfo *partrouteinfo;
    TupleConversionMap *map;

    /*
     * Lookup the target partition's ResultRelInfo.  If ExecFindPartition does
     * not find a valid partition for the tuple in 'slot' then an error is
     * raised.  An error may also be raised if the found partition is not a
     * valid target for INSERTs.  This is required since a partitioned table
     * UPDATE to another partition becomes a DELETE+INSERT.
     */
    partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
    partrouteinfo = partrel->ri_PartitionInfo;
    Assert(partrouteinfo != NULL);

    /*
     * Make it look like we are inserting into the partition.
     */
    estate->es_result_relation_info = partrel;

    /*
     * If we're capturing transition tuples, we might need to convert from the
     * partition rowtype to the root partitioned table's rowtype.
     */
    if (mtstate->mt_transition_capture != NULL)
    {
        if (partrel->ri_TrigDesc &&
            partrel->ri_TrigDesc->trig_insert_before_row)
        {
            /*
             * If there are any BEFORE triggers on the partition, we'll have
             * to be ready to convert their result back to tuplestore format.
             */
            mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
            mtstate->mt_transition_capture->tcs_map =
                partrouteinfo->pi_PartitionToRootMap;
        }
        else
        {
            /*
             * Otherwise, just remember the original unconverted tuple, to
             * avoid a needless round trip conversion.
             */
            mtstate->mt_transition_capture->tcs_original_insert_tuple = slot;
            mtstate->mt_transition_capture->tcs_map = NULL;
        }
    }
    if (mtstate->mt_oc_transition_capture != NULL)
    {
        mtstate->mt_oc_transition_capture->tcs_map =
            partrouteinfo->pi_PartitionToRootMap;
    }

    /*
     * Convert the tuple, if necessary.
     */
    map = partrouteinfo->pi_RootToPartitionMap;
    if (map != NULL)
    {
        TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;

        slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
    }

    return slot;
}
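
/*
 * For example (hypothetical partitioned table):
 *
 *     CREATE TABLE meas (logdate date, val int) PARTITION BY RANGE (logdate);
 *     CREATE TABLE meas_2020 PARTITION OF meas
 *         FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');
 *     INSERT INTO meas VALUES ('2020-06-01', 42);
 *
 * Here ExecFindPartition resolves the row to meas_2020; if that partition's
 * physical column layout differs from the root's (say, after a column was
 * dropped from one but not the other), pi_RootToPartitionMap converts the
 * tuple before insertion.
 */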

/*
 * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
 *
 * This map array is required to convert the tuple from the subplan result rel
 * to the target table descriptor.  This requirement arises for two independent
 * scenarios:
 * 1. For update-tuple-routing.
 * 2. For capturing tuples in transition tables.
 */
static void
ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
{
    ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
    ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
    TupleDesc outdesc;
    int numResultRelInfos = mtstate->mt_nplans;
    int i;

    /*
     * Build array of conversion maps from each child's TupleDesc to the one
     * used in the target relation.  The map pointers may be NULL when no
     * conversion is necessary, which is hopefully a common case.
     */

    /* Get tuple descriptor of the target rel. */
    outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);

    mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
        palloc(sizeof(TupleConversionMap *) * numResultRelInfos);

    for (i = 0; i < numResultRelInfos; ++i)
    {
        mtstate->mt_per_subplan_tupconv_maps[i] =
            convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
                                   outdesc);
    }
}
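
/*
 * A child needs a non-NULL map whenever its physical column order differs
 * from the target's, e.g. (hypothetical inheritance tree):
 *
 *     CREATE TABLE parent (a int, b text);
 *     CREATE TABLE child (b text, a int);
 *     ALTER TABLE child INHERIT parent;
 *
 * convert_tuples_by_name matches columns by name, so child tuples get
 * reordered to (a, b) before being used in the parent's rowtype.
 */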
2066
2067 /*
2068 * For a given subplan index, get the tuple conversion map.
2069 */
2070 static TupleConversionMap *
tupconv_map_for_subplan(ModifyTableState * mtstate,int whichplan)2071 tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
2072 {
2073 /* If nobody else set the per-subplan array of maps, do so ourselves. */
2074 if (mtstate->mt_per_subplan_tupconv_maps == NULL)
2075 ExecSetupChildParentMapForSubplan(mtstate);
2076
2077 Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
2078 return mtstate->mt_per_subplan_tupconv_maps[whichplan];
2079 }

/* ----------------------------------------------------------------
 *      ExecModifyTable
 *
 *      Perform table modifications as required, and return RETURNING results
 *      if needed.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecModifyTable(PlanState *pstate)
{
    ModifyTableState *node = castNode(ModifyTableState, pstate);
    PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
    EState *estate = node->ps.state;
    CmdType operation = node->operation;
    ResultRelInfo *saved_resultRelInfo;
    ResultRelInfo *resultRelInfo;
    PlanState *subplanstate;
    JunkFilter *junkfilter;
    TupleTableSlot *slot;
    TupleTableSlot *planSlot;
    ItemPointer tupleid;
    ItemPointerData tuple_ctid;
    HeapTupleData oldtupdata;
    HeapTuple oldtuple;

    CHECK_FOR_INTERRUPTS();

    /*
     * This should NOT get called during EvalPlanQual; we should have passed a
     * subplan tree to EvalPlanQual, instead.  Use a runtime test not just
     * Assert because this condition is easy to miss in testing.  (Note:
     * although ModifyTable should not get executed within an EvalPlanQual
     * operation, we do have to allow it to be initialized and shut down in
     * case it is within a CTE subplan.  Hence this test must be here, not in
     * ExecInitModifyTable.)
     */
    if (estate->es_epq_active != NULL)
        elog(ERROR, "ModifyTable should not be called during EvalPlanQual");

    /*
     * If we've already completed processing, don't try to do more.  We need
     * this test because ExecPostprocessPlan might call us an extra time, and
     * our subplan's nodes aren't necessarily robust against being called
     * extra times.
     */
    if (node->mt_done)
        return NULL;

    /*
     * On first call, fire BEFORE STATEMENT triggers before proceeding.
     */
    if (node->fireBSTriggers)
    {
        fireBSTriggers(node);
        node->fireBSTriggers = false;
    }

    /* Preload local variables */
    resultRelInfo = node->resultRelInfo + node->mt_whichplan;
    subplanstate = node->mt_plans[node->mt_whichplan];
    junkfilter = resultRelInfo->ri_junkFilter;

    /*
     * es_result_relation_info must point to the currently active result
     * relation while we are within this ModifyTable node.  Even though
     * ModifyTable nodes can't be nested statically, they can be nested
     * dynamically (since our subplan could include a reference to a modifying
     * CTE).  So we have to save and restore the caller's value.
     */
    saved_resultRelInfo = estate->es_result_relation_info;

    estate->es_result_relation_info = resultRelInfo;

    /*
     * Fetch rows from subplan(s), and execute the required table modification
     * for each row.
     */
    for (;;)
    {
        /*
         * Reset the per-output-tuple exprcontext.  This is needed because
         * triggers expect to use that context as workspace.  It's a bit ugly
         * to do this below the top level of the plan, however.  We might need
         * to rethink this later.
         */
        ResetPerTupleExprContext(estate);

        /*
         * Reset per-tuple memory context used for processing on conflict and
         * returning clauses, to free any expression evaluation storage
         * allocated in the previous cycle.
         */
        if (pstate->ps_ExprContext)
            ResetExprContext(pstate->ps_ExprContext);

        planSlot = ExecProcNode(subplanstate);

        if (TupIsNull(planSlot))
        {
            /* advance to next subplan if any */
            node->mt_whichplan++;
            if (node->mt_whichplan < node->mt_nplans)
            {
                resultRelInfo++;
                subplanstate = node->mt_plans[node->mt_whichplan];
                junkfilter = resultRelInfo->ri_junkFilter;
                estate->es_result_relation_info = resultRelInfo;
                EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
                                    node->mt_arowmarks[node->mt_whichplan]);
                /* Prepare to convert transition tuples from this child. */
                if (node->mt_transition_capture != NULL)
                {
                    node->mt_transition_capture->tcs_map =
                        tupconv_map_for_subplan(node, node->mt_whichplan);
                }
                if (node->mt_oc_transition_capture != NULL)
                {
                    node->mt_oc_transition_capture->tcs_map =
                        tupconv_map_for_subplan(node, node->mt_whichplan);
                }
                continue;
            }
            else
                break;
        }

        /*
         * Ensure input tuple is the right format for the target relation.
         */
        if (node->mt_scans[node->mt_whichplan]->tts_ops != planSlot->tts_ops)
        {
            ExecCopySlot(node->mt_scans[node->mt_whichplan], planSlot);
            planSlot = node->mt_scans[node->mt_whichplan];
        }

        /*
         * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
         * here is compute the RETURNING expressions.
         */
        if (resultRelInfo->ri_usesFdwDirectModify)
        {
            Assert(resultRelInfo->ri_projectReturning);

            /*
             * A scan slot containing the data that was actually inserted,
             * updated or deleted has already been made available to
             * ExecProcessReturning by IterateDirectModify, so no need to
             * provide it here.
             */
            slot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
                                        RelationGetRelid(resultRelInfo->ri_RelationDesc),
                                        NULL, planSlot);

            estate->es_result_relation_info = saved_resultRelInfo;
            return slot;
        }

        EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
        slot = planSlot;

        tupleid = NULL;
        oldtuple = NULL;
        if (junkfilter != NULL)
        {
            /*
             * extract the 'ctid' or 'wholerow' junk attribute.
             */
            if (operation == CMD_UPDATE || operation == CMD_DELETE)
            {
                char relkind;
                Datum datum;
                bool isNull;

                relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
                if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
                {
                    datum = ExecGetJunkAttribute(slot,
                                                 junkfilter->jf_junkAttNo,
                                                 &isNull);
                    /* shouldn't ever get a null result... */
                    if (isNull)
                        elog(ERROR, "ctid is NULL");

                    tupleid = (ItemPointer) DatumGetPointer(datum);
                    tuple_ctid = *tupleid;  /* be sure we don't free ctid!! */
                    tupleid = &tuple_ctid;
                }

                /*
                 * Use the wholerow attribute, when available, to reconstruct
                 * the old relation tuple.
                 *
                 * Foreign table updates have a wholerow attribute when the
                 * relation has a row-level trigger.  Note that the wholerow
                 * attribute does not carry system columns.  Foreign table
                 * triggers miss seeing those, except that we know enough here
                 * to set t_tableOid.  Quite separately from this, the FDW may
                 * fetch its own junk attrs to identify the row.
                 *
                 * Other relevant relkinds, currently limited to views, always
                 * have a wholerow attribute.
                 */
                else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
                {
                    datum = ExecGetJunkAttribute(slot,
                                                 junkfilter->jf_junkAttNo,
                                                 &isNull);
                    /* shouldn't ever get a null result... */
                    if (isNull)
                        elog(ERROR, "wholerow is NULL");

                    oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
                    oldtupdata.t_len =
                        HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
                    ItemPointerSetInvalid(&(oldtupdata.t_self));
                    /* Historically, view triggers see invalid t_tableOid. */
                    oldtupdata.t_tableOid =
                        (relkind == RELKIND_VIEW) ? InvalidOid :
                        RelationGetRelid(resultRelInfo->ri_RelationDesc);

                    oldtuple = &oldtupdata;
                }
                else
                    Assert(relkind == RELKIND_FOREIGN_TABLE);
            }

            /*
             * apply the junkfilter if needed.
             */
            if (operation != CMD_DELETE)
                slot = ExecFilterJunk(junkfilter, slot);
        }
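
        /*
         * At this point, for a plain-table UPDATE such as (hypothetical)
         * "UPDATE accounts SET balance = 0 WHERE id = 1", the planner has
         * appended a resjunk "ctid" column to the subplan's targetlist; the
         * code above copied it into tuple_ctid so the table AM knows which
         * physical row version to modify, and ExecFilterJunk removed it from
         * the slot that supplies the new column values.
         */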

        switch (operation)
        {
            case CMD_INSERT:
                /* Prepare for tuple routing if needed. */
                if (proute)
                    slot = ExecPrepareTupleRouting(node, estate, proute,
                                                   resultRelInfo, slot);
                slot = ExecInsert(node, slot, planSlot,
                                  NULL, estate->es_result_relation_info,
                                  estate, node->canSetTag);
                /* Revert ExecPrepareTupleRouting's state change. */
                if (proute)
                    estate->es_result_relation_info = resultRelInfo;
                break;
            case CMD_UPDATE:
                slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
                                  &node->mt_epqstate, estate, node->canSetTag);
                break;
            case CMD_DELETE:
                slot = ExecDelete(node, tupleid, oldtuple, planSlot,
                                  &node->mt_epqstate, estate,
                                  true, node->canSetTag,
                                  false /* changingPart */ , NULL, NULL);
                break;
            default:
                elog(ERROR, "unknown operation");
                break;
        }

        /*
         * If we got a RETURNING result, return it to caller.  We'll continue
         * the work on next call.
         */
        if (slot)
        {
            estate->es_result_relation_info = saved_resultRelInfo;
            return slot;
        }
    }

    /* Restore es_result_relation_info before exiting */
    estate->es_result_relation_info = saved_resultRelInfo;

    /*
     * We're done, but fire AFTER STATEMENT triggers before exiting.
     */
    fireASTriggers(node);

    node->mt_done = true;

    return NULL;
}

/* ----------------------------------------------------------------
 *      ExecInitModifyTable
 * ----------------------------------------------------------------
 */
ModifyTableState *
ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
    ModifyTableState *mtstate;
    CmdType operation = node->operation;
    int nplans = list_length(node->plans);
    ResultRelInfo *saved_resultRelInfo;
    ResultRelInfo *resultRelInfo;
    Plan *subplan;
    ListCell *l;
    int i;
    Relation rel;
    bool update_tuple_routing_needed = node->partColsUpdated;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    mtstate = makeNode(ModifyTableState);
    mtstate->ps.plan = (Plan *) node;
    mtstate->ps.state = estate;
    mtstate->ps.ExecProcNode = ExecModifyTable;

    mtstate->operation = operation;
    mtstate->canSetTag = node->canSetTag;
    mtstate->mt_done = false;

    mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
    mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
    mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);

    /* If modifying a partitioned table, initialize the root table info */
    if (node->rootResultRelIndex >= 0)
        mtstate->rootResultRelInfo = estate->es_root_result_relations +
            node->rootResultRelIndex;

    mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
    mtstate->mt_nplans = nplans;

    /* set up epqstate with dummy subplan data for the moment */
    EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
    mtstate->fireBSTriggers = true;

    /*
     * call ExecInitNode on each of the plans to be executed and save the
     * results into the array "mt_plans".  This is also a convenient place to
     * verify that the proposed target relations are valid and open their
     * indexes for insertion of new index entries.  Note we *must* set
     * estate->es_result_relation_info correctly while we initialize each
     * sub-plan; external modules such as FDWs may depend on that (see
     * contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as one
     * example).
     */
    saved_resultRelInfo = estate->es_result_relation_info;

    resultRelInfo = mtstate->resultRelInfo;
    i = 0;
    foreach(l, node->plans)
    {
        subplan = (Plan *) lfirst(l);

        /* Initialize the usesFdwDirectModify flag */
        resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
                                                              node->fdwDirectModifyPlans);

        /*
         * Verify result relation is a valid target for the current operation
         */
        CheckValidResultRel(resultRelInfo, operation);

        /*
         * If there are indices on the result relation, open them and save
         * descriptors in the result relation info, so that we can add new
         * index entries for the tuples we add/update.  We need not do this
         * for a DELETE, however, since deletion doesn't affect indexes.  Also,
         * inside an EvalPlanQual operation, the indexes might be open
         * already, since we share the resultrel state with the original
         * query.
         */
        if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
            operation != CMD_DELETE &&
            resultRelInfo->ri_IndexRelationDescs == NULL)
            ExecOpenIndices(resultRelInfo,
                            node->onConflictAction != ONCONFLICT_NONE);

        /*
         * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
         * trigger itself might modify the partition-key values.  So arrange
         * for tuple routing.
         */
        if (resultRelInfo->ri_TrigDesc &&
            resultRelInfo->ri_TrigDesc->trig_update_before_row &&
            operation == CMD_UPDATE)
            update_tuple_routing_needed = true;

        /* Now init the plan for this result rel */
        estate->es_result_relation_info = resultRelInfo;
        mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
        mtstate->mt_scans[i] =
            ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
                                   table_slot_callbacks(resultRelInfo->ri_RelationDesc));

        /* Also let FDWs init themselves for foreign-table result rels */
        if (!resultRelInfo->ri_usesFdwDirectModify &&
            resultRelInfo->ri_FdwRoutine != NULL &&
            resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
        {
            List *fdw_private = (List *) list_nth(node->fdwPrivLists, i);

            resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
                                                             resultRelInfo,
                                                             fdw_private,
                                                             i,
                                                             eflags);
        }

        resultRelInfo++;
        i++;
    }

    estate->es_result_relation_info = saved_resultRelInfo;

    /* Get the target relation */
    rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;

    /*
     * If it's not a partitioned table after all, UPDATE tuple routing should
     * not be attempted.
     */
    if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        update_tuple_routing_needed = false;

    /*
     * Build state for tuple routing if it's an INSERT or if it's an UPDATE of
     * partition key.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
        (operation == CMD_INSERT || update_tuple_routing_needed))
        mtstate->mt_partition_tuple_routing =
            ExecSetupPartitionTupleRouting(estate, mtstate, rel);

    /*
     * Build state for collecting transition tuples.  This requires having a
     * valid trigger query context, so skip it in explain-only mode.
     */
    if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
        ExecSetupTransitionCaptureState(mtstate, estate);

    /*
     * Construct a mapping from each of the per-subplan partition attnos to
     * the root attno.  This is required when, during UPDATE row movement, the
     * tuple descriptor of a source partition does not match the root
     * partitioned table descriptor.  In such a case we need to convert tuples
     * to the root tuple descriptor, because the search for the destination
     * partition starts from the root.  We'll also need a slot to store these
     * converted tuples.  We can skip this setup if it's not a partition key
     * update.
     */
    if (update_tuple_routing_needed)
    {
        ExecSetupChildParentMapForSubplan(mtstate);
        mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL);
    }
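
    /*
     * UPDATE row movement, which the above prepares for, occurs when an
     * UPDATE changes a row's partition key, e.g. (hypothetical, reusing the
     * range-partitioned "meas" table): "UPDATE meas SET logdate = logdate +
     * 365".  The row is deleted from its current partition and re-routed,
     * via the root, into whichever partition matches the new key value.
     */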

    /*
     * Initialize any WITH CHECK OPTION constraints if needed.
     */
    resultRelInfo = mtstate->resultRelInfo;
    i = 0;
    foreach(l, node->withCheckOptionLists)
    {
        List *wcoList = (List *) lfirst(l);
        List *wcoExprs = NIL;
        ListCell *ll;

        foreach(ll, wcoList)
        {
            WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
            ExprState *wcoExpr = ExecInitQual((List *) wco->qual,
                                              &mtstate->ps);

            wcoExprs = lappend(wcoExprs, wcoExpr);
        }

        resultRelInfo->ri_WithCheckOptions = wcoList;
        resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
        resultRelInfo++;
        i++;
    }
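
    /*
     * Such WCOs arise from auto-updatable views declared with a check
     * option, e.g. (hypothetical view over "accounts"):
     *
     *     CREATE VIEW active_accounts AS
     *         SELECT * FROM accounts WHERE active
     *         WITH CHECK OPTION;
     *
     * An INSERT or UPDATE through the view must produce rows satisfying
     * "active"; the quals initialized here are evaluated against each new
     * row, and a failing row raises an error.
     */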

    /*
     * Initialize RETURNING projections if needed.
     */
    if (node->returningLists)
    {
        TupleTableSlot *slot;
        ExprContext *econtext;

        /*
         * Initialize result tuple slot and assign its rowtype using the first
         * RETURNING list.  We assume the rest will look the same.
         */
        mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);

        /* Set up a slot for the output of the RETURNING projection(s) */
        ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
        slot = mtstate->ps.ps_ResultTupleSlot;

        /* Need an econtext too */
        if (mtstate->ps.ps_ExprContext == NULL)
            ExecAssignExprContext(estate, &mtstate->ps);
        econtext = mtstate->ps.ps_ExprContext;

        /*
         * Build a projection for each result rel.
         */
        resultRelInfo = mtstate->resultRelInfo;
        foreach(l, node->returningLists)
        {
            List *rlist = (List *) lfirst(l);

            resultRelInfo->ri_returningList = rlist;
            resultRelInfo->ri_projectReturning =
                ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
                                        resultRelInfo->ri_RelationDesc->rd_att);
            resultRelInfo++;
        }
    }
    else
    {
        /*
         * We still must construct a dummy result tuple type, because InitPlan
         * expects one (maybe should change that?).
         */
        mtstate->ps.plan->targetlist = NIL;
        ExecInitResultTypeTL(&mtstate->ps);

        mtstate->ps.ps_ExprContext = NULL;
    }

    /* Set the list of arbiter indexes if needed for ON CONFLICT */
    resultRelInfo = mtstate->resultRelInfo;
    if (node->onConflictAction != ONCONFLICT_NONE)
        resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;

    /*
     * If needed, initialize the target list, projection and qual for ON
     * CONFLICT DO UPDATE.
     */
    if (node->onConflictAction == ONCONFLICT_UPDATE)
    {
        OnConflictSetState *onconfl = makeNode(OnConflictSetState);
        ExprContext *econtext;
        TupleDesc relationDesc;

        /* insert may only have one plan, inheritance is not expanded */
        Assert(nplans == 1);

        /* already exists if created by RETURNING processing above */
        if (mtstate->ps.ps_ExprContext == NULL)
            ExecAssignExprContext(estate, &mtstate->ps);

        econtext = mtstate->ps.ps_ExprContext;
        relationDesc = resultRelInfo->ri_RelationDesc->rd_att;

        /* create state for DO UPDATE SET operation */
        resultRelInfo->ri_onConflict = onconfl;

        /* initialize slot for the existing tuple */
        onconfl->oc_Existing =
            table_slot_create(resultRelInfo->ri_RelationDesc,
                              &mtstate->ps.state->es_tupleTable);

        /*
         * Create the tuple slot for the UPDATE SET projection.  We want a slot
         * of the table's type here, because the slot will be used to insert
         * into the table, and for RETURNING processing - which may access
         * system attributes.
         */
        onconfl->oc_ProjSlot =
            table_slot_create(resultRelInfo->ri_RelationDesc,
                              &mtstate->ps.state->es_tupleTable);

        /*
         * The onConflictSet tlist should already have been adjusted to emit
         * the table's exact column list.  It could also contain resjunk
         * columns, which should be evaluated but not included in the
         * projection result.
         */
        ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
                            node->onConflictSet);

        /* build UPDATE SET projection state */
        onconfl->oc_ProjInfo =
            ExecBuildProjectionInfoExt(node->onConflictSet, econtext,
                                       onconfl->oc_ProjSlot, false,
                                       &mtstate->ps,
                                       relationDesc);

        /* initialize state to evaluate the WHERE clause, if any */
        if (node->onConflictWhere)
        {
            ExprState *qualexpr;

            qualexpr = ExecInitQual((List *) node->onConflictWhere,
                                    &mtstate->ps);
            onconfl->oc_WhereClause = qualexpr;
        }
    }
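
    /*
     * Mapping the pieces above onto a concrete (hypothetical) command:
     * given
     *
     *     INSERT INTO counters VALUES (1, 1)
     *         ON CONFLICT (id) DO UPDATE SET n = EXCLUDED.n
     *         WHERE counters.n < 100;
     *
     * the arbiter index is the unique index on "id", oc_ProjInfo evaluates
     * the SET list "n = EXCLUDED.n", and oc_WhereClause holds the
     * "counters.n < 100" qual that ExecOnConflictUpdate checks before
     * applying the update.
     */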

    /*
     * If we have any secondary relations in an UPDATE or DELETE, they need to
     * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
     * EvalPlanQual mechanism needs to be told about them.  Locate the
     * relevant ExecRowMarks.
     */
    foreach(l, node->rowMarks)
    {
        PlanRowMark *rc = lfirst_node(PlanRowMark, l);
        ExecRowMark *erm;

        /* ignore "parent" rowmarks; they are irrelevant at runtime */
        if (rc->isParent)
            continue;

        /* find ExecRowMark (same for all subplans) */
        erm = ExecFindRowMark(estate, rc->rti, false);

        /* build ExecAuxRowMark for each subplan */
        for (i = 0; i < nplans; i++)
        {
            ExecAuxRowMark *aerm;

            subplan = mtstate->mt_plans[i]->plan;
            aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
            mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
        }
    }

    /* select first subplan */
    mtstate->mt_whichplan = 0;
    subplan = (Plan *) linitial(node->plans);
    EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
                        mtstate->mt_arowmarks[0]);

    /*
     * Initialize the junk filter(s) if needed.  INSERT queries need a filter
     * if there are any junk attrs in the tlist.  UPDATE and DELETE always
     * need a filter, since there's always at least one junk attribute present
     * --- no need to look first.  Typically, this will be a 'ctid' or
     * 'wholerow' attribute, but in the case of a foreign data wrapper it
     * might be a set of junk attributes sufficient to identify the remote
     * row.
     *
     * If there are multiple result relations, each one needs its own junk
     * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
     * can't be fooled by some needing a filter and some not.
     *
     * This section of code is also a convenient place to verify that the
     * output of an INSERT or UPDATE matches the target table(s).
     */
    {
        bool junk_filter_needed = false;

        switch (operation)
        {
            case CMD_INSERT:
                foreach(l, subplan->targetlist)
                {
                    TargetEntry *tle = (TargetEntry *) lfirst(l);

                    if (tle->resjunk)
                    {
                        junk_filter_needed = true;
                        break;
                    }
                }
                break;
            case CMD_UPDATE:
            case CMD_DELETE:
                junk_filter_needed = true;
                break;
            default:
                elog(ERROR, "unknown operation");
                break;
        }

        if (junk_filter_needed)
        {
            resultRelInfo = mtstate->resultRelInfo;
            for (i = 0; i < nplans; i++)
            {
                JunkFilter *j;
                TupleTableSlot *junkresslot;

                subplan = mtstate->mt_plans[i]->plan;

                junkresslot =
                    ExecInitExtraTupleSlot(estate, NULL,
                                           table_slot_callbacks(resultRelInfo->ri_RelationDesc));

                /*
                 * For an INSERT or UPDATE, the result tuple must always match
                 * the target table's descriptor.  For a DELETE, it won't
                 * (indeed, there are probably no non-junk output columns).
                 */
                if (operation == CMD_INSERT || operation == CMD_UPDATE)
                {
                    ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
                                        subplan->targetlist);
                    j = ExecInitJunkFilterInsertion(subplan->targetlist,
                                                    RelationGetDescr(resultRelInfo->ri_RelationDesc),
                                                    junkresslot);
                }
                else
                    j = ExecInitJunkFilter(subplan->targetlist,
                                           junkresslot);

                if (operation == CMD_UPDATE || operation == CMD_DELETE)
                {
                    /* For UPDATE/DELETE, find the appropriate junk attr now */
                    char relkind;

                    relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
                    if (relkind == RELKIND_RELATION ||
                        relkind == RELKIND_MATVIEW ||
                        relkind == RELKIND_PARTITIONED_TABLE)
                    {
                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
                        if (!AttributeNumberIsValid(j->jf_junkAttNo))
                            elog(ERROR, "could not find junk ctid column");
                    }
                    else if (relkind == RELKIND_FOREIGN_TABLE)
                    {
                        /*
                         * When there is a row-level trigger, there should be
                         * a wholerow attribute.
                         */
                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
                    }
                    else
                    {
                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
                        if (!AttributeNumberIsValid(j->jf_junkAttNo))
                            elog(ERROR, "could not find junk wholerow column");
                    }
                }

                resultRelInfo->ri_junkFilter = j;
                resultRelInfo++;
            }
        }
        else
        {
            if (operation == CMD_INSERT)
                ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
                                    subplan->targetlist);
        }
    }

    /*
     * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
     * to estate->es_auxmodifytables so that it will be run to completion by
     * ExecPostprocessPlan.  (It'd actually work fine to add the primary
     * ModifyTable node too, but there's no need.)  Note the use of lcons not
     * lappend: we need later-initialized ModifyTable nodes to be shut down
     * before earlier ones.  This ensures that we don't throw away RETURNING
     * rows that need to be seen by a later CTE subplan.
     */
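    /*
     * Auxiliary ModifyTable nodes come from data-modifying CTEs, e.g.
     * (hypothetical):
     *
     *     WITH moved AS (
     *         DELETE FROM accounts WHERE NOT active RETURNING *
     *     )
     *     INSERT INTO accounts_archive SELECT * FROM moved;
     *
     * Here the DELETE's ModifyTable node is not the one setting the command
     * tag, so it is registered below for ExecPostprocessPlan to run to
     * completion.
     */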
    if (!mtstate->canSetTag)
        estate->es_auxmodifytables = lcons(mtstate,
                                           estate->es_auxmodifytables);

    return mtstate;
}

/* ----------------------------------------------------------------
 *      ExecEndModifyTable
 *
 *      Shuts down the plan.
 *
 *      Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndModifyTable(ModifyTableState *node)
{
    int i;

    /*
     * Allow any FDWs to shut down
     */
    for (i = 0; i < node->mt_nplans; i++)
    {
        ResultRelInfo *resultRelInfo = node->resultRelInfo + i;

        if (!resultRelInfo->ri_usesFdwDirectModify &&
            resultRelInfo->ri_FdwRoutine != NULL &&
            resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
            resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
                                                           resultRelInfo);
    }

    /*
     * Close all the partitioned tables, leaf partitions, and their indices
     * and release the slot used for tuple routing, if set.
     */
    if (node->mt_partition_tuple_routing)
    {
        ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);

        if (node->mt_root_tuple_slot)
            ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
    }

    /*
     * Free the exprcontext
     */
    ExecFreeExprContext(&node->ps);

    /*
     * clean out the tuple table
     */
    if (node->ps.ps_ResultTupleSlot)
        ExecClearTuple(node->ps.ps_ResultTupleSlot);

    /*
     * Terminate EPQ execution if active
     */
    EvalPlanQualEnd(&node->mt_epqstate);

    /*
     * shut down subplans
     */
    for (i = 0; i < node->mt_nplans; i++)
        ExecEndNode(node->mt_plans[i]);
}

void
ExecReScanModifyTable(ModifyTableState *node)
{
    /*
     * Currently, we don't need to support rescan on ModifyTable nodes.  The
     * semantics of that would be a bit debatable anyway.
     */
    elog(ERROR, "ExecReScanModifyTable is not implemented");
}