/*-------------------------------------------------------------------------
 *
 * nodeModifyTable.c
 *	  routines to handle ModifyTable nodes.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeModifyTable.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitModifyTable - initialize the ModifyTable node
 *		ExecModifyTable - retrieve the next tuple from the node
 *		ExecEndModifyTable - shut down the ModifyTable node
 *		ExecReScanModifyTable - rescan the ModifyTable node
 *
 *	 NOTES
 *		Each ModifyTable node contains a list of one or more subplans,
 *		much like an Append node.  There is one subplan per result relation.
 *		The key reason for this is that in an inherited UPDATE command, each
 *		result relation could have a different schema (more or different
 *		columns) requiring a different plan tree to produce it.  In an
 *		inherited DELETE, all the subplans should produce the same output
 *		rowtype, but we might still find that different plans are appropriate
 *		for different child relations.
 *
 *		If the query specifies RETURNING, then the ModifyTable returns a
 *		RETURNING tuple after completing each row insert, update, or delete.
 *		It must be called again to continue the operation.  Without RETURNING,
 *		we just loop within the node until all the work is done, then
 *		return NULL.  This avoids useless call/return overhead.
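 *
 *		For example (illustrative):
 *			INSERT INTO foo VALUES (1), (2) RETURNING *;
 *		makes ExecModifyTable return one projected tuple per call, once per
 *		inserted row, whereas the same statement without RETURNING performs
 *		both insertions during a single call and then returns NULL.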
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "access/xact.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"


static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
					 ResultRelInfo *resultRelInfo,
					 ItemPointer conflictTid,
					 TupleTableSlot *planSlot,
					 TupleTableSlot *excludedSlot,
					 EState *estate,
					 bool canSetTag,
					 TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
						EState *estate,
						PartitionTupleRouting *proute,
						ResultRelInfo *targetRelInfo,
						TupleTableSlot *slot);
static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate);
static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
						int whichplan);

/*
 * Verify that the tuples to be produced by INSERT or UPDATE match the
 * target relation's rowtype
 *
 * We do this to guard against stale plans.  If plan invalidation is
 * functioning properly then we should never get a failure here, but better
 * safe than sorry.  Note that this is called after we have obtained lock
 * on the target rel, so the rowtype can't change underneath us.
 *
 * The plan output is represented by its targetlist, because that makes
 * handling the dropped-column case easier.
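 *
 * For example, if a cached plan's INSERT targetlist was built before a
 * column was added to (or dropped from) the target table and invalidation
 * somehow failed to rebuild the plan, we would error out here instead of
 * storing tuples that don't match the current rowtype.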
 */
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
	TupleDesc	resultDesc = RelationGetDescr(resultRel);
	int			attno = 0;
	ListCell   *lc;

	foreach(lc, targetList)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);
		Form_pg_attribute attr;

		if (tle->resjunk)
			continue;			/* ignore junk tlist items */

		if (attno >= resultDesc->natts)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table row type and query-specified row type do not match"),
					 errdetail("Query has too many columns.")));
		attr = TupleDescAttr(resultDesc, attno);
		attno++;

		if (!attr->attisdropped)
		{
			/* Normal case: demand type match */
			if (exprType((Node *) tle->expr) != attr->atttypid)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
								   format_type_be(attr->atttypid),
								   attno,
								   format_type_be(exprType((Node *) tle->expr)))));
		}
		else
		{
			/*
			 * For a dropped column, we can't check atttypid (it's likely 0).
			 * In any case the planner has most likely inserted an INT4 null.
			 * What we insist on is just *some* NULL constant.
			 */
			if (!IsA(tle->expr, Const) ||
				!((Const *) tle->expr)->constisnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
								   attno)));
		}
	}
	if (attno != resultDesc->natts)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("table row type and query-specified row type do not match"),
				 errdetail("Query has too few columns.")));
}

/*
 * ExecProcessReturning --- evaluate a RETURNING list
 *
 * projectReturning: the projection to evaluate
 * resultRelOid: result relation's OID
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top subplan node
 *
 * In cross-partition UPDATE cases, projectReturning and planSlot are as
 * for the source partition, and tupleSlot must conform to that.  But
 * resultRelOid is for the destination partition.
 *
 * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
 * scan tuple.
 *
 * Returns a slot holding the result tuple
 */
static TupleTableSlot *
ExecProcessReturning(ProjectionInfo *projectReturning,
					 Oid resultRelOid,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot)
{
	ExprContext *econtext = projectReturning->pi_exprContext;

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous cycle.
	 */
	ResetExprContext(econtext);

	/* Make tuple and any needed join variables available to ExecProject */
	if (tupleSlot)
		econtext->ecxt_scantuple = tupleSlot;
	else
	{
		HeapTuple	tuple;

		/*
		 * RETURNING expressions might reference the tableoid column, so be
		 * sure we expose the desired OID, ie that of the real target
		 * relation.
		 */
		Assert(!TupIsNull(econtext->ecxt_scantuple));
		tuple = ExecMaterializeSlot(econtext->ecxt_scantuple);
		tuple->t_tableOid = resultRelOid;
	}
	econtext->ecxt_outertuple = planSlot;

	/* Compute the RETURNING expressions */
	return ExecProject(projectReturning);
}

/*
 * ExecCheckHeapTupleVisible -- verify heap tuple is visible
 *
 * It would not be consistent with guarantees of the higher isolation levels to
 * proceed with avoiding insertion (taking speculative insertion's alternative
 * path) on the basis of another tuple that is not visible to our MVCC
 * snapshot.  Check for the need to raise a serialization failure, and do so
 * as necessary.
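 *
 * For example, under REPEATABLE READ, if another transaction inserted the
 * conflicting row and committed after our snapshot was taken, an
 * INSERT ... ON CONFLICT DO NOTHING must raise a serialization failure
 * rather than silently skip the insertion based on a row we cannot see.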
 */
static void
ExecCheckHeapTupleVisible(EState *estate,
						  HeapTuple tuple,
						  Buffer buffer)
{
	if (!IsolationUsesXactSnapshot())
		return;

	/*
	 * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
	 * Caller should be holding pin, but not lock.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
	{
		/*
		 * We should not raise a serialization failure if the conflict is
		 * against a tuple inserted by our own transaction, even if it's not
		 * visible to our snapshot.  (This would happen, for example, if
		 * conflicting keys are proposed for insertion in a single command.)
		 */
		if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
			ereport(ERROR,
					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
					 errmsg("could not serialize access due to concurrent update")));
	}
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
}

/*
 * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
 */
static void
ExecCheckTIDVisible(EState *estate,
					ResultRelInfo *relinfo,
					ItemPointer tid)
{
	Relation	rel = relinfo->ri_RelationDesc;
	Buffer		buffer;
	HeapTupleData tuple;

	/* Redundantly check isolation level */
	if (!IsolationUsesXactSnapshot())
		return;

	tuple.t_self = *tid;
	if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
	ReleaseBuffer(buffer);
}

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		For INSERT, we have to insert the tuple into the target relation
 *		and insert appropriate tuples into the index relations.
 *
 *		slot contains the new tuple value to be stored.
 *		planSlot is the output of the ModifyTable's subplan; we use it
 *		to access "junk" columns that are not going to be stored.
 *		In a cross-partition UPDATE, srcSlot is the slot that held the
 *		updated tuple for the source relation; otherwise it's NULL.
 *
 *		returningRelInfo is the resultRelInfo for the source relation of a
 *		cross-partition UPDATE; otherwise it's the current result relation.
 *		We use it to process RETURNING lists, for reasons explained below.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   TupleTableSlot *srcSlot,
		   ResultRelInfo *returningRelInfo,
		   EState *estate,
		   bool canSetTag)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;
	List	   *recheckIndexes = NIL;
	TupleTableSlot *result = NULL;
	TransitionCaptureState *ar_insert_trig_tcs;
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	OnConflictAction onconflict = node->onConflictAction;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/*
	 * If the result relation has OIDs, force the tuple's OID to zero so that
	 * heap_insert will assign a fresh OID.  Usually the OID already will be
	 * zero at this point, but there are corner cases where the plan tree can
	 * return a tuple extracted literally from some table with the same
	 * rowtype.
	 *
	 * XXX if we ever wanted to allow users to assign their own OIDs to new
	 * rows, this'd be the place to do it.  For the moment, we make a point of
	 * doing this before calling triggers, so that a user-supplied trigger
	 * could hack the OID if desired.
	 */
	if (resultRelationDesc->rd_rel->relhasoids)
		HeapTupleSetOid(tuple, InvalidOid);

	/*
	 * BEFORE ROW INSERT Triggers.
	 *
	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
	 * violations before firing these triggers, because they can change the
	 * values to insert.  Also, they can run arbitrary user-defined code with
	 * side-effects that we can't cancel by just not inserting the tuple.
	 */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
	{
		slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* trigger might have changed tuple */
		tuple = ExecMaterializeSlot(slot);
	}

	/* INSTEAD OF ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
	{
		slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* trigger might have changed tuple */
		tuple = ExecMaterializeSlot(slot);

		newId = InvalidOid;
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * insert into foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* FDW might have changed tuple */
		tuple = ExecMaterializeSlot(slot);

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so initialize t_tableOid before evaluating them.
		 */
		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);

		newId = InvalidOid;
	}
	else
	{
		WCOKind		wco_kind;

		/*
		 * Constraints might reference the tableoid column, so initialize
		 * t_tableOid before evaluating them.
		 */
		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Check any RLS WITH CHECK policies.
		 *
		 * Normally we should check INSERT policies.  But if the insert is the
		 * result of a partition key update that moved the tuple to a new
		 * partition, we should instead check UPDATE policies, because we are
		 * executing policies defined on the target table, and not those
		 * defined on the child partitions.
		 */
		wco_kind = (mtstate->operation == CMD_UPDATE) ?
			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;

		/*
		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
		 * we are looking for at this point.
		 */
		if (resultRelInfo->ri_WithCheckOptions != NIL)
			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);

		/*
		 * Check the constraints of the tuple.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * Also check the tuple against the partition constraint, if there is
		 * one; except that if we got here via tuple-routing, we don't need to
		 * recheck it unless a BEFORE ROW trigger is defined on the partition
		 * (routing already verified the constraint, but such a trigger could
		 * have modified the tuple since then).
		 */
		if (resultRelInfo->ri_PartitionCheck &&
			(resultRelInfo->ri_RootResultRelInfo == NULL ||
			 (resultRelInfo->ri_TrigDesc &&
			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
			ExecPartitionCheck(resultRelInfo, slot, estate, true);

		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
		{
			/* Perform a speculative insertion. */
			uint32		specToken;
			ItemPointerData conflictTid;
			bool		specConflict;
			List	   *arbiterIndexes;

			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;

			/*
			 * Do a non-conclusive check for conflicts first.
			 *
			 * We're not holding any locks yet, so this doesn't guarantee that
			 * the later insert won't conflict.  But it avoids leaving behind
			 * a lot of canceled speculative insertions, if you run a lot of
			 * INSERT ON CONFLICT statements that do conflict.
			 *
			 * We loop back here if we find a conflict below, either during
			 * the pre-check, or when we re-check after inserting the tuple
			 * speculatively.
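			 *
			 * In outline, the speculative-insertion protocol below is:
			 *   1. pre-check the arbiter indexes for an existing conflict;
			 *   2. acquire a speculative-insertion token and heap_insert the
			 *      tuple marked with it;
			 *   3. insert index entries, re-checking for conflicts;
			 *   4. heap_finish_speculative on success, or
			 *      heap_abort_speculative and retry from step 1 on conflict.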
			 */
	vlock:
			specConflict = false;
			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
										   arbiterIndexes))
			{
				/* committed conflict tuple found */
				if (onconflict == ONCONFLICT_UPDATE)
				{
					/*
					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
					 * part.  Be prepared to retry if the UPDATE fails because
					 * of another concurrent UPDATE/DELETE to the conflict
					 * tuple.
					 */
					TupleTableSlot *returning = NULL;

					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
											 &conflictTid, planSlot, slot,
											 estate, canSetTag, &returning))
					{
						InstrCountTuples2(&mtstate->ps, 1);
						return returning;
					}
					else
						goto vlock;
				}
				else
				{
					/*
					 * In case of ON CONFLICT DO NOTHING, do nothing.
					 * However, verify that the tuple is visible to the
					 * executor's MVCC snapshot at higher isolation levels.
					 */
					Assert(onconflict == ONCONFLICT_NOTHING);
					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
					InstrCountTuples2(&mtstate->ps, 1);
					return NULL;
				}
			}

			/*
			 * Before we start insertion proper, acquire our "speculative
			 * insertion lock".  Others can use that to wait for us to decide
			 * if we're going to go ahead with the insertion, instead of
			 * waiting for the whole transaction to complete.
			 */
			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
			HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);

			/* insert the tuple, with the speculative token */
			newId = heap_insert(resultRelationDesc, tuple,
								estate->es_output_cid,
								HEAP_INSERT_SPECULATIVE,
								NULL);

			/* insert index entries for tuple */
			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
												   estate, true, &specConflict,
												   arbiterIndexes);

			/* adjust the tuple's state accordingly */
			if (!specConflict)
				heap_finish_speculative(resultRelationDesc, tuple);
			else
				heap_abort_speculative(resultRelationDesc, tuple);

			/*
			 * Wake up anyone waiting for our decision.  They will re-check
			 * the tuple, see that it's no longer speculative, and wait on our
			 * XID as if this was a regularly inserted tuple all along.  Or if
			 * we killed the tuple, they will see it's dead, and proceed as if
			 * the tuple never existed.
			 */
			SpeculativeInsertionLockRelease(GetCurrentTransactionId());

			/*
			 * If there was a conflict, start from the beginning.  We'll do
			 * the pre-check again, which will now find the conflicting tuple
			 * (unless it aborts before we get there).
			 */
			if (specConflict)
			{
				list_free(recheckIndexes);
				goto vlock;
			}

			/* Since there was no insertion conflict, we're done */
		}
		else
		{
			/*
			 * insert the tuple normally.
			 *
			 * Note: heap_insert returns the tid (location) of the new tuple
			 * in the t_self field.
			 */
			newId = heap_insert(resultRelationDesc, tuple,
								estate->es_output_cid,
								0, NULL);

			/* insert index entries for tuple */
			if (resultRelInfo->ri_NumIndices > 0)
				recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
													   estate, false, NULL,
													   NIL);
		}
	}

	if (canSetTag)
	{
		(estate->es_processed)++;
		estate->es_lastoid = newId;
		setLastTid(&(tuple->t_self));
	}

	/*
	 * If this insert is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition NEW TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_insert_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_new_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
							 NULL,
							 tuple,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the NEW TABLE row, so make sure any AR
		 * INSERT trigger fired below doesn't capture it again.
		 */
		ar_insert_trig_tcs = NULL;
	}

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
						 ar_insert_trig_tcs);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually inserting the
	 * record into the heap and all indexes.
	 *
	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
	 * tuple will never be seen if it violates the WITH CHECK OPTION.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (returningRelInfo->ri_projectReturning)
	{
		/*
		 * In a cross-partition UPDATE with RETURNING, we have to use the
		 * source partition's RETURNING list, because that matches the output
		 * of the planSlot, while the destination partition might have
		 * different resjunk columns.  This means we have to map the
		 * destination tuple back to the source's format so we can apply that
		 * RETURNING list.  This is expensive, but it should be an uncommon
		 * corner case, so we won't spend much effort on making it fast.
		 *
		 * We assume that we can use srcSlot to hold the re-converted tuple.
		 * Note that in the common case where the child partitions both match
		 * the root's format, previous optimizations will have resulted in
		 * slot and srcSlot being identical, cueing us that there's nothing to
		 * do here.
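		 *
		 * For example, in an UPDATE ... RETURNING * on a partitioned table
		 * where the row moves between partitions whose physical rowtypes
		 * differ (say, because one partition has dropped columns or was
		 * attached with a different column order), the destination tuple
		 * must be converted back to the source partition's rowtype before
		 * the RETURNING projection can be applied.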
		 */
		if (returningRelInfo != resultRelInfo && slot != srcSlot)
		{
			Relation	srcRelationDesc = returningRelInfo->ri_RelationDesc;
			TupleConversionMap *map;

			map = convert_tuples_by_name(RelationGetDescr(resultRelationDesc),
										 RelationGetDescr(srcRelationDesc),
										 gettext_noop("could not convert row type"));
			if (map)
			{
				HeapTuple	origTuple = ExecMaterializeSlot(slot);
				HeapTuple	newTuple;

				newTuple = do_convert_tuple(origTuple, map);

				/* do_convert_tuple doesn't copy system columns, so do that */
				newTuple->t_self = newTuple->t_data->t_ctid =
					origTuple->t_self;
				newTuple->t_tableOid = origTuple->t_tableOid;

				HeapTupleHeaderSetXmin(newTuple->t_data,
									   HeapTupleHeaderGetRawXmin(origTuple->t_data));
				HeapTupleHeaderSetCmin(newTuple->t_data,
									   HeapTupleHeaderGetRawCommandId(origTuple->t_data));
				HeapTupleHeaderSetXmax(newTuple->t_data,
									   InvalidTransactionId);

				if (RelationGetDescr(resultRelationDesc)->tdhasoid)
				{
					Assert(RelationGetDescr(srcRelationDesc)->tdhasoid);
					HeapTupleSetOid(newTuple, HeapTupleGetOid(origTuple));
				}

				slot = ExecStoreTuple(newTuple, srcSlot, InvalidBuffer, true);

				free_conversion_map(map);
			}
		}

		result = ExecProcessReturning(returningRelInfo->ri_projectReturning,
									  RelationGetRelid(resultRelationDesc),
									  slot, planSlot);
	}

	return result;
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *
 *		When deleting from a table, tupleid identifies the tuple to
 *		delete and oldtuple is NULL.  When deleting from a view,
 *		oldtuple is passed to the INSTEAD OF triggers and identifies
 *		what to delete, and tupleid is invalid.  When deleting from a
 *		foreign table, tupleid is invalid; the FDW has to figure out
 *		which row to delete using data from the planSlot.  oldtuple is
 *		passed to foreign table triggers; it is NULL when the foreign
 *		table has no relevant triggers.  We use tupleDeleted to indicate
 *		whether the tuple is actually deleted; callers can use it to
 *		decide whether to continue the operation.  When this DELETE is
 *		part of an UPDATE of the partition key, the slot returned by
 *		EvalPlanQual() is passed back using the output parameter epqslot.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecDelete(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool processReturning,
		   bool canSetTag,
		   bool changingPart,
		   bool *tupleDeleted,
		   TupleTableSlot **epqslot)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	HeapUpdateFailureData hufd;
	TupleTableSlot *slot = NULL;
	TransitionCaptureState *ar_delete_trig_tcs;

	if (tupleDeleted)
		*tupleDeleted = false;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
										tupleid, oldtuple, epqslot);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}

	/* INSTEAD OF ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
	{
		bool		dodelete;

		Assert(oldtuple != NULL);
		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		HeapTuple	tuple;

		/*
		 * delete from foreign table: let the FDW do it
		 *
		 * We offer the trigger tuple slot as a place to store RETURNING data,
		 * although the FDW can return some other slot if it wants.  Set up
		 * the slot's tupdesc so the FDW doesn't need to do that for itself.
		 */
		slot = estate->es_trig_tuple_slot;
		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));

		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * RETURNING expressions might reference the tableoid column, so
		 * initialize t_tableOid before evaluating them.
		 */
		if (slot->tts_isempty)
			ExecStoreAllNullTuple(slot);
		tuple = ExecMaterializeSlot(slot);
		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		/*
		 * delete the tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be deleted is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
ldelete:;
		result = heap_delete(resultRelationDesc, tupleid,
							 estate->es_output_cid,
							 estate->es_crosscheck_snapshot,
							 true /* wait for commit */ ,
							 &hufd,
							 changingPart);
		switch (result)
		{
			case HeapTupleSelfUpdated:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join DELETE
				 * where multiple tuples join to the same target tuple.  This
				 * is somewhat questionable, but Postgres has always allowed
				 * it: we just ignore additional deletion attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the deletion, but it is equally unsafe to
				 * proceed.  We don't want to discard the original DELETE
				 * while keeping the triggered actions based on its deletion;
				 * and it would be no better to allow the original DELETE
				 * while discarding updates that it triggered.  The row update
				 * carries some information that might be important according
				 * to business rules; so throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the DELETE and then return NULL to cancel
				 * the outer delete.
				 */
				if (hufd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already deleted by self; nothing to do */
				return NULL;

			case HeapTupleMayBeUpdated:
				break;

			case HeapTupleUpdated:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent update")));
				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));

				if (!ItemPointerEquals(tupleid, &hufd.ctid))
				{
					TupleTableSlot *my_epqslot;

					my_epqslot = EvalPlanQual(estate,
											  epqstate,
											  resultRelationDesc,
											  resultRelInfo->ri_RangeTableIndex,
											  LockTupleExclusive,
											  &hufd.ctid,
											  hufd.xmax);
					if (!TupIsNull(my_epqslot))
					{
						*tupleid = hufd.ctid;

						/*
						 * If requested, skip the delete and pass back the
						 * updated row.
						 */
						if (epqslot)
						{
							*epqslot = my_epqslot;
							return NULL;
						}
						else
							goto ldelete;
					}
				}
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized heap_delete status: %u", result);
				return NULL;
		}

		/*
		 * Note: Normally one would think that we have to delete index tuples
		 * associated with the heap tuple now...
		 *
		 * ... but in POSTGRES, we have no need to do this because VACUUM will
		 * take care of it later.  We can't delete index tuples immediately
		 * anyway, since the tuple is still visible to other transactions.
		 */
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* Tell caller that the delete actually happened. */
	if (tupleDeleted)
		*tupleDeleted = true;

	/*
	 * If this delete is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition OLD TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_delete_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_old_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo,
							 tupleid,
							 oldtuple,
							 NULL,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the OLD TABLE row, so make sure any AR
		 * DELETE trigger fired below doesn't capture it again.
		 */
		ar_delete_trig_tcs = NULL;
	}

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
						 ar_delete_trig_tcs);

	/* Process RETURNING if present and if requested */
	if (processReturning && resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means we first
		 * have to fetch it.  We can use the trigger tuple slot.
		 */
		TupleTableSlot *rslot;
		HeapTupleData deltuple;
		Buffer		delbuffer;

		if (resultRelInfo->ri_FdwRoutine)
		{
			/* FDW must have provided a slot containing the deleted row */
			Assert(!TupIsNull(slot));
			delbuffer = InvalidBuffer;
		}
		else
		{
			slot = estate->es_trig_tuple_slot;
			if (oldtuple != NULL)
			{
				deltuple = *oldtuple;
				delbuffer = InvalidBuffer;
			}
			else
			{
				deltuple.t_self = *tupleid;
				if (!heap_fetch(resultRelationDesc, SnapshotAny,
								&deltuple, &delbuffer, false, NULL))
					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
			}

			if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
				ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
			ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);
		}

		rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
									 RelationGetRelid(resultRelationDesc),
									 slot, planSlot);

		/*
		 * Before releasing the target tuple again, make sure rslot has a
		 * local copy of any pass-by-reference values.
		 */
		ExecMaterializeSlot(rslot);

		ExecClearTuple(slot);
		if (BufferIsValid(delbuffer))
			ReleaseBuffer(delbuffer);

		return rslot;
	}

	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
 *
 *		When updating a table, tupleid identifies the tuple to
 *		update and oldtuple is NULL.  When updating a view, oldtuple
 *		is passed to the INSTEAD OF triggers and identifies what to
 *		update, and tupleid is invalid.  When updating a foreign table,
 *		tupleid is invalid; the FDW has to figure out which row to
 *		update using data from the planSlot.  oldtuple is passed to
 *		foreign table triggers; it is NULL when the foreign table has
 *		no relevant triggers.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecUpdate(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool canSetTag)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	HeapUpdateFailureData hufd;
	List	   *recheckIndexes = NIL;
	TupleConversionMap *saved_tcs_map = NULL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_before_row)
	{
		slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
									tupleid, oldtuple, slot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* trigger might have changed tuple */
		tuple = ExecMaterializeSlot(slot);
	}

	/* INSTEAD OF ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
	{
		slot = ExecIRUpdateTriggers(estate, resultRelInfo,
									oldtuple, slot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* trigger might have changed tuple */
		tuple = ExecMaterializeSlot(slot);
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * update in foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/* FDW might have changed tuple */
		tuple = ExecMaterializeSlot(slot);

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so initialize t_tableOid before evaluating them.
		 */
		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		LockTupleMode lockmode;
		bool		partition_constraint_failed;

		/*
		 * Constraints might reference the tableoid column, so initialize
		 * t_tableOid before evaluating them.
		 */
		tuple->t_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Check any RLS UPDATE WITH CHECK policies
		 *
		 * If we generate a new candidate tuple after EvalPlanQual testing, we
		 * must loop back here and recheck any RLS policies and constraints.
		 * (We don't need to redo triggers, however.  If there are any BEFORE
		 * triggers then trigger.c will have done heap_lock_tuple to lock the
		 * correct tuple, so there's no need to do them again.)
		 */
lreplace:;

		/*
		 * If the partition constraint fails, this row might get moved to
		 * another partition, in which case we should check the RLS CHECK
		 * policy just before inserting into the new partition, rather than
		 * doing it here.  This is because a trigger on that partition might
		 * again change the row.  So skip the WCO checks if the partition
		 * constraint fails.
		 */
		partition_constraint_failed =
			resultRelInfo->ri_PartitionCheck &&
			!ExecPartitionCheck(resultRelInfo, slot, estate, false);

		if (!partition_constraint_failed &&
			resultRelInfo->ri_WithCheckOptions != NIL)
		{
			/*
			 * ExecWithCheckOptions() will skip any WCOs which are not of the
			 * kind we are looking for at this point.
			 */
			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
								 resultRelInfo, slot, estate);
		}

		/*
		 * If a partition check failed, try to move the row into the right
		 * partition.
		 */
		if (partition_constraint_failed)
		{
			bool		tuple_deleted;
			TupleTableSlot *ret_slot;
			TupleTableSlot *orig_slot = slot;
			TupleTableSlot *epqslot = NULL;
			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
			int			map_index;
			TupleConversionMap *tupconv_map;

			/*
			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
			 * original row to migrate to a different partition.  Maybe this
			 * can be implemented some day, but it seems a fringe feature with
			 * little redeeming value.
			 */
			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("invalid ON UPDATE specification"),
						 errdetail("The result tuple would appear in a different partition than the original tuple.")));

			/*
			 * When an UPDATE is run directly on a leaf partition, we will not
			 * have partition tuple routing set up.  In that case, fail with a
			 * partition constraint violation error.
			 */
			if (proute == NULL)
				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);

			/*
			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
			 * processing; we want to return rows from the INSERT instead.
			 */
			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
					   estate, false, false /* canSetTag */ ,
					   true /* changingPart */ , &tuple_deleted, &epqslot);

			/*
			 * If the DELETE didn't happen for some reason (e.g. a trigger
			 * prevented it, or the row was already deleted by self, or it was
			 * concurrently deleted by another transaction), then we should
			 * skip the insert as well; otherwise, an UPDATE could cause an
			 * increase in the total number of rows across all partitions,
			 * which is clearly wrong.
			 *
			 * For a normal UPDATE, the case where the tuple has been the
			 * subject of a concurrent UPDATE or DELETE would be handled by
			 * the EvalPlanQual machinery, but for an UPDATE that we've
			 * translated into a DELETE from this partition and an INSERT into
			 * some other partition, that's not available, because CTID chains
			 * can't span relation boundaries.  We mimic the semantics to a
			 * limited extent by skipping the INSERT if the DELETE fails to
			 * find a tuple.  This ensures that two concurrent attempts to
			 * UPDATE the same tuple at the same time can't turn one tuple
			 * into two, and that an UPDATE of a just-deleted tuple can't
			 * resurrect it.
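			 *
			 * For example, under READ COMMITTED, if two sessions UPDATE the
			 * same row concurrently and the first session's update moves the
			 * row to another partition, the second session's UPDATE simply
			 * finds the row deleted and affects zero rows, rather than
			 * chasing the row into its new partition.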
			 */
			if (!tuple_deleted)
			{
				/*
				 * epqslot will typically be NULL.  But when ExecDelete()
				 * finds that another transaction has concurrently updated the
				 * same row, it re-fetches the row, skips the delete, and
				 * epqslot is set to the re-fetched tuple slot.  In that case,
				 * we need to do all the checks again.
				 */
				if (TupIsNull(epqslot))
					return NULL;
				else
				{
					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}

			/*
			 * Updates set the transition capture map only when a new subplan
			 * is chosen.  But for inserts, it is set for each row.  So after
			 * INSERT, we need to revert to the map created for UPDATE;
			 * otherwise the next UPDATE will incorrectly use the one created
			 * for INSERT.  So first save the one created for UPDATE.
			 */
			if (mtstate->mt_transition_capture)
				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;

			/*
			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
			 * should convert the tuple into root's tuple descriptor, since
			 * ExecInsert() starts the search from root.  The tuple conversion
			 * map list is in the order of mtstate->resultRelInfo[], so to
			 * retrieve the one for this resultRel, we need to know the
			 * position of the resultRel in mtstate->resultRelInfo[].
			 */
			map_index = resultRelInfo - mtstate->resultRelInfo;
			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
			tuple = ConvertPartitionTupleSlot(tupconv_map,
											  tuple,
											  proute->root_tuple_slot,
											  &slot);

			/*
			 * Prepare for tuple routing, making it look like we're inserting
			 * into the root.
			 */
			Assert(mtstate->rootResultRelInfo != NULL);
			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
										   mtstate->rootResultRelInfo, slot);

			ret_slot = ExecInsert(mtstate, slot, planSlot,
								  orig_slot, resultRelInfo,
								  estate, canSetTag);

			/* Revert ExecPrepareTupleRouting's node change. */
			estate->es_result_relation_info = resultRelInfo;
			if (mtstate->mt_transition_capture)
			{
				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
			}

			return ret_slot;
		}

		/*
		 * Check the constraints of the tuple.  We've already checked the
		 * partition constraint above; however, we must still ensure the tuple
		 * passes all other constraints, so we will call ExecConstraints() and
		 * have it validate all remaining checks.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * replace the heap tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be updated is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
		result = heap_update(resultRelationDesc, tupleid, tuple,
							 estate->es_output_cid,
							 estate->es_crosscheck_snapshot,
							 true /* wait for commit */ ,
							 &hufd, &lockmode);
		switch (result)
		{
			case HeapTupleSelfUpdated:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join UPDATE
				 * where multiple tuples join to the same target tuple.  This
				 * is pretty questionable, but Postgres has always allowed it:
				 * we just execute the first update action and ignore
				 * additional update attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the update, but it is equally unsafe to
				 * proceed.  We don't want to discard the original UPDATE
				 * while keeping the triggered actions based on it; and we
				 * have no principled way to merge this update with the
				 * previous ones.  So throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the UPDATE (assuming it can figure out how)
				 * and then return NULL to cancel the outer update.
				 */
				if (hufd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already updated by self; nothing to do */
				return NULL;

			case HeapTupleMayBeUpdated:
				break;

			case HeapTupleUpdated:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent update")));
				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("tuple to be updated was already moved to another partition due to concurrent update")));

				if (!ItemPointerEquals(tupleid, &hufd.ctid))
				{
					TupleTableSlot *epqslot;

					epqslot = EvalPlanQual(estate,
										   epqstate,
										   resultRelationDesc,
										   resultRelInfo->ri_RangeTableIndex,
										   lockmode,
										   &hufd.ctid,
										   hufd.xmax);
					if (!TupIsNull(epqslot))
					{
						*tupleid = hufd.ctid;
						slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
						tuple = ExecMaterializeSlot(slot);
						goto lreplace;
					}
				}
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized heap_update status: %u", result);
				return NULL;
		}

		/*
		 * Note: instead of having to update the old index tuples associated
		 * with the heap tuple, all we do is form and insert new index tuples.
		 * This is because UPDATEs are actually DELETEs and INSERTs, and index
		 * tuple deletion is done later by VACUUM (see notes in ExecDelete).
		 * All we do here is insert new index tuples.  -cim 9/27/89
		 */

		/*
		 * insert index entries for tuple
		 *
		 * Note: heap_update returns the tid (location) of the new tuple in
		 * the t_self field.
		 *
		 * If it's a HOT update, we mustn't insert new index entries.
		 */
		if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
												   estate, false, NULL, NIL);
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple,
						 recheckIndexes,
						 mtstate->operation == CMD_INSERT ?
						 mtstate->mt_oc_transition_capture :
						 mtstate->mt_transition_capture);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually updating the
	 * record in the heap and all indexes.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		return ExecProcessReturning(resultRelInfo->ri_projectReturning,
									RelationGetRelid(resultRelationDesc),
									slot, planSlot);

	return NULL;
}

/*
 * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
 *
 * Try to lock tuple for update as part of speculative insertion.  If
 * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
 * (but still lock row, even though it may not satisfy estate's
 * snapshot).
 *
 * Returns true if we're done (with or without an update), or false if
 * the caller must retry the INSERT from scratch.
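 *
 * This is reached, for example, by statements of the form:
 *		INSERT INTO t (k, v) VALUES (1, 'x')
 *		ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v;
 * when a row with k = 1 already exists.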
 */
static bool
ExecOnConflictUpdate(ModifyTableState *mtstate,
					 ResultRelInfo *resultRelInfo,
					 ItemPointer conflictTid,
					 TupleTableSlot *planSlot,
					 TupleTableSlot *excludedSlot,
					 EState *estate,
					 bool canSetTag,
					 TupleTableSlot **returning)
{
	ExprContext *econtext = mtstate->ps.ps_ExprContext;
	Relation	relation = resultRelInfo->ri_RelationDesc;
	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
	HeapTupleData tuple;
	HeapUpdateFailureData hufd;
	LockTupleMode lockmode;
	HTSU_Result test;
	Buffer		buffer;

	/* Determine lock mode to use */
	lockmode = ExecUpdateLockMode(estate, resultRelInfo);

	/*
	 * Lock tuple for update.  Don't follow updates when tuple cannot be
	 * locked without doing so.  A row locking conflict here means our
	 * previous conclusion that the tuple is conclusively committed is not
	 * true anymore.
	 */
	tuple.t_self = *conflictTid;
	test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
						   lockmode, LockWaitBlock, false, &buffer,
						   &hufd);
	switch (test)
	{
		case HeapTupleMayBeUpdated:
			/* success! */
			break;

		case HeapTupleInvisible:

			/*
			 * This can occur when a just-inserted tuple is updated again in
			 * the same command, e.g. because multiple rows with the same
			 * conflicting key values are inserted.
			 *
			 * This is somewhat similar to the ExecUpdate()
			 * HeapTupleSelfUpdated case.  We do not want to proceed because
			 * it would lead to the same row being updated a second time in
			 * some unspecified order, and in contrast to plain UPDATEs
			 * there's no historical behavior to break.
			 *
			 * It is the user's responsibility to prevent this situation from
			 * occurring.  These problems are why SQL-2003 similarly specifies
			 * that for SQL MERGE, an exception must be raised in the event of
			 * an attempt to update the same row twice.
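			 *
			 * For example (illustrative):
			 *		INSERT INTO t (k, v) VALUES (1, 'a'), (1, 'b')
			 *		ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v;
			 * attempts to update the row inserted for the first VALUES item
			 * while processing the second, and must raise this error.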
1490 */
1491 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
1492 ereport(ERROR,
1493 (errcode(ERRCODE_CARDINALITY_VIOLATION),
1494 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
1495 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
1496
1497 /* This shouldn't happen */
1498 elog(ERROR, "attempted to lock invisible tuple");
1499 break;
1500
1501 case HeapTupleSelfUpdated:
1502
1503 /*
1504 * This state should never be reached. As a dirty snapshot is used
1505 * to find conflicting tuples, speculative insertion wouldn't have
1506 * seen this row to conflict with.
1507 */
1508 elog(ERROR, "unexpected self-updated tuple");
1509 break;
1510
1511 case HeapTupleUpdated:
1512 if (IsolationUsesXactSnapshot())
1513 ereport(ERROR,
1514 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1515 errmsg("could not serialize access due to concurrent update")));
1516
1517 /*
1518 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
1519 * a partitioned table we shouldn't reach to a case where tuple to
1520 * be lock is moved to another partition due to concurrent update
1521 * of the partition key.
1522 */
1523 Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
1524
1525 /*
1526 * Tell caller to try again from the very start.
1527 *
1528 * It does not make sense to use the usual EvalPlanQual() style
1529 * loop here, as the new version of the row might not conflict
1530 * anymore, or the conflicting tuple has actually been deleted.
1531 */
1532 ReleaseBuffer(buffer);
1533 return false;
1534
1535 default:
1536 elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
1537 }
1538
1539 /*
1540 * Success, the tuple is locked.
1541 *
1542 * Reset per-tuple memory context to free any expression evaluation
1543 * storage allocated in the previous cycle.
1544 */
1545 ResetExprContext(econtext);
1546
1547 /*
1548 * Verify that the tuple is visible to our MVCC snapshot if the current
1549 * isolation level mandates that.
1550 *
1551 * It's not sufficient to rely on the check within ExecUpdate() as e.g.
1552 * CONFLICT ... WHERE clause may prevent us from reaching that.
1553 *
1554 * This means we only ever continue when a new command in the current
1555 * transaction could see the row, even though in READ COMMITTED mode the
1556 * tuple will not be visible according to the current statement's
1557 * snapshot. This is in line with the way UPDATE deals with newer tuple
1558 * versions.
1559 */
1560 ExecCheckHeapTupleVisible(estate, &tuple, buffer);
1561
1562 /* Store target's existing tuple in the state's dedicated slot */
1563 ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
1564
1565 /*
1566 * Make tuple and any needed join variables available to ExecQual and
1567 * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
1568 * the target's existing tuple is installed in the scantuple. EXCLUDED
1569 * has been made to reference INNER_VAR in setrefs.c, but there is no
1570 * other redirection.
1571 */
1572 econtext->ecxt_scantuple = mtstate->mt_existing;
1573 econtext->ecxt_innertuple = excludedSlot;
1574 econtext->ecxt_outertuple = NULL;
1575
1576 if (!ExecQual(onConflictSetWhere, econtext))
1577 {
1578 ReleaseBuffer(buffer);
1579 InstrCountFiltered1(&mtstate->ps, 1);
1580 return true; /* done with the tuple */
1581 }
1582
1583 if (resultRelInfo->ri_WithCheckOptions != NIL)
1584 {
1585 /*
1586 * Check target's existing tuple against UPDATE-applicable USING
1587 * security barrier quals (if any), enforced here as RLS checks/WCOs.
1588 *
1589 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
1590 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
1591 * but that's almost the extent of its special handling for ON
1592 * CONFLICT DO UPDATE.
1593 *
1594 * The rewriter will also have associated UPDATE applicable straight
1595 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
1596 * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
1597 * kinds, so there is no danger of spurious over-enforcement in the
1598 * INSERT or UPDATE path.
1599 */
1600 ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
1601 mtstate->mt_existing,
1602 mtstate->ps.state);
1603 }

	/* Project the new tuple version */
	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);

	/*
	 * Note that it is possible that the target tuple has been modified in
	 * this session, after the above heap_lock_tuple.  We choose to not error
	 * out in that case, in line with ExecUpdate's treatment of similar
	 * cases.  This can happen if an UPDATE is triggered from within
	 * ExecQual(), ExecWithCheckOptions() or ExecProject() above, e.g. by
	 * selecting from a wCTE in the ON CONFLICT's SET.
	 */

	/* Execute UPDATE with projection */
	*returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
							mtstate->mt_conflproj, planSlot,
							&mtstate->mt_epqstate, mtstate->ps.state,
							canSetTag);

	ReleaseBuffer(buffer);
	return true;
}


/*
 * Process BEFORE EACH STATEMENT triggers
 */
static void
fireBSTriggers(ModifyTableState *node)
{
	ModifyTable *plan = (ModifyTable *) node->ps.plan;
	ResultRelInfo *resultRelInfo = node->resultRelInfo;

	/*
	 * If the node modifies a partitioned table, we must fire its triggers.
	 * Note that in that case, node->resultRelInfo points to the first leaf
	 * partition, not the root table.
	 */
	if (node->rootResultRelInfo != NULL)
		resultRelInfo = node->rootResultRelInfo;

	switch (node->operation)
	{
		case CMD_INSERT:
			ExecBSInsertTriggers(node->ps.state, resultRelInfo);
			if (plan->onConflictAction == ONCONFLICT_UPDATE)
				ExecBSUpdateTriggers(node->ps.state,
									 resultRelInfo);
			break;
		case CMD_UPDATE:
			ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
			break;
		default:
			elog(ERROR, "unknown operation");
			break;
	}
}

/*
 * Return the target rel ResultRelInfo.
 *
 * This relation is the same as:
 * - the relation for which we will fire AFTER STATEMENT triggers.
 * - the relation into whose tuple format all captured transition tuples
 *   must be converted.
 * - the root partitioned table.
 */
static ResultRelInfo *
getTargetResultRelInfo(ModifyTableState *node)
{
	/*
	 * Note that if the node modifies a partitioned table,
	 * node->resultRelInfo points to the first leaf partition, not the root
	 * table.
	 */
	if (node->rootResultRelInfo != NULL)
		return node->rootResultRelInfo;
	else
		return node->resultRelInfo;
}

/*
 * Process AFTER EACH STATEMENT triggers
 */
static void
fireASTriggers(ModifyTableState *node)
{
	ModifyTable *plan = (ModifyTable *) node->ps.plan;
	ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);

	switch (node->operation)
	{
		case CMD_INSERT:
			if (plan->onConflictAction == ONCONFLICT_UPDATE)
				ExecASUpdateTriggers(node->ps.state,
									 resultRelInfo,
									 node->mt_oc_transition_capture);
			ExecASInsertTriggers(node->ps.state, resultRelInfo,
								 node->mt_transition_capture);
			break;
		case CMD_UPDATE:
			ExecASUpdateTriggers(node->ps.state, resultRelInfo,
								 node->mt_transition_capture);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(node->ps.state, resultRelInfo,
								 node->mt_transition_capture);
			break;
		default:
			elog(ERROR, "unknown operation");
			break;
	}
}

/*
 * Set up the state needed for collecting transition tuples for AFTER
 * triggers.
 */
static void
ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
{
	ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);

	/* Check for transition tables on the directly targeted relation. */
	mtstate->mt_transition_capture =
		MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
								   RelationGetRelid(targetRelInfo->ri_RelationDesc),
								   mtstate->operation);
	if (plan->operation == CMD_INSERT &&
		plan->onConflictAction == ONCONFLICT_UPDATE)
		mtstate->mt_oc_transition_capture =
			MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
									   RelationGetRelid(targetRelInfo->ri_RelationDesc),
									   CMD_UPDATE);
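
	/*
	 * For INSERT ... ON CONFLICT DO UPDATE, rows can be either inserted or
	 * updated, so two capture states may be needed: one collecting inserted
	 * tuples and one (built just above) collecting updated ones.  For
	 * illustration, with hypothetical transition-table triggers
	 *
	 *     CREATE TRIGGER t_ins AFTER INSERT ON t
	 *         REFERENCING NEW TABLE AS new_rows
	 *         FOR EACH STATEMENT EXECUTE PROCEDURE audit_ins();
	 *     CREATE TRIGGER t_upd AFTER UPDATE ON t
	 *         REFERENCING NEW TABLE AS upd_rows
	 *         FOR EACH STATEMENT EXECUTE PROCEDURE audit_upd();
	 *
	 * a single INSERT ... ON CONFLICT DO UPDATE statement feeds its
	 * inserted rows to the first capture state and its updated rows to the
	 * second.
	 */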

	/*
	 * If we found that we need to collect transition tuples then we may also
	 * need tuple conversion maps for any children that have TupleDescs that
	 * aren't compatible with the tuplestores.  (We can share these maps
	 * between the regular and ON CONFLICT cases.)
	 */
	if (mtstate->mt_transition_capture != NULL ||
		mtstate->mt_oc_transition_capture != NULL)
	{
		ExecSetupChildParentMapForTcs(mtstate);

		/*
		 * Install the conversion map for the first plan for UPDATE and
		 * DELETE operations.  It will be advanced each time we switch to
		 * the next plan.  (INSERT operations set it every time, so we need
		 * not update mtstate->mt_oc_transition_capture here.)
		 */
		if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
			mtstate->mt_transition_capture->tcs_map =
				tupconv_map_for_subplan(mtstate, 0);
	}
}

/*
 * ExecPrepareTupleRouting --- prepare for routing one tuple
 *
 * Determine the partition in which the tuple in slot is to be inserted,
 * and modify mtstate and estate to prepare for it.
 *
 * Caller must revert the estate changes after executing the insertion!
 * In mtstate, transition capture changes may also need to be reverted.
 *
 * Returns a slot holding the tuple of the partition rowtype.
 */
static TupleTableSlot *
ExecPrepareTupleRouting(ModifyTableState *mtstate,
						EState *estate,
						PartitionTupleRouting *proute,
						ResultRelInfo *targetRelInfo,
						TupleTableSlot *slot)
{
	ModifyTable *node;
	int			partidx;
	ResultRelInfo *partrel;
	HeapTuple	tuple;

	/*
	 * Determine the target partition.  If ExecFindPartition does not find a
	 * partition after all, it raises an error rather than returning;
	 * otherwise, the returned value is to be used as an index into the
	 * arrays for the ResultRelInfo and TupleConversionMap for the partition.
	 */
	partidx = ExecFindPartition(targetRelInfo,
								proute->partition_dispatch_info,
								slot,
								estate);
	Assert(partidx >= 0 && partidx < proute->num_partitions);

	/*
	 * Get the ResultRelInfo corresponding to the selected partition; if not
	 * yet there, initialize it.
	 */
	partrel = proute->partitions[partidx];
	if (partrel == NULL)
		partrel = ExecInitPartitionInfo(mtstate, targetRelInfo,
										proute, estate,
										partidx);

	/*
	 * Check whether the partition is routable, if we haven't done so yet.
	 *
	 * Note: an UPDATE of a partition key invokes an INSERT that moves the
	 * tuple to a new partition.  This check would be applied to a subplan
	 * partition of such an UPDATE that is chosen as the partition to route
	 * the tuple to.  The reason we do this check here rather than in
	 * ExecSetupPartitionTupleRouting is to avoid aborting such an UPDATE
	 * unnecessarily due to non-routable subplan partitions that may not be
	 * chosen for update tuple movement after all.
	 */
	if (!partrel->ri_PartitionReadyForRouting)
	{
		/* Verify the partition is a valid target for INSERT. */
		CheckValidResultRel(partrel, CMD_INSERT);

		/* Set up information needed for routing tuples to the partition. */
		ExecInitRoutingInfo(mtstate, estate, proute, partrel, partidx);
	}

	/*
	 * Make it look like we are inserting into the partition.
	 */
	estate->es_result_relation_info = partrel;

	/* Get the heap tuple out of the given slot. */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * If we're capturing transition tuples, we might need to convert from
	 * the partition rowtype to parent rowtype.
	 */
	if (mtstate->mt_transition_capture != NULL)
	{
		if (partrel->ri_TrigDesc &&
			partrel->ri_TrigDesc->trig_insert_before_row)
		{
			/*
			 * If there are any BEFORE triggers on the partition, we'll have
			 * to be ready to convert their result back to tuplestore format.
			 */
			mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
			mtstate->mt_transition_capture->tcs_map =
				TupConvMapForLeaf(proute, targetRelInfo, partidx);
		}
		else
		{
			/*
			 * Otherwise, just remember the original unconverted tuple, to
			 * avoid a needless round trip conversion.
			 */
			mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
			mtstate->mt_transition_capture->tcs_map = NULL;
		}
	}
	if (mtstate->mt_oc_transition_capture != NULL)
	{
		mtstate->mt_oc_transition_capture->tcs_map =
			TupConvMapForLeaf(proute, targetRelInfo, partidx);
	}

	/*
	 * Convert the tuple, if necessary.
	 */
	ConvertPartitionTupleSlot(proute->parent_child_tupconv_maps[partidx],
							  tuple,
							  proute->partition_tuple_slot,
							  &slot);
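
	/*
	 * (If the conversion map for this partition is NULL, meaning the
	 * partition's rowtype already matches the parent's, the tuple and slot
	 * are expected to pass through unchanged.)
	 */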

	/* Initialize information needed to handle ON CONFLICT DO UPDATE. */
	Assert(mtstate != NULL);
	node = (ModifyTable *) mtstate->ps.plan;
	if (node->onConflictAction == ONCONFLICT_UPDATE)
	{
		Assert(mtstate->mt_existing != NULL);
		ExecSetSlotDescriptor(mtstate->mt_existing,
							  RelationGetDescr(partrel->ri_RelationDesc));
		Assert(mtstate->mt_conflproj != NULL);
		ExecSetSlotDescriptor(mtstate->mt_conflproj,
							  partrel->ri_onConflict->oc_ProjTupdesc);
	}

	return slot;
}

/*
 * Initialize the child-to-root tuple conversion map array for UPDATE
 * subplans.
 *
 * This map array is required to convert the tuple from the subplan result
 * rel to the target table descriptor.  This requirement arises for two
 * independent scenarios:
 * 1. For update-tuple-routing.
 * 2. For capturing tuples in transition tables.
 */
static void
ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
{
	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
	ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
	TupleDesc	outdesc;
	int			numResultRelInfos = mtstate->mt_nplans;
	int			i;

	/*
	 * First check if there is already a per-subplan array allocated.  Even
	 * if there is already a per-leaf map array, we won't require a
	 * per-subplan one, since we will use the subplan offset array to convert
	 * the subplan index to a per-leaf index.
	 */
	if (mtstate->mt_per_subplan_tupconv_maps ||
		(mtstate->mt_partition_tuple_routing &&
		 mtstate->mt_partition_tuple_routing->child_parent_tupconv_maps))
		return;

	/*
	 * Build array of conversion maps from each child's TupleDesc to the one
	 * used in the target relation.  The map pointers may be NULL when no
	 * conversion is necessary, which is hopefully a common case.
	 */

	/* Get tuple descriptor of the target rel. */
	outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);

	mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
		palloc(sizeof(TupleConversionMap *) * numResultRelInfos);

	for (i = 0; i < numResultRelInfos; ++i)
	{
		mtstate->mt_per_subplan_tupconv_maps[i] =
			convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
								   outdesc,
								   gettext_noop("could not convert row type"));
	}
}

/*
 * Initialize the child-to-root tuple conversion map array required for
 * capturing transition tuples.
 *
 * The map array can be indexed either by subplan index or by leaf-partition
 * index.  For transition tables, we need subplan-indexed access to the map,
 * and where tuple-routing is present, we also require leaf-indexed access.
 */
static void
ExecSetupChildParentMapForTcs(ModifyTableState *mtstate)
{
	PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;

	/*
	 * If partition tuple routing is set up, we will require
	 * partition-indexed access.  In that case, create the map array indexed
	 * by partition; we will still be able to access the maps using a subplan
	 * index by converting the subplan index to a partition index using
	 * subplan_partition_offsets.  If tuple routing is not set up, it means
	 * we don't require partition-indexed access.  In that case, create just
	 * a subplan-indexed map.
	 */
	if (proute)
	{
		/*
		 * If a partition-indexed map array is to be created, the subplan map
		 * array has to be NULL.  If the subplan map array is already
		 * created, we won't be able to access the map using a partition
		 * index.
		 */
		Assert(mtstate->mt_per_subplan_tupconv_maps == NULL);

		ExecSetupChildParentMapForLeaf(proute);
	}
	else
		ExecSetupChildParentMapForSubplan(mtstate);
}

/*
 * For a given subplan index, get the tuple conversion map.
 */
static TupleConversionMap *
tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
{
	/*
	 * If a partition-indexed tuple conversion map array is allocated, we
	 * need to first get the index into the partition array.  Exactly *one*
	 * of the two arrays is allocated.  This is because if a partition array
	 * is required, we don't require a subplan-indexed array, since we can
	 * translate a subplan index into a partition index.  And we create a
	 * subplan-indexed array *only* if a partition-indexed array is not
	 * required.
	 */
	if (mtstate->mt_per_subplan_tupconv_maps == NULL)
	{
		int			leaf_index;
		PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;

		/*
		 * If the subplan-indexed array is NULL, things should have been
		 * arranged to convert the subplan index to a partition index.
		 */
		Assert(proute && proute->subplan_partition_offsets != NULL &&
			   whichplan < proute->num_subplan_partition_offsets);

		leaf_index = proute->subplan_partition_offsets[whichplan];
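
		/*
		 * For illustration: if an UPDATE's subplans scan only the second
		 * and fourth leaf partitions of the target table,
		 * subplan_partition_offsets would contain {1, 3}, so subplan 0 maps
		 * to leaf index 1 and subplan 1 maps to leaf index 3.
		 */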
		return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate),
								 leaf_index);
	}
	else
	{
		Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
		return mtstate->mt_per_subplan_tupconv_maps[whichplan];
	}
}

/* ----------------------------------------------------------------
 *	   ExecModifyTable
 *
 *		Perform table modifications as required, and return RETURNING results
 *		if needed.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecModifyTable(PlanState *pstate)
{
	ModifyTableState *node = castNode(ModifyTableState, pstate);
	PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
	EState	   *estate = node->ps.state;
	CmdType		operation = node->operation;
	ResultRelInfo *saved_resultRelInfo;
	ResultRelInfo *resultRelInfo;
	PlanState  *subplanstate;
	JunkFilter *junkfilter;
	TupleTableSlot *slot;
	TupleTableSlot *planSlot;
	ItemPointer tupleid;
	ItemPointerData tuple_ctid;
	HeapTupleData oldtupdata;
	HeapTuple	oldtuple;

	CHECK_FOR_INTERRUPTS();

	/*
	 * This should NOT get called during EvalPlanQual; we should have passed
	 * a subplan tree to EvalPlanQual, instead.  Use a runtime test, not just
	 * an Assert, because this condition is easy to miss in testing.  (Note:
	 * although ModifyTable should not get executed within an EvalPlanQual
	 * operation, we do have to allow it to be initialized and shut down in
	 * case it is within a CTE subplan.  Hence this test must be here, not in
	 * ExecInitModifyTable.)
	 */
	if (estate->es_epqTuple != NULL)
		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");

	/*
	 * If we've already completed processing, don't try to do more.  We need
	 * this test because ExecPostprocessPlan might call us an extra time, and
	 * our subplan's nodes aren't necessarily robust against being called
	 * extra times.
	 */
	if (node->mt_done)
		return NULL;

	/*
	 * On first call, fire BEFORE STATEMENT triggers before proceeding.
	 */
	if (node->fireBSTriggers)
	{
		fireBSTriggers(node);
		node->fireBSTriggers = false;
	}

	/* Preload local variables */
	resultRelInfo = node->resultRelInfo + node->mt_whichplan;
	subplanstate = node->mt_plans[node->mt_whichplan];
	junkfilter = resultRelInfo->ri_junkFilter;

	/*
	 * es_result_relation_info must point to the currently active result
	 * relation while we are within this ModifyTable node.  Even though
	 * ModifyTable nodes can't be nested statically, they can be nested
	 * dynamically (since our subplan could include a reference to a
	 * modifying CTE).  So we have to save and restore the caller's value.
	 */
	saved_resultRelInfo = estate->es_result_relation_info;

	estate->es_result_relation_info = resultRelInfo;

	/*
	 * Fetch rows from subplan(s), and execute the required table
	 * modification for each row.
	 */
	for (;;)
	{
		/*
		 * Reset the per-output-tuple exprcontext.  This is needed because
		 * triggers expect to use that context as workspace.  It's a bit ugly
		 * to do this below the top level of the plan, however.  We might
		 * need to rethink this later.
		 */
		ResetPerTupleExprContext(estate);

		planSlot = ExecProcNode(subplanstate);

		if (TupIsNull(planSlot))
		{
			/* advance to next subplan if any */
			node->mt_whichplan++;
			if (node->mt_whichplan < node->mt_nplans)
			{
				resultRelInfo++;
				subplanstate = node->mt_plans[node->mt_whichplan];
				junkfilter = resultRelInfo->ri_junkFilter;
				estate->es_result_relation_info = resultRelInfo;
				EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
									node->mt_arowmarks[node->mt_whichplan]);
				/* Prepare to convert transition tuples from this child. */
				if (node->mt_transition_capture != NULL)
				{
					node->mt_transition_capture->tcs_map =
						tupconv_map_for_subplan(node, node->mt_whichplan);
				}
				if (node->mt_oc_transition_capture != NULL)
				{
					node->mt_oc_transition_capture->tcs_map =
						tupconv_map_for_subplan(node, node->mt_whichplan);
				}
				continue;
			}
			else
				break;
		}

		/*
		 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to
		 * do here is compute the RETURNING expressions.
		 */
		if (resultRelInfo->ri_usesFdwDirectModify)
		{
			Assert(resultRelInfo->ri_projectReturning);

			/*
			 * A scan slot containing the data that was actually inserted,
			 * updated or deleted has already been made available to
			 * ExecProcessReturning by IterateDirectModify, so no need to
			 * provide it here.
			 */
			slot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
										RelationGetRelid(resultRelInfo->ri_RelationDesc),
										NULL, planSlot);

			estate->es_result_relation_info = saved_resultRelInfo;
			return slot;
		}

		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
		slot = planSlot;

		tupleid = NULL;
		oldtuple = NULL;
		if (junkfilter != NULL)
		{
			/*
			 * extract the 'ctid' or 'wholerow' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				char		relkind;
				Datum		datum;
				bool		isNull;

				relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
				if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
				{
					datum = ExecGetJunkAttribute(slot,
												 junkfilter->jf_junkAttNo,
												 &isNull);
					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "ctid is NULL");

					tupleid = (ItemPointer) DatumGetPointer(datum);
					tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
					tupleid = &tuple_ctid;
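
					/*
					 * (The ctid datum points into the slot's tuple storage,
					 * which may be freed or overwritten during the
					 * modification below, so we keep a local copy above
					 * rather than the bare pointer.)
					 */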
				}

				/*
				 * Use the wholerow attribute, when available, to reconstruct
				 * the old relation tuple.
				 *
				 * Foreign table updates have a wholerow attribute when the
				 * relation has a row-level trigger.  Note that the wholerow
				 * attribute does not carry system columns.  Foreign table
				 * triggers miss seeing those, except that we know enough
				 * here to set t_tableOid.  Quite separately from this, the
				 * FDW may fetch its own junk attrs to identify the row.
				 *
				 * Other relevant relkinds, currently limited to views,
				 * always have a wholerow attribute.
				 */
				else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
				{
					datum = ExecGetJunkAttribute(slot,
												 junkfilter->jf_junkAttNo,
												 &isNull);
					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "wholerow is NULL");

					oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
					oldtupdata.t_len =
						HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
					ItemPointerSetInvalid(&(oldtupdata.t_self));
					/* Historically, view triggers see invalid t_tableOid. */
					oldtupdata.t_tableOid =
						(relkind == RELKIND_VIEW) ? InvalidOid :
						RelationGetRelid(resultRelInfo->ri_RelationDesc);

					oldtuple = &oldtupdata;
				}
				else
					Assert(relkind == RELKIND_FOREIGN_TABLE);
			}

			/*
			 * apply the junkfilter if needed.
			 */
			if (operation != CMD_DELETE)
				slot = ExecFilterJunk(junkfilter, slot);
		}

		switch (operation)
		{
			case CMD_INSERT:
				/* Prepare for tuple routing if needed. */
				if (proute)
					slot = ExecPrepareTupleRouting(node, estate, proute,
												   resultRelInfo, slot);
				slot = ExecInsert(node, slot, planSlot,
								  NULL, estate->es_result_relation_info,
								  estate, node->canSetTag);
				/* Revert ExecPrepareTupleRouting's state change. */
				if (proute)
					estate->es_result_relation_info = resultRelInfo;
				break;
			case CMD_UPDATE:
				slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
								  &node->mt_epqstate, estate, node->canSetTag);
				break;
			case CMD_DELETE:
				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
								  &node->mt_epqstate, estate,
								  true, node->canSetTag,
								  false /* changingPart */ , NULL, NULL);
				break;
			default:
				elog(ERROR, "unknown operation");
				break;
		}

		/*
		 * If we got a RETURNING result, return it to caller.  We'll continue
		 * the work on next call.
		 */
		if (slot)
		{
			estate->es_result_relation_info = saved_resultRelInfo;
			return slot;
		}
	}

	/* Restore es_result_relation_info before exiting */
	estate->es_result_relation_info = saved_resultRelInfo;

	/*
	 * We're done, but fire AFTER STATEMENT triggers before exiting.
	 */
	fireASTriggers(node);

	node->mt_done = true;

	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitModifyTable
 * ----------------------------------------------------------------
 */
ModifyTableState *
ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
	ModifyTableState *mtstate;
	CmdType		operation = node->operation;
	int			nplans = list_length(node->plans);
	ResultRelInfo *saved_resultRelInfo;
	ResultRelInfo *resultRelInfo;
	Plan	   *subplan;
	ListCell   *l;
	int			i;
	Relation	rel;
	bool		update_tuple_routing_needed = node->partColsUpdated;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	mtstate = makeNode(ModifyTableState);
	mtstate->ps.plan = (Plan *) node;
	mtstate->ps.state = estate;
	mtstate->ps.ExecProcNode = ExecModifyTable;

	mtstate->operation = operation;
	mtstate->canSetTag = node->canSetTag;
	mtstate->mt_done = false;

	mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;

	/* If modifying a partitioned table, initialize the root table info */
	if (node->rootResultRelIndex >= 0)
		mtstate->rootResultRelInfo = estate->es_root_result_relations +
			node->rootResultRelIndex;

	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
	mtstate->mt_nplans = nplans;

	/* set up epqstate with dummy subplan data for the moment */
	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
	mtstate->fireBSTriggers = true;

	/*
	 * call ExecInitNode on each of the plans to be executed and save the
	 * results into the array "mt_plans".  This is also a convenient place to
	 * verify that the proposed target relations are valid and open their
	 * indexes for insertion of new index entries.  Note we *must* set
	 * estate->es_result_relation_info correctly while we initialize each
	 * sub-plan; ExecContextForcesOids depends on that!
	 */
	saved_resultRelInfo = estate->es_result_relation_info;

	resultRelInfo = mtstate->resultRelInfo;
	i = 0;
	foreach(l, node->plans)
	{
		subplan = (Plan *) lfirst(l);

		/* Initialize the usesFdwDirectModify flag */
		resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
															  node->fdwDirectModifyPlans);

		/*
		 * Verify result relation is a valid target for the current operation
		 */
		CheckValidResultRel(resultRelInfo, operation);

		/*
		 * If there are indices on the result relation, open them and save
		 * descriptors in the result relation info, so that we can add new
		 * index entries for the tuples we add/update.  We need not do this
		 * for a DELETE, however, since deletion doesn't affect indexes.
		 * Also, inside an EvalPlanQual operation, the indexes might be open
		 * already, since we share the resultrel state with the original
		 * query.
		 */
		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
			operation != CMD_DELETE &&
			resultRelInfo->ri_IndexRelationDescs == NULL)
			ExecOpenIndices(resultRelInfo,
							node->onConflictAction != ONCONFLICT_NONE);

		/*
		 * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
		 * trigger itself might modify the partition-key values.  So arrange
		 * for tuple routing.
		 */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->trig_update_before_row &&
			operation == CMD_UPDATE)
			update_tuple_routing_needed = true;
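
		/*
		 * For illustration: a hypothetical trigger such as
		 *
		 *     CREATE TRIGGER reroute BEFORE UPDATE ON part_tab
		 *         FOR EACH ROW EXECUTE PROCEDURE bump_partition_key();
		 *
		 * can change the partition key even when the UPDATE statement
		 * itself does not, so routing state must be set up just in case.
		 */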

		/* Now init the plan for this result rel */
		estate->es_result_relation_info = resultRelInfo;
		mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);

		/* Also let FDWs init themselves for foreign-table result rels */
		if (!resultRelInfo->ri_usesFdwDirectModify &&
			resultRelInfo->ri_FdwRoutine != NULL &&
			resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
		{
			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, i);

			resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
															 resultRelInfo,
															 fdw_private,
															 i,
															 eflags);
		}

		resultRelInfo++;
		i++;
	}

	estate->es_result_relation_info = saved_resultRelInfo;

	/* Get the target relation */
	resultRelInfo = getTargetResultRelInfo(mtstate);
	rel = resultRelInfo->ri_RelationDesc;

	/*
	 * If it's not a partitioned table after all, UPDATE tuple routing should
	 * not be attempted.
	 */
	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		update_tuple_routing_needed = false;

	/*
	 * Build state for tuple routing if it's an INSERT or if it's an UPDATE
	 * of the partition key.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
		(operation == CMD_INSERT || update_tuple_routing_needed))
		mtstate->mt_partition_tuple_routing =
			ExecSetupPartitionTupleRouting(mtstate, resultRelInfo);

	/*
	 * Build state for collecting transition tuples.  This requires having a
	 * valid trigger query context, so skip it in explain-only mode.
	 */
	if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecSetupTransitionCaptureState(mtstate, estate);

	/*
	 * Construct a mapping from each of the per-subplan partition attnos to
	 * the root attno.  This is required when, during update row movement,
	 * the tuple descriptor of a source partition does not match the root
	 * partitioned table descriptor.  In such a case we need to convert
	 * tuples to the root tuple descriptor, because the search for the
	 * destination partition starts from the root.  Skip this setup if it's
	 * not a partition key update.
	 */
	if (update_tuple_routing_needed)
		ExecSetupChildParentMapForSubplan(mtstate);

	/*
	 * Initialize any WITH CHECK OPTION constraints if needed.
	 */
	resultRelInfo = mtstate->resultRelInfo;
	i = 0;
	foreach(l, node->withCheckOptionLists)
	{
		List	   *wcoList = (List *) lfirst(l);
		List	   *wcoExprs = NIL;
		ListCell   *ll;

		foreach(ll, wcoList)
		{
			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
											   &mtstate->ps);

			wcoExprs = lappend(wcoExprs, wcoExpr);
		}

		resultRelInfo->ri_WithCheckOptions = wcoList;
		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
		resultRelInfo++;
		i++;
	}
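
	/*
	 * For illustration: WITH CHECK OPTIONs come from auto-updatable views,
	 * e.g. a hypothetical
	 *
	 *     CREATE VIEW active_users AS
	 *         SELECT * FROM users WHERE active
	 *         WITH CHECK OPTION;
	 *
	 * as well as from row-level security policies added by the rewriter;
	 * the quals initialized above are later evaluated against each new row.
	 */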

	/*
	 * Initialize RETURNING projections if needed.
	 */
	if (node->returningLists)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;

		/*
		 * Initialize result tuple slot and assign its rowtype using the
		 * first RETURNING list.  We assume the rest will look the same.
		 */
		mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);

		/* Set up a slot for the output of the RETURNING projection(s) */
		ExecInitResultTupleSlotTL(estate, &mtstate->ps);
		slot = mtstate->ps.ps_ResultTupleSlot;

		/* Need an econtext too */
		if (mtstate->ps.ps_ExprContext == NULL)
			ExecAssignExprContext(estate, &mtstate->ps);
		econtext = mtstate->ps.ps_ExprContext;

		/*
		 * Build a projection for each result rel.
		 */
		resultRelInfo = mtstate->resultRelInfo;
		foreach(l, node->returningLists)
		{
			List	   *rlist = (List *) lfirst(l);

			resultRelInfo->ri_returningList = rlist;
			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
										resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}
	}
	else
	{
		/*
		 * We still must construct a dummy result tuple type, because
		 * InitPlan expects one (maybe should change that?).
		 */
		mtstate->ps.plan->targetlist = NIL;
		ExecInitResultTupleSlotTL(estate, &mtstate->ps);

		mtstate->ps.ps_ExprContext = NULL;
	}

	/* Set the list of arbiter indexes if needed for ON CONFLICT */
	resultRelInfo = mtstate->resultRelInfo;
	if (node->onConflictAction != ONCONFLICT_NONE)
		resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
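
	/*
	 * (For illustration: for a hypothetical INSERT ... ON CONFLICT (k) DO
	 * NOTHING/UPDATE, the planner has already resolved the conflict target
	 * into the OIDs of the unique indexes on "k"; those arbiter indexes are
	 * what the insert path later checks for conflicts.)
	 */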

	/*
	 * If needed, initialize the target list, projection and qual for ON
	 * CONFLICT DO UPDATE.
	 */
	if (node->onConflictAction == ONCONFLICT_UPDATE)
	{
		OnConflictSetState *onconfl = makeNode(OnConflictSetState);
		ExprContext *econtext;
		TupleDesc	relationDesc;

		/* insert may only have one plan, since inheritance is not expanded */
		Assert(nplans == 1);

		/* already exists if created by RETURNING processing above */
		if (mtstate->ps.ps_ExprContext == NULL)
			ExecAssignExprContext(estate, &mtstate->ps);

		econtext = mtstate->ps.ps_ExprContext;
		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;

		/*
		 * Initialize the slot for the existing tuple.  If we'll be
		 * performing tuple routing, the tuple descriptor to use for this
		 * will be determined based on which relation the update is actually
		 * applied to, so we don't set its tuple descriptor here.
		 */
		mtstate->mt_existing =
			ExecInitExtraTupleSlot(mtstate->ps.state,
								   mtstate->mt_partition_tuple_routing ?
								   NULL : relationDesc);

		/* carried forward solely for the benefit of EXPLAIN */
		mtstate->mt_excludedtlist = node->exclRelTlist;

		/* create state for DO UPDATE SET operation */
		resultRelInfo->ri_onConflict = onconfl;

		/*
		 * Create the tuple slot for the UPDATE SET projection.
		 *
		 * Just like mt_existing above, we leave it without a tuple
		 * descriptor in the case of partitioning tuple routing, so that it
		 * can be changed by ExecPrepareTupleRouting.  In that case, we still
		 * save the tupdesc in the parent's state: it can be reused by
		 * partitions with a descriptor identical to the parent's.
		 */
		mtstate->mt_conflproj =
			ExecInitExtraTupleSlot(mtstate->ps.state,
								   mtstate->mt_partition_tuple_routing ?
								   NULL : relationDesc);
		onconfl->oc_ProjTupdesc = relationDesc;

		/*
		 * The onConflictSet tlist should already have been adjusted to emit
		 * the table's exact column list.  It could also contain resjunk
		 * columns, which should be evaluated but not included in the
		 * projection result.
		 */
		ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
							node->onConflictSet);

		/* build UPDATE SET projection state */
		onconfl->oc_ProjInfo =
			ExecBuildProjectionInfoExt(node->onConflictSet, econtext,
									   mtstate->mt_conflproj, false,
									   &mtstate->ps,
									   relationDesc);

		/* initialize state to evaluate the WHERE clause, if any */
		if (node->onConflictWhere)
		{
			ExprState  *qualexpr;

			qualexpr = ExecInitQual((List *) node->onConflictWhere,
									&mtstate->ps);
			onconfl->oc_WhereClause = qualexpr;
		}
	}

	/*
	 * If we have any secondary relations in an UPDATE or DELETE, they need
	 * to be treated like non-locked relations in SELECT FOR UPDATE, ie, the
	 * EvalPlanQual mechanism needs to be told about them.  Locate the
	 * relevant ExecRowMarks.
	 */
	foreach(l, node->rowMarks)
	{
		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* find ExecRowMark (same for all subplans) */
		erm = ExecFindRowMark(estate, rc->rti, false);

		/* build ExecAuxRowMark for each subplan */
		for (i = 0; i < nplans; i++)
		{
			ExecAuxRowMark *aerm;

			subplan = mtstate->mt_plans[i]->plan;
			aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
			mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
		}
	}

	/* select first subplan */
	mtstate->mt_whichplan = 0;
	subplan = (Plan *) linitial(node->plans);
	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
						mtstate->mt_arowmarks[0]);

	/*
	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
	 * if there are any junk attrs in the tlist.  UPDATE and DELETE always
	 * need a filter, since there's always at least one junk attribute
	 * present --- no need to look first.  Typically, this will be a 'ctid'
	 * or 'wholerow' attribute, but in the case of a foreign data wrapper it
	 * might be a set of junk attributes sufficient to identify the remote
	 * row.
	 *
	 * If there are multiple result relations, each one needs its own junk
	 * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
	 * can't be fooled by some needing a filter and some not.
	 *
	 * This section of code is also a convenient place to verify that the
	 * output of an INSERT or UPDATE matches the target table(s).
	 */
	{
		bool		junk_filter_needed = false;

		switch (operation)
		{
			case CMD_INSERT:
				foreach(l, subplan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(l);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				elog(ERROR, "unknown operation");
				break;
		}

		if (junk_filter_needed)
		{
			resultRelInfo = mtstate->resultRelInfo;
			for (i = 0; i < nplans; i++)
			{
				JunkFilter *j;

				subplan = mtstate->mt_plans[i]->plan;
				if (operation == CMD_INSERT || operation == CMD_UPDATE)
					ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
										subplan->targetlist);

				j = ExecInitJunkFilter(subplan->targetlist,
									   resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
									   ExecInitExtraTupleSlot(estate, NULL));

				if (operation == CMD_UPDATE || operation == CMD_DELETE)
				{
					/* For UPDATE/DELETE, find the appropriate junk attr now */
					char		relkind;

					relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
					if (relkind == RELKIND_RELATION ||
						relkind == RELKIND_MATVIEW ||
						relkind == RELKIND_PARTITIONED_TABLE)
					{
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
						if (!AttributeNumberIsValid(j->jf_junkAttNo))
							elog(ERROR, "could not find junk ctid column");
					}
					else if (relkind == RELKIND_FOREIGN_TABLE)
					{
						/*
						 * When there is a row-level trigger, there should be
						 * a wholerow attribute.
						 */
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
					}
					else
					{
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
						if (!AttributeNumberIsValid(j->jf_junkAttNo))
							elog(ERROR, "could not find junk wholerow column");
					}
				}

				resultRelInfo->ri_junkFilter = j;
				resultRelInfo++;
			}
		}
		else
		{
			if (operation == CMD_INSERT)
				ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
									subplan->targetlist);
		}
	}

	/*
	 * Set up a tuple table slot for use for trigger output tuples.  In a
	 * plan containing multiple ModifyTable nodes, all can share one such
	 * slot, so we keep it in the estate.
	 */
	if (estate->es_trig_tuple_slot == NULL)
		estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL);

	/*
	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add
	 * it to estate->es_auxmodifytables so that it will be run to completion
	 * by ExecPostprocessPlan.  (It'd actually work fine to add the primary
	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
	 * lappend: we need later-initialized ModifyTable nodes to be shut down
	 * before earlier ones.  This ensures that we don't throw away RETURNING
	 * rows that need to be seen by a later CTE subplan.
	 */
	if (!mtstate->canSetTag)
		estate->es_auxmodifytables = lcons(mtstate,
										   estate->es_auxmodifytables);
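
	/*
	 * For illustration: in a hypothetical query such as
	 *
	 *     WITH moved AS (DELETE FROM src RETURNING *)
	 *     INSERT INTO dst SELECT * FROM moved;
	 *
	 * the inner DELETE's ModifyTable node is not the primary one, so it is
	 * registered here and driven to completion by ExecPostprocessPlan if
	 * the outer query does not read all of its RETURNING rows.
	 */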

	return mtstate;
}

/* ----------------------------------------------------------------
 *		ExecEndModifyTable
 *
 *		Shuts down the plan.
 *
 *		Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndModifyTable(ModifyTableState *node)
{
	int			i;

	/*
	 * Allow any FDWs to shut down
	 */
	for (i = 0; i < node->mt_nplans; i++)
	{
		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;

		if (!resultRelInfo->ri_usesFdwDirectModify &&
			resultRelInfo->ri_FdwRoutine != NULL &&
			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
														   resultRelInfo);
	}

	/* Close all the partitioned tables, leaf partitions, and their indices */
	if (node->mt_partition_tuple_routing)
		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ps);

	/*
	 * clean out the tuple table
	 */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/*
	 * Terminate EPQ execution if active
	 */
	EvalPlanQualEnd(&node->mt_epqstate);

	/*
	 * shut down subplans
	 */
	for (i = 0; i < node->mt_nplans; i++)
		ExecEndNode(node->mt_plans[i]);
}

void
ExecReScanModifyTable(ModifyTableState *node)
{
	/*
	 * Currently, we don't need to support rescan on ModifyTable nodes.  The
	 * semantics of that would be a bit debatable anyway.
	 */
	elog(ERROR, "ExecReScanModifyTable is not implemented");
}