1 /*-------------------------------------------------------------------------
2 *
3 * execMain.c
4 * top level executor interface routines
5 *
6 * INTERFACE ROUTINES
7 * ExecutorStart()
8 * ExecutorRun()
9 * ExecutorFinish()
10 * ExecutorEnd()
11 *
12 * These four procedures are the external interface to the executor.
13 * In each case, the query descriptor is required as an argument.
14 *
15 * ExecutorStart must be called at the beginning of execution of any
16 * query plan and ExecutorEnd must always be called at the end of
17 * execution of a plan (unless it is aborted due to error).
18 *
 * ExecutorRun accepts direction and count arguments that specify whether
 * the plan is to be executed forwards or backwards, and for how many tuples.
21 * In some cases ExecutorRun may be called multiple times to process all
22 * the tuples for a plan. It is also acceptable to stop short of executing
23 * the whole plan (but only if it is a SELECT).
24 *
25 * ExecutorFinish must be called after the final ExecutorRun call and
26 * before ExecutorEnd. This can be omitted only in case of EXPLAIN,
27 * which should also omit ExecutorRun.
28 *
29 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
30 * Portions Copyright (c) 1994, Regents of the University of California
31 *
32 *
33 * IDENTIFICATION
34 * src/backend/executor/execMain.c
35 *
36 *-------------------------------------------------------------------------
37 */
38 #include "postgres.h"
39
40 #include "access/htup_details.h"
41 #include "access/sysattr.h"
42 #include "access/transam.h"
43 #include "access/xact.h"
44 #include "catalog/namespace.h"
45 #include "catalog/partition.h"
46 #include "catalog/pg_inherits_fn.h"
47 #include "catalog/pg_publication.h"
48 #include "commands/matview.h"
49 #include "commands/trigger.h"
50 #include "executor/execdebug.h"
51 #include "executor/nodeSubplan.h"
52 #include "foreign/fdwapi.h"
53 #include "mb/pg_wchar.h"
54 #include "miscadmin.h"
55 #include "optimizer/clauses.h"
56 #include "parser/parsetree.h"
57 #include "rewrite/rewriteManip.h"
58 #include "storage/bufmgr.h"
59 #include "storage/lmgr.h"
60 #include "tcop/utility.h"
61 #include "utils/acl.h"
62 #include "utils/lsyscache.h"
63 #include "utils/memutils.h"
64 #include "utils/rls.h"
65 #include "utils/ruleutils.h"
66 #include "utils/snapmgr.h"
67 #include "utils/tqual.h"
68
69
70 /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
71 ExecutorStart_hook_type ExecutorStart_hook = NULL;
72 ExecutorRun_hook_type ExecutorRun_hook = NULL;
73 ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
74 ExecutorEnd_hook_type ExecutorEnd_hook = NULL;
75
76 /* Hook for plugin to get control in ExecCheckRTPerms() */
77 ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;
78
79 /* decls for local routines only used within this module */
80 static void InitPlan(QueryDesc *queryDesc, int eflags);
81 static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
82 static void ExecPostprocessPlan(EState *estate);
83 static void ExecEndPlan(PlanState *planstate, EState *estate);
84 static void ExecutePlan(EState *estate, PlanState *planstate,
85 bool use_parallel_mode,
86 CmdType operation,
87 bool sendTuples,
88 uint64 numberTuples,
89 ScanDirection direction,
90 DestReceiver *dest,
91 bool execute_once);
92 static bool ExecCheckRTEPerms(RangeTblEntry *rte);
93 static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
94 Bitmapset *modifiedCols,
95 AclMode requiredPerms);
96 static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
97 static char *ExecBuildSlotValueDescription(Oid reloid,
98 TupleTableSlot *slot,
99 TupleDesc tupdesc,
100 Bitmapset *modifiedCols,
101 int maxfieldlen);
102 static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
103 Datum *values,
104 bool *isnull,
105 int maxfieldlen);
106 static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
107 Plan *planTree);
108 static void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
109 TupleTableSlot *slot, EState *estate);
110
111 /*
112 * Note that GetUpdatedColumns() also exists in commands/trigger.c. There does
113 * not appear to be any good header to put it into, given the structures that
114 * it uses, so we let them be duplicated. Be sure to update both if one needs
115 * to be changed, however.
116 */
117 #define GetInsertedColumns(relinfo, estate) \
118 (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols)
119 #define GetUpdatedColumns(relinfo, estate) \
120 (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)
121
122 /* end of local decls */
123
124
125 /* ----------------------------------------------------------------
126 * ExecutorStart
127 *
128 * This routine must be called at the beginning of any execution of any
129 * query plan
130 *
131 * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
132 * only because some places use QueryDescs for utility commands). The tupDesc
133 * field of the QueryDesc is filled in to describe the tuples that will be
134 * returned, and the internal fields (estate and planstate) are set up.
135 *
136 * eflags contains flag bits as described in executor.h.
137 *
138 * NB: the CurrentMemoryContext when this is called will become the parent
139 * of the per-query context used for this Executor invocation.
140 *
141 * We provide a function hook variable that lets loadable plugins
142 * get control when ExecutorStart is called. Such a plugin would
143 * normally call standard_ExecutorStart().
144 *
145 * ----------------------------------------------------------------
146 */
147 void
ExecutorStart(QueryDesc * queryDesc,int eflags)148 ExecutorStart(QueryDesc *queryDesc, int eflags)
149 {
150 if (ExecutorStart_hook)
151 (*ExecutorStart_hook) (queryDesc, eflags);
152 else
153 standard_ExecutorStart(queryDesc, eflags);
154 }
155
/*
 * standard_ExecutorStart
 *		Default implementation of ExecutorStart: build the per-query EState
 *		and initialize the plan state tree for the given QueryDesc.
 *
 * queryDesc must not have been started already (its estate must be NULL);
 * on return, queryDesc->estate and queryDesc->planstate (via InitPlan) are
 * filled in.  eflags carries EXEC_FLAG_XXX bits as described in executor.h.
 */
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 *
	 * Don't allow writes in parallel mode.  Supporting UPDATE and DELETE
	 * would require (a) storing the combocid hash in shared memory, rather
	 * than synchronizing it just once at the start of parallelism, and (b) an
	 * alternative to heap_update()'s reliance on xmax for mutual exclusion.
	 * INSERT may have no such troubles, but we forbid it to simplify the
	 * checks.
	 *
	 * We have lower-level defenses in CommandCounterIncrement and elsewhere
	 * against performing unsafe operations in parallel mode, but this gives a
	 * more user-friendly error message.
	 */
	if ((XactReadOnly || IsInParallelMode()) &&
		!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * All executor allocations from here to the final context switch go
	 * into es_query_cxt.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	estate->es_sourceText = queryDesc->sourceText;

	/*
	 * Fill in the query environment, if any, from queryDesc.
	 */
	estate->es_queryEnv = queryDesc->queryEnv;

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
			 * tuples
			 */
			if (queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode.  This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState.  Note the snapshots
	 * are registered here and unregistered in standard_ExecutorEnd.
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}
269
270 /* ----------------------------------------------------------------
271 * ExecutorRun
272 *
273 * This is the main routine of the executor module. It accepts
274 * the query descriptor from the traffic cop and executes the
275 * query plan.
276 *
277 * ExecutorStart must have been called already.
278 *
279 * If direction is NoMovementScanDirection then nothing is done
280 * except to start up/shut down the destination. Otherwise,
281 * we retrieve up to 'count' tuples in the specified direction.
282 *
283 * Note: count = 0 is interpreted as no portal limit, i.e., run to
284 * completion. Also note that the count limit is only applied to
285 * retrieved tuples, not for instance to those inserted/updated/deleted
286 * by a ModifyTable plan node.
287 *
288 * There is no return value, but output tuples (if any) are sent to
289 * the destination receiver specified in the QueryDesc; and the number
290 * of tuples processed at the top level can be found in
291 * estate->es_processed.
292 *
293 * We provide a function hook variable that lets loadable plugins
294 * get control when ExecutorRun is called. Such a plugin would
295 * normally call standard_ExecutorRun().
296 *
297 * ----------------------------------------------------------------
298 */
299 void
ExecutorRun(QueryDesc * queryDesc,ScanDirection direction,uint64 count,bool execute_once)300 ExecutorRun(QueryDesc *queryDesc,
301 ScanDirection direction, uint64 count,
302 bool execute_once)
303 {
304 if (ExecutorRun_hook)
305 (*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
306 else
307 standard_ExecutorRun(queryDesc, direction, count, execute_once);
308 }
309
/*
 * standard_ExecutorRun
 *		Default implementation of ExecutorRun: execute the already-started
 *		plan in 'direction', retrieving up to 'count' tuples (count = 0
 *		means run to completion), and send any output tuples to the
 *		DestReceiver in queryDesc->dest.
 *
 * If direction is NoMovementScanDirection, only the receiver is started
 * and shut down; no tuples are fetched.  If execute_once is true, a second
 * non-trivial call with the same QueryDesc raises an error.
 */
void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, uint64 count, bool execute_once)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	MemoryContext oldcontext;

	/* sanity checks: ExecutorStart must have been run on this QueryDesc */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	/* SELECT, and INSERT/UPDATE/DELETE with RETURNING, emit tuples */
	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->hasReturning);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (!ScanDirectionIsNoMovement(direction))
	{
		/* enforce the single-execution restriction, if flagged */
		if (execute_once && queryDesc->already_executed)
			elog(ERROR, "can't re-execute query flagged for single execution");
		queryDesc->already_executed = true;

		ExecutePlan(estate,
					queryDesc->planstate,
					queryDesc->plannedstmt->parallelModeNeeded,
					operation,
					sendTuples,
					count,
					direction,
					dest,
					execute_once);
	}

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	/* stop the clock, crediting the tuples processed at the top level */
	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, estate->es_processed);

	MemoryContextSwitchTo(oldcontext);
}
386
387 /* ----------------------------------------------------------------
388 * ExecutorFinish
389 *
390 * This routine must be called after the last ExecutorRun call.
391 * It performs cleanup such as firing AFTER triggers. It is
392 * separate from ExecutorEnd because EXPLAIN ANALYZE needs to
393 * include these actions in the total runtime.
394 *
395 * We provide a function hook variable that lets loadable plugins
396 * get control when ExecutorFinish is called. Such a plugin would
397 * normally call standard_ExecutorFinish().
398 *
399 * ----------------------------------------------------------------
400 */
401 void
ExecutorFinish(QueryDesc * queryDesc)402 ExecutorFinish(QueryDesc *queryDesc)
403 {
404 if (ExecutorFinish_hook)
405 (*ExecutorFinish_hook) (queryDesc);
406 else
407 standard_ExecutorFinish(queryDesc);
408 }
409
/*
 * standard_ExecutorFinish
 *		Default implementation of ExecutorFinish: run ModifyTable nodes to
 *		completion and fire queued AFTER triggers (unless skip-triggers mode
 *		was selected), counting this work in queryDesc->totaltime so that
 *		EXPLAIN ANALYZE includes it.
 *
 * Must be called exactly once per executor instance, after the final
 * ExecutorRun call and before ExecutorEnd; es_finished enforces this.
 */
void
standard_ExecutorFinish(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/* This should be run once and only once per Executor instance */
	Assert(!estate->es_finished);

	/* Switch into per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/* Run ModifyTable nodes to completion */
	ExecPostprocessPlan(estate);

	/* Execute queued AFTER triggers, unless told not to */
	if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
		AfterTriggerEndQuery(estate);

	/* no top-level tuples are emitted here, so count zero */
	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, 0);

	MemoryContextSwitchTo(oldcontext);

	/* mark done, so ExecutorEnd can assert we were called */
	estate->es_finished = true;
}
448
449 /* ----------------------------------------------------------------
450 * ExecutorEnd
451 *
452 * This routine must be called at the end of execution of any
453 * query plan
454 *
455 * We provide a function hook variable that lets loadable plugins
456 * get control when ExecutorEnd is called. Such a plugin would
457 * normally call standard_ExecutorEnd().
458 *
459 * ----------------------------------------------------------------
460 */
461 void
ExecutorEnd(QueryDesc * queryDesc)462 ExecutorEnd(QueryDesc *queryDesc)
463 {
464 if (ExecutorEnd_hook)
465 (*ExecutorEnd_hook) (queryDesc);
466 else
467 standard_ExecutorEnd(queryDesc);
468 }
469
/*
 * standard_ExecutorEnd
 *		Default implementation of ExecutorEnd: shut down the plan state
 *		tree, unregister the snapshots taken in standard_ExecutorStart, and
 *		release the per-query EState and its memory context.
 *
 * On return, queryDesc's executor-owned fields (tupDesc, estate, planstate,
 * totaltime) are reset to NULL since they pointed into freed storage.
 */
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/* do away with our snapshots */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}
519
520 /* ----------------------------------------------------------------
521 * ExecutorRewind
522 *
523 * This routine may be called on an open queryDesc to rewind it
524 * to the start.
525 * ----------------------------------------------------------------
526 */
527 void
ExecutorRewind(QueryDesc * queryDesc)528 ExecutorRewind(QueryDesc *queryDesc)
529 {
530 EState *estate;
531 MemoryContext oldcontext;
532
533 /* sanity checks */
534 Assert(queryDesc != NULL);
535
536 estate = queryDesc->estate;
537
538 Assert(estate != NULL);
539
540 /* It's probably not sensible to rescan updating queries */
541 Assert(queryDesc->operation == CMD_SELECT);
542
543 /*
544 * Switch into per-query memory context
545 */
546 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
547
548 /*
549 * rescan plan
550 */
551 ExecReScan(queryDesc->planstate);
552
553 MemoryContextSwitchTo(oldcontext);
554 }
555
556
557 /*
558 * ExecCheckRTPerms
559 * Check access permissions for all relations listed in a range table.
560 *
561 * Returns true if permissions are adequate. Otherwise, throws an appropriate
562 * error if ereport_on_violation is true, or simply returns false otherwise.
563 *
564 * Note that this does NOT address row level security policies (aka: RLS). If
565 * rows will be returned to the user as a result of this permission check
566 * passing, then RLS also needs to be consulted (and check_enable_rls()).
567 *
568 * See rewrite/rowsecurity.c.
569 */
570 bool
ExecCheckRTPerms(List * rangeTable,bool ereport_on_violation)571 ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
572 {
573 ListCell *l;
574 bool result = true;
575
576 foreach(l, rangeTable)
577 {
578 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
579
580 result = ExecCheckRTEPerms(rte);
581 if (!result)
582 {
583 Assert(rte->rtekind == RTE_RELATION);
584 if (ereport_on_violation)
585 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
586 get_rel_name(rte->relid));
587 return false;
588 }
589 }
590
591 if (ExecutorCheckPerms_hook)
592 result = (*ExecutorCheckPerms_hook) (rangeTable,
593 ereport_on_violation);
594 return result;
595 }
596
597 /*
598 * ExecCheckRTEPerms
599 * Check access permissions for a single RTE.
600 */
/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 *
 * Returns true if the checking user holds all permission bits listed in
 * rte->requiredPerms on rte->relid, counting both relation-level and
 * column-level grants; returns false on any missing privilege (callers
 * decide whether to report the error).
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked when the function is prepared for execution.  Join, subquery,
	 * and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		int			col = -1;

		/*
		 * If we lack any permissions that exist only as relation permissions
		 * (e.g. DELETE, which has no column-level form), we can fail
		 * straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			while ((col = bms_next_member(rte->selectedCols, col)) >= 0)
			{
				/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
				AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

				if (attno == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, attno, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
		}

		/*
		 * Basically the same for the mod columns, for both INSERT and UPDATE
		 * privilege as specified by remainingPerms.
		 */
		if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->insertedCols,
																	  ACL_INSERT))
			return false;

		if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->updatedCols,
																	  ACL_UPDATE))
			return false;
	}
	return true;
}
715
716 /*
717 * ExecCheckRTEPermsModified
718 * Check INSERT or UPDATE access permissions for a single RTE (these
719 * are processed uniformly).
720 */
721 static bool
ExecCheckRTEPermsModified(Oid relOid,Oid userid,Bitmapset * modifiedCols,AclMode requiredPerms)722 ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols,
723 AclMode requiredPerms)
724 {
725 int col = -1;
726
727 /*
728 * When the query doesn't explicitly update any columns, allow the query
729 * if we have permission on any column of the rel. This is to handle
730 * SELECT FOR UPDATE as well as possible corner cases in UPDATE.
731 */
732 if (bms_is_empty(modifiedCols))
733 {
734 if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms,
735 ACLMASK_ANY) != ACLCHECK_OK)
736 return false;
737 }
738
739 while ((col = bms_next_member(modifiedCols, col)) >= 0)
740 {
741 /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
742 AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber;
743
744 if (attno == InvalidAttrNumber)
745 {
746 /* whole-row reference can't happen here */
747 elog(ERROR, "whole-row update is not implemented");
748 }
749 else
750 {
751 if (pg_attribute_aclcheck(relOid, attno, userid,
752 requiredPerms) != ACLCHECK_OK)
753 return false;
754 }
755 }
756 return true;
757 }
758
759 /*
760 * Check that the query does not imply any writes to non-temp tables;
761 * unless we're in parallel mode, in which case don't even allow writes
762 * to temp tables.
763 *
764 * Note: in a Hot Standby this would need to reject writes to temp
765 * tables just as we do in parallel mode; but an HS standby can't have created
766 * any temp tables in the first place, so no need to check that.
767 */
768 static void
ExecCheckXactReadOnly(PlannedStmt * plannedstmt)769 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
770 {
771 ListCell *l;
772
773 /*
774 * Fail if write permissions are requested in parallel mode for table
775 * (temp or non-temp), otherwise fail for any non-temp table.
776 */
777 foreach(l, plannedstmt->rtable)
778 {
779 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
780
781 if (rte->rtekind != RTE_RELATION)
782 continue;
783
784 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
785 continue;
786
787 if (isTempNamespace(get_rel_namespace(rte->relid)))
788 continue;
789
790 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
791 }
792
793 if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE)
794 PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt));
795 }
796
797
798 /* ----------------------------------------------------------------
799 * InitPlan
800 *
801 * Initializes the query plan: open files, allocate storage
802 * and start up the rule manager
803 * ----------------------------------------------------------------
804 */
805 static void
InitPlan(QueryDesc * queryDesc,int eflags)806 InitPlan(QueryDesc *queryDesc, int eflags)
807 {
808 CmdType operation = queryDesc->operation;
809 PlannedStmt *plannedstmt = queryDesc->plannedstmt;
810 Plan *plan = plannedstmt->planTree;
811 List *rangeTable = plannedstmt->rtable;
812 EState *estate = queryDesc->estate;
813 PlanState *planstate;
814 TupleDesc tupType;
815 ListCell *l;
816 int i;
817
818 /*
819 * Do permissions checks
820 */
821 ExecCheckRTPerms(rangeTable, true);
822
823 /*
824 * initialize the node's execution state
825 */
826 estate->es_range_table = rangeTable;
827 estate->es_plannedstmt = plannedstmt;
828
829 /*
830 * initialize result relation stuff, and open/lock the result rels.
831 *
832 * We must do this before initializing the plan tree, else we might try to
833 * do a lock upgrade if a result rel is also a source rel.
834 */
835 if (plannedstmt->resultRelations)
836 {
837 List *resultRelations = plannedstmt->resultRelations;
838 int numResultRelations = list_length(resultRelations);
839 ResultRelInfo *resultRelInfos;
840 ResultRelInfo *resultRelInfo;
841
842 resultRelInfos = (ResultRelInfo *)
843 palloc(numResultRelations * sizeof(ResultRelInfo));
844 resultRelInfo = resultRelInfos;
845 foreach(l, resultRelations)
846 {
847 Index resultRelationIndex = lfirst_int(l);
848 Oid resultRelationOid;
849 Relation resultRelation;
850
851 resultRelationOid = getrelid(resultRelationIndex, rangeTable);
852 resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
853
854 InitResultRelInfo(resultRelInfo,
855 resultRelation,
856 resultRelationIndex,
857 NULL,
858 estate->es_instrument);
859 resultRelInfo++;
860 }
861 estate->es_result_relations = resultRelInfos;
862 estate->es_num_result_relations = numResultRelations;
863 /* es_result_relation_info is NULL except when within ModifyTable */
864 estate->es_result_relation_info = NULL;
865
866 /*
867 * In the partitioned result relation case, lock the non-leaf result
868 * relations too. A subset of these are the roots of respective
869 * partitioned tables, for which we also allocate ResultRelInfos.
870 */
871 estate->es_root_result_relations = NULL;
872 estate->es_num_root_result_relations = 0;
873 if (plannedstmt->nonleafResultRelations)
874 {
875 int num_roots = list_length(plannedstmt->rootResultRelations);
876
877 /*
878 * Firstly, build ResultRelInfos for all the partitioned table
879 * roots, because we will need them to fire the statement-level
880 * triggers, if any.
881 */
882 resultRelInfos = (ResultRelInfo *)
883 palloc(num_roots * sizeof(ResultRelInfo));
884 resultRelInfo = resultRelInfos;
885 foreach(l, plannedstmt->rootResultRelations)
886 {
887 Index resultRelIndex = lfirst_int(l);
888 Oid resultRelOid;
889 Relation resultRelDesc;
890
891 resultRelOid = getrelid(resultRelIndex, rangeTable);
892 resultRelDesc = heap_open(resultRelOid, RowExclusiveLock);
893 InitResultRelInfo(resultRelInfo,
894 resultRelDesc,
895 lfirst_int(l),
896 NULL,
897 estate->es_instrument);
898 resultRelInfo++;
899 }
900
901 estate->es_root_result_relations = resultRelInfos;
902 estate->es_num_root_result_relations = num_roots;
903
904 /* Simply lock the rest of them. */
905 foreach(l, plannedstmt->nonleafResultRelations)
906 {
907 Index resultRelIndex = lfirst_int(l);
908
909 /* We locked the roots above. */
910 if (!list_member_int(plannedstmt->rootResultRelations,
911 resultRelIndex))
912 LockRelationOid(getrelid(resultRelIndex, rangeTable),
913 RowExclusiveLock);
914 }
915 }
916 }
917 else
918 {
919 /*
920 * if no result relation, then set state appropriately
921 */
922 estate->es_result_relations = NULL;
923 estate->es_num_result_relations = 0;
924 estate->es_result_relation_info = NULL;
925 estate->es_root_result_relations = NULL;
926 estate->es_num_root_result_relations = 0;
927 }
928
929 /*
930 * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
931 * before we initialize the plan tree, else we'd be risking lock upgrades.
932 * While we are at it, build the ExecRowMark list. Any partitioned child
933 * tables are ignored here (because isParent=true) and will be locked by
934 * the first Append or MergeAppend node that references them. (Note that
935 * the RowMarks corresponding to partitioned child tables are present in
936 * the same list as the rest, i.e., plannedstmt->rowMarks.)
937 */
938 estate->es_rowMarks = NIL;
939 foreach(l, plannedstmt->rowMarks)
940 {
941 PlanRowMark *rc = (PlanRowMark *) lfirst(l);
942 Oid relid;
943 Relation relation;
944 ExecRowMark *erm;
945
946 /* ignore "parent" rowmarks; they are irrelevant at runtime */
947 if (rc->isParent)
948 continue;
949
950 /* get relation's OID (will produce InvalidOid if subquery) */
951 relid = getrelid(rc->rti, rangeTable);
952
953 /*
954 * If you change the conditions under which rel locks are acquired
955 * here, be sure to adjust ExecOpenScanRelation to match.
956 */
957 switch (rc->markType)
958 {
959 case ROW_MARK_EXCLUSIVE:
960 case ROW_MARK_NOKEYEXCLUSIVE:
961 case ROW_MARK_SHARE:
962 case ROW_MARK_KEYSHARE:
963 relation = heap_open(relid, RowShareLock);
964 break;
965 case ROW_MARK_REFERENCE:
966 relation = heap_open(relid, AccessShareLock);
967 break;
968 case ROW_MARK_COPY:
969 /* no physical table access is required */
970 relation = NULL;
971 break;
972 default:
973 elog(ERROR, "unrecognized markType: %d", rc->markType);
974 relation = NULL; /* keep compiler quiet */
975 break;
976 }
977
978 /* Check that relation is a legal target for marking */
979 if (relation)
980 CheckValidRowMarkRel(relation, rc->markType);
981
982 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
983 erm->relation = relation;
984 erm->relid = relid;
985 erm->rti = rc->rti;
986 erm->prti = rc->prti;
987 erm->rowmarkId = rc->rowmarkId;
988 erm->markType = rc->markType;
989 erm->strength = rc->strength;
990 erm->waitPolicy = rc->waitPolicy;
991 erm->ermActive = false;
992 ItemPointerSetInvalid(&(erm->curCtid));
993 erm->ermExtra = NULL;
994 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
995 }
996
997 /*
998 * Initialize the executor's tuple table to empty.
999 */
1000 estate->es_tupleTable = NIL;
1001 estate->es_trig_tuple_slot = NULL;
1002 estate->es_trig_oldtup_slot = NULL;
1003 estate->es_trig_newtup_slot = NULL;
1004
1005 /* mark EvalPlanQual not active */
1006 estate->es_epqTuple = NULL;
1007 estate->es_epqTupleSet = NULL;
1008 estate->es_epqScanDone = NULL;
1009
1010 /*
1011 * Initialize private state information for each SubPlan. We must do this
1012 * before running ExecInitNode on the main query tree, since
1013 * ExecInitSubPlan expects to be able to find these entries.
1014 */
1015 Assert(estate->es_subplanstates == NIL);
1016 i = 1; /* subplan indices count from 1 */
1017 foreach(l, plannedstmt->subplans)
1018 {
1019 Plan *subplan = (Plan *) lfirst(l);
1020 PlanState *subplanstate;
1021 int sp_eflags;
1022
1023 /*
1024 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
1025 * it is a parameterless subplan (not initplan), we suggest that it be
1026 * prepared to handle REWIND efficiently; otherwise there is no need.
1027 */
1028 sp_eflags = eflags
1029 & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
1030 if (bms_is_member(i, plannedstmt->rewindPlanIDs))
1031 sp_eflags |= EXEC_FLAG_REWIND;
1032
1033 subplanstate = ExecInitNode(subplan, estate, sp_eflags);
1034
1035 estate->es_subplanstates = lappend(estate->es_subplanstates,
1036 subplanstate);
1037
1038 i++;
1039 }
1040
1041 /*
1042 * Initialize the private state information for all the nodes in the query
1043 * tree. This opens files, allocates storage and leaves us ready to start
1044 * processing tuples.
1045 */
1046 planstate = ExecInitNode(plan, estate, eflags);
1047
1048 /*
1049 * Get the tuple descriptor describing the type of tuples to return.
1050 */
1051 tupType = ExecGetResultType(planstate);
1052
1053 /*
1054 * Initialize the junk filter if needed. SELECT queries need a filter if
1055 * there are any junk attrs in the top-level tlist.
1056 */
1057 if (operation == CMD_SELECT)
1058 {
1059 bool junk_filter_needed = false;
1060 ListCell *tlist;
1061
1062 foreach(tlist, plan->targetlist)
1063 {
1064 TargetEntry *tle = (TargetEntry *) lfirst(tlist);
1065
1066 if (tle->resjunk)
1067 {
1068 junk_filter_needed = true;
1069 break;
1070 }
1071 }
1072
1073 if (junk_filter_needed)
1074 {
1075 JunkFilter *j;
1076
1077 j = ExecInitJunkFilter(planstate->plan->targetlist,
1078 tupType->tdhasoid,
1079 ExecInitExtraTupleSlot(estate));
1080 estate->es_junkFilter = j;
1081
1082 /* Want to return the cleaned tuple type */
1083 tupType = j->jf_cleanTupType;
1084 }
1085 }
1086
1087 queryDesc->tupDesc = tupType;
1088 queryDesc->planstate = planstate;
1089 }
1090
1091 /*
1092 * Check that a proposed result relation is a legal target for the operation
1093 *
1094 * Generally the parser and/or planner should have noticed any such mistake
1095 * already, but let's make sure.
1096 *
1097 * Note: when changing this function, you probably also need to look at
1098 * CheckValidRowMarkRel.
1099 */
void
CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
{
	Relation	resultRel = resultRelInfo->ri_RelationDesc;
	TriggerDesc *trigDesc = resultRel->trigdesc;
	FdwRoutine *fdwroutine;

	/* Dispatch on relkind; any kind not handled below is rejected */
	switch (resultRel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			/* Plain/partitioned tables are fine; just check replica identity */
			CheckCmdReplicaIdentity(resultRel, operation);
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_VIEW:

			/*
			 * Okay only if there's a suitable INSTEAD OF trigger.  Messages
			 * here should match rewriteHandler.c's rewriteTargetView and
			 * RewriteQuery, except that we omit errdetail because we haven't
			 * got the information handy (and given that we really shouldn't
			 * get here anyway, it's not worth great exertion to get).
			 */
			switch (operation)
			{
				case CMD_INSERT:
					if (!trigDesc || !trigDesc->trig_insert_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot insert into view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.")));
					break;
				case CMD_UPDATE:
					if (!trigDesc || !trigDesc->trig_update_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot update view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.")));
					break;
				case CMD_DELETE:
					if (!trigDesc || !trigDesc->trig_delete_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot delete from view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.")));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		case RELKIND_MATVIEW:
			/* Matviews are writable only during REFRESH-style maintenance */
			if (!MatViewIncrementalMaintenanceIsEnabled())
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot change materialized view \"%s\"",
								RelationGetRelationName(resultRel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = resultRelInfo->ri_FdwRoutine;
			switch (operation)
			{
				case CMD_INSERT:

					/*
					 * If this is a foreign partition being used as the
					 * target of tuple routing (ri_PartitionRoot is set),
					 * skip the check here; that case is disallowed
					 * elsewhere.
					 */
					if (resultRelInfo->ri_PartitionRoot)
						break;
					if (fdwroutine->ExecForeignInsert == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot insert into foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					/* The FDW may also veto specific operations at runtime */
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow inserts",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_UPDATE:
					if (fdwroutine->ExecForeignUpdate == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot update foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow updates",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_DELETE:
					if (fdwroutine->ExecForeignDelete == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot delete from foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow deletes",
										RelationGetRelationName(resultRel))));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		default:
			/* Any other relkind (index, composite type, ...) is not a target */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
	}
}
1236
1237 /*
1238 * Check that a proposed rowmark target relation is a legal target
1239 *
1240 * In most cases parser and/or planner should have noticed this already, but
1241 * they don't cover all cases.
1242 */
static void
CheckValidRowMarkRel(Relation rel, RowMarkType markType)
{
	FdwRoutine *fdwroutine;

	/* Dispatch on relkind; anything not handled below cannot be row-marked */
	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			/* OK */
			break;
		case RELKIND_SEQUENCE:
			/* Must disallow this because we don't vacuum sequences */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in sequence \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_TOASTVALUE:
			/* We could allow this, but there seems no good reason to */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in TOAST relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_VIEW:
			/* Should not get here; planner should have expanded the view */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in view \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_MATVIEW:
			/* Allow referencing a matview, but not actual locking clauses */
			if (markType != ROW_MARK_REFERENCE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot lock rows in materialized view \"%s\"",
								RelationGetRelationName(rel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it (RefetchForeignRow callback) */
			fdwroutine = GetFdwRoutineForRelation(rel, false);
			if (fdwroutine->RefetchForeignRow == NULL)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot lock rows in foreign table \"%s\"",
								RelationGetRelationName(rel))));
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
	}
}
1300
1301 /*
1302 * Initialize ResultRelInfo data for one result relation
1303 *
1304 * Caution: before Postgres 9.1, this function included the relkind checking
1305 * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
1306 * appropriate. Be sure callers cover those needs.
1307 */
1308 void
InitResultRelInfo(ResultRelInfo * resultRelInfo,Relation resultRelationDesc,Index resultRelationIndex,Relation partition_root,int instrument_options)1309 InitResultRelInfo(ResultRelInfo *resultRelInfo,
1310 Relation resultRelationDesc,
1311 Index resultRelationIndex,
1312 Relation partition_root,
1313 int instrument_options)
1314 {
1315 List *partition_check = NIL;
1316
1317 MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
1318 resultRelInfo->type = T_ResultRelInfo;
1319 resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
1320 resultRelInfo->ri_RelationDesc = resultRelationDesc;
1321 resultRelInfo->ri_NumIndices = 0;
1322 resultRelInfo->ri_IndexRelationDescs = NULL;
1323 resultRelInfo->ri_IndexRelationInfo = NULL;
1324 /* make a copy so as not to depend on relcache info not changing... */
1325 resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
1326 if (resultRelInfo->ri_TrigDesc)
1327 {
1328 int n = resultRelInfo->ri_TrigDesc->numtriggers;
1329
1330 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1331 palloc0(n * sizeof(FmgrInfo));
1332 resultRelInfo->ri_TrigWhenExprs = (ExprState **)
1333 palloc0(n * sizeof(ExprState *));
1334 if (instrument_options)
1335 resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
1336 }
1337 else
1338 {
1339 resultRelInfo->ri_TrigFunctions = NULL;
1340 resultRelInfo->ri_TrigWhenExprs = NULL;
1341 resultRelInfo->ri_TrigInstrument = NULL;
1342 }
1343 if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1344 resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
1345 else
1346 resultRelInfo->ri_FdwRoutine = NULL;
1347 resultRelInfo->ri_FdwState = NULL;
1348 resultRelInfo->ri_usesFdwDirectModify = false;
1349 resultRelInfo->ri_ConstraintExprs = NULL;
1350 resultRelInfo->ri_junkFilter = NULL;
1351 resultRelInfo->ri_projectReturning = NULL;
1352
1353 /*
1354 * Partition constraint, which also includes the partition constraint of
1355 * all the ancestors that are partitions. Note that it will be checked
1356 * even in the case of tuple-routing where this table is the target leaf
1357 * partition, if there any BR triggers defined on the table. Although
1358 * tuple-routing implicitly preserves the partition constraint of the
1359 * target partition for a given row, the BR triggers may change the row
1360 * such that the constraint is no longer satisfied, which we must fail for
1361 * by checking it explicitly.
1362 *
1363 * If this is a partitioned table, the partition constraint (if any) of a
1364 * given row will be checked just before performing tuple-routing.
1365 */
1366 partition_check = RelationGetPartitionQual(resultRelationDesc);
1367
1368 resultRelInfo->ri_PartitionCheck = partition_check;
1369 resultRelInfo->ri_PartitionRoot = partition_root;
1370 }
1371
1372 /*
1373 * ExecGetTriggerResultRel
1374 *
1375 * Get a ResultRelInfo for a trigger target relation. Most of the time,
1376 * triggers are fired on one of the result relations of the query, and so
1377 * we can just return a member of the es_result_relations array, the
1378 * es_root_result_relations array (if any), or the es_leaf_result_relations
1379 * list (if any). (Note: in self-join situations there might be multiple
1380 * members with the same OID; if so it doesn't matter which one we pick.)
1381 * However, it is sometimes necessary to fire triggers on other relations;
1382 * this happens mainly when an RI update trigger queues additional triggers
1383 * on other relations, which will be processed in the context of the outer
1384 * query. For efficiency's sake, we want to have a ResultRelInfo for those
1385 * triggers too; that can avoid repeated re-opening of the relation. (It
1386 * also provides a way for EXPLAIN ANALYZE to report the runtimes of such
1387 * triggers.) So we make additional ResultRelInfo's as needed, and save them
1388 * in es_trig_target_relations.
1389 */
1390 ResultRelInfo *
ExecGetTriggerResultRel(EState * estate,Oid relid)1391 ExecGetTriggerResultRel(EState *estate, Oid relid)
1392 {
1393 ResultRelInfo *rInfo;
1394 int nr;
1395 ListCell *l;
1396 Relation rel;
1397 MemoryContext oldcontext;
1398
1399 /* First, search through the query result relations */
1400 rInfo = estate->es_result_relations;
1401 nr = estate->es_num_result_relations;
1402 while (nr > 0)
1403 {
1404 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1405 return rInfo;
1406 rInfo++;
1407 nr--;
1408 }
1409 /* Second, search through the root result relations, if any */
1410 rInfo = estate->es_root_result_relations;
1411 nr = estate->es_num_root_result_relations;
1412 while (nr > 0)
1413 {
1414 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1415 return rInfo;
1416 rInfo++;
1417 nr--;
1418 }
1419 /* Third, search through the leaf result relations, if any */
1420 foreach(l, estate->es_leaf_result_relations)
1421 {
1422 rInfo = (ResultRelInfo *) lfirst(l);
1423 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1424 return rInfo;
1425 }
1426 /* Nope, but maybe we already made an extra ResultRelInfo for it */
1427 foreach(l, estate->es_trig_target_relations)
1428 {
1429 rInfo = (ResultRelInfo *) lfirst(l);
1430 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1431 return rInfo;
1432 }
1433 /* Nope, so we need a new one */
1434
1435 /*
1436 * Open the target relation's relcache entry. We assume that an
1437 * appropriate lock is still held by the backend from whenever the trigger
1438 * event got queued, so we need take no new lock here. Also, we need not
1439 * recheck the relkind, so no need for CheckValidResultRel.
1440 */
1441 rel = heap_open(relid, NoLock);
1442
1443 /*
1444 * Make the new entry in the right context.
1445 */
1446 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
1447 rInfo = makeNode(ResultRelInfo);
1448 InitResultRelInfo(rInfo,
1449 rel,
1450 0, /* dummy rangetable index */
1451 NULL,
1452 estate->es_instrument);
1453 estate->es_trig_target_relations =
1454 lappend(estate->es_trig_target_relations, rInfo);
1455 MemoryContextSwitchTo(oldcontext);
1456
1457 /*
1458 * Currently, we don't need any index information in ResultRelInfos used
1459 * only for triggers, so no need to call ExecOpenIndices.
1460 */
1461
1462 return rInfo;
1463 }
1464
1465 /*
1466 * Close any relations that have been opened by ExecGetTriggerResultRel().
1467 */
1468 void
ExecCleanUpTriggerState(EState * estate)1469 ExecCleanUpTriggerState(EState *estate)
1470 {
1471 ListCell *l;
1472
1473 foreach(l, estate->es_trig_target_relations)
1474 {
1475 ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
1476
1477 /* Close indices and then the relation itself */
1478 ExecCloseIndices(resultRelInfo);
1479 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1480 }
1481 }
1482
1483 /*
1484 * ExecContextForcesOids
1485 *
1486 * This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS,
1487 * we need to ensure that result tuples have space for an OID iff they are
1488 * going to be stored into a relation that has OIDs. In other contexts
1489 * we are free to choose whether to leave space for OIDs in result tuples
1490 * (we generally don't want to, but we do if a physical-tlist optimization
1491 * is possible). This routine checks the plan context and returns TRUE if the
1492 * choice is forced, FALSE if the choice is not forced. In the TRUE case,
1493 * *hasoids is set to the required value.
1494 *
1495 * One reason this is ugly is that all plan nodes in the plan tree will emit
1496 * tuples with space for an OID, though we really only need the topmost node
1497 * to do so. However, node types like Sort don't project new tuples but just
1498 * return their inputs, and in those cases the requirement propagates down
1499 * to the input node. Eventually we might make this code smart enough to
1500 * recognize how far down the requirement really goes, but for now we just
1501 * make all plan nodes do the same thing if the top level forces the choice.
1502 *
1503 * We assume that if we are generating tuples for INSERT or UPDATE,
1504 * estate->es_result_relation_info is already set up to describe the target
1505 * relation. Note that in an UPDATE that spans an inheritance tree, some of
1506 * the target relations may have OIDs and some not. We have to make the
1507 * decisions on a per-relation basis as we initialize each of the subplans of
1508 * the ModifyTable node, so ModifyTable has to set es_result_relation_info
1509 * while initializing each subplan.
1510 *
1511 * CREATE TABLE AS is even uglier, because we don't have the target relation's
1512 * descriptor available when this code runs; we have to look aside at the
1513 * flags passed to ExecutorStart().
1514 */
1515 bool
ExecContextForcesOids(PlanState * planstate,bool * hasoids)1516 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1517 {
1518 ResultRelInfo *ri = planstate->state->es_result_relation_info;
1519
1520 if (ri != NULL)
1521 {
1522 Relation rel = ri->ri_RelationDesc;
1523
1524 if (rel != NULL)
1525 {
1526 *hasoids = rel->rd_rel->relhasoids;
1527 return true;
1528 }
1529 }
1530
1531 if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS)
1532 {
1533 *hasoids = true;
1534 return true;
1535 }
1536 if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS)
1537 {
1538 *hasoids = false;
1539 return true;
1540 }
1541
1542 return false;
1543 }
1544
1545 /* ----------------------------------------------------------------
1546 * ExecPostprocessPlan
1547 *
1548 * Give plan nodes a final chance to execute before shutdown
1549 * ----------------------------------------------------------------
1550 */
1551 static void
ExecPostprocessPlan(EState * estate)1552 ExecPostprocessPlan(EState *estate)
1553 {
1554 ListCell *lc;
1555
1556 /*
1557 * Make sure nodes run forward.
1558 */
1559 estate->es_direction = ForwardScanDirection;
1560
1561 /*
1562 * Run any secondary ModifyTable nodes to completion, in case the main
1563 * query did not fetch all rows from them. (We do this to ensure that
1564 * such nodes have predictable results.)
1565 */
1566 foreach(lc, estate->es_auxmodifytables)
1567 {
1568 PlanState *ps = (PlanState *) lfirst(lc);
1569
1570 for (;;)
1571 {
1572 TupleTableSlot *slot;
1573
1574 /* Reset the per-output-tuple exprcontext each time */
1575 ResetPerTupleExprContext(estate);
1576
1577 slot = ExecProcNode(ps);
1578
1579 if (TupIsNull(slot))
1580 break;
1581 }
1582 }
1583 }
1584
1585 /* ----------------------------------------------------------------
1586 * ExecEndPlan
1587 *
1588 * Cleans up the query plan -- closes files and frees up storage
1589 *
1590 * NOTE: we are no longer very worried about freeing storage per se
1591 * in this code; FreeExecutorState should be guaranteed to release all
1592 * memory that needs to be released. What we are worried about doing
1593 * is closing relations and dropping buffer pins. Thus, for example,
1594 * tuple tables must be cleared or dropped to ensure pins are released.
1595 * ----------------------------------------------------------------
1596 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * for subplans too
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/*
	 * destroy the executor's tuple table.  Actually we only care about
	 * releasing buffer pins and tupdesc refcounts; there's no need to pfree
	 * the TupleTableSlots, since the containing memory context is about to go
	 * away anyway.
	 */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 * (Passing NoLock means "release no lock", not "acquire no lock".)
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/* Close the root target relation(s); no indexes were opened for these. */
	resultRelInfo = estate->es_root_result_relations;
	for (i = estate->es_num_root_result_relations; i > 0; i--)
	{
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/* likewise close any trigger target relations */
	ExecCleanUpTriggerState(estate);

	/*
	 * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping
	 * locks (ROW_MARK_COPY marks have a NULL relation and are skipped)
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = (ExecRowMark *) lfirst(l);

		if (erm->relation)
			heap_close(erm->relation, NoLock);
	}
}
1662
1663 /* ----------------------------------------------------------------
1664 * ExecutePlan
1665 *
1666 * Processes the query plan until we have retrieved 'numberTuples' tuples,
1667 * moving in the specified direction.
1668 *
1669 * Runs to completion if numberTuples is 0
1670 *
1671 * Note: the ctid attribute is a 'junk' attribute that is removed before the
1672 * user can see it
1673 * ----------------------------------------------------------------
1674 */
static void
ExecutePlan(EState *estate,
			PlanState *planstate,
			bool use_parallel_mode,
			CmdType operation,
			bool sendTuples,
			uint64 numberTuples,
			ScanDirection direction,
			DestReceiver *dest,
			bool execute_once)
{
	TupleTableSlot *slot;
	uint64		current_tuple_count;

	/*
	 * initialize local variables
	 */
	current_tuple_count = 0;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * If the plan might potentially be executed multiple times, we must force
	 * it to run without parallelism, because we might exit early.  Also
	 * disable parallelism when writing into a relation, because no database
	 * changes are allowed in parallel mode.
	 */
	if (!execute_once || dest->mydest == DestIntoRel)
		use_parallel_mode = false;

	estate->es_use_parallel_mode = use_parallel_mode;
	if (use_parallel_mode)
		EnterParallelMode();

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */
	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
		slot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just end the loop...
		 */
		if (TupIsNull(slot))
		{
			/*
			 * If we know we won't need to back up, we can release resources
			 * at this point.
			 */
			if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD))
				(void) ExecShutdownNode(planstate);
			break;
		}

		/*
		 * If we have a junk filter, then project a new tuple with the junk
		 * removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 */
		if (estate->es_junkFilter != NULL)
			slot = ExecFilterJunk(estate->es_junkFilter, slot);

		/*
		 * If we are supposed to send the tuple somewhere, do so. (In
		 * practice, this is probably always the case at this point.)
		 */
		if (sendTuples)
		{
			/*
			 * If we are not able to send the tuple, we assume the destination
			 * has closed and no more tuples can be sent. If that's the case,
			 * end the loop.
			 */
			if (!((*dest->receiveSlot) (slot, dest)))
				break;
		}

		/*
		 * Count tuples processed, if this is a SELECT.  (For other operation
		 * types, the ModifyTable plan node must count the appropriate
		 * events.)
		 */
		if (operation == CMD_SELECT)
			(estate->es_processed)++;

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
		{
			/*
			 * If we know we won't need to back up, we can release resources
			 * at this point.
			 */
			if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD))
				(void) ExecShutdownNode(planstate);
			break;
		}
	}

	/* Balance the EnterParallelMode() call made above */
	if (use_parallel_mode)
		ExitParallelMode();
}
1795
1796
1797 /*
1798 * ExecRelCheck --- check that tuple meets constraints for result relation
1799 *
1800 * Returns NULL if OK, else name of failed check constraint
1801 */
1802 static const char *
ExecRelCheck(ResultRelInfo * resultRelInfo,TupleTableSlot * slot,EState * estate)1803 ExecRelCheck(ResultRelInfo *resultRelInfo,
1804 TupleTableSlot *slot, EState *estate)
1805 {
1806 Relation rel = resultRelInfo->ri_RelationDesc;
1807 int ncheck = rel->rd_att->constr->num_check;
1808 ConstrCheck *check = rel->rd_att->constr->check;
1809 ExprContext *econtext;
1810 MemoryContext oldContext;
1811 int i;
1812
1813 /*
1814 * If first time through for this result relation, build expression
1815 * nodetrees for rel's constraint expressions. Keep them in the per-query
1816 * memory context so they'll survive throughout the query.
1817 */
1818 if (resultRelInfo->ri_ConstraintExprs == NULL)
1819 {
1820 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1821 resultRelInfo->ri_ConstraintExprs =
1822 (ExprState **) palloc(ncheck * sizeof(ExprState *));
1823 for (i = 0; i < ncheck; i++)
1824 {
1825 Expr *checkconstr;
1826
1827 checkconstr = stringToNode(check[i].ccbin);
1828 resultRelInfo->ri_ConstraintExprs[i] =
1829 ExecPrepareExpr(checkconstr, estate);
1830 }
1831 MemoryContextSwitchTo(oldContext);
1832 }
1833
1834 /*
1835 * We will use the EState's per-tuple context for evaluating constraint
1836 * expressions (creating it if it's not already there).
1837 */
1838 econtext = GetPerTupleExprContext(estate);
1839
1840 /* Arrange for econtext's scan tuple to be the tuple under test */
1841 econtext->ecxt_scantuple = slot;
1842
1843 /* And evaluate the constraints */
1844 for (i = 0; i < ncheck; i++)
1845 {
1846 ExprState *checkconstr = resultRelInfo->ri_ConstraintExprs[i];
1847
1848 /*
1849 * NOTE: SQL specifies that a NULL result from a constraint expression
1850 * is not to be treated as a failure. Therefore, use ExecCheck not
1851 * ExecQual.
1852 */
1853 if (!ExecCheck(checkconstr, econtext))
1854 return check[i].ccname;
1855 }
1856
1857 /* NULL result means no error */
1858 return NULL;
1859 }
1860
1861 /*
1862 * ExecPartitionCheck --- check that tuple meets the partition constraint.
1863 */
static void
ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
				   EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	Bitmapset  *modifiedCols;
	Bitmapset  *insertedCols;
	Bitmapset  *updatedCols;
	ExprContext *econtext;

	/*
	 * If first time through, build expression state tree for the partition
	 * check expression.  Keep it in the per-query memory context so they'll
	 * survive throughout the query.
	 */
	if (resultRelInfo->ri_PartitionCheckExpr == NULL)
	{
		List	   *qual = resultRelInfo->ri_PartitionCheck;

		resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
	}

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/*
	 * As in case of the catalogued constraints, we treat a NULL result as
	 * success here, not a failure.
	 */
	if (!ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext))
	{
		char	   *val_desc;
		Relation	orig_rel = rel;

		/*
		 * If the tuple was routed into this leaf partition
		 * (ri_PartitionRoot is set), it has been converted to the
		 * partition's rowtype; convert it back to the root table's rowtype
		 * so that the row shown in the error message matches the
		 * user-supplied tuple.
		 */
		if (resultRelInfo->ri_PartitionRoot)
		{
			HeapTuple	tuple = ExecFetchSlotTuple(slot);
			TupleDesc	old_tupdesc = RelationGetDescr(rel);
			TupleConversionMap *map;

			rel = resultRelInfo->ri_PartitionRoot;
			tupdesc = RelationGetDescr(rel);
			/* a reverse map: from partition rowtype back to root rowtype */
			map = convert_tuples_by_name(old_tupdesc, tupdesc,
										 gettext_noop("could not convert row type"));
			if (map != NULL)
			{
				tuple = do_convert_tuple(tuple, map);
				/* one off slot for building error message */
				slot = MakeTupleTableSlot();
				ExecSetSlotDescriptor(slot, tupdesc);
				ExecStoreTuple(tuple, slot, InvalidBuffer, false);
			}
		}

		/* Only show columns the command actually touched (privacy/noise) */
		insertedCols = GetInsertedColumns(resultRelInfo, estate);
		updatedCols = GetUpdatedColumns(resultRelInfo, estate);
		modifiedCols = bms_union(insertedCols, updatedCols);
		val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
												 slot,
												 tupdesc,
												 modifiedCols,
												 64);
		ereport(ERROR,
				(errcode(ERRCODE_CHECK_VIOLATION),
				 errmsg("new row for relation \"%s\" violates partition constraint",
						RelationGetRelationName(orig_rel)),
				 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
	}
}
1942
/*
 * ExecConstraints - check constraints of the tuple in 'slot'
 *
 * This checks the traditional NOT NULL and check constraints, as well as
 * the partition constraint, if any.
 *
 * Note: 'slot' contains the tuple to check the constraints of, which may
 * have been converted from the original input tuple after tuple routing.
 * 'resultRelInfo' is the original result relation, before tuple routing.
 *
 * On the first failing constraint we throw ERROR (NOT_NULL_VIOLATION or
 * CHECK_VIOLATION); on success we return normally.  The error detail shows
 * the failing row, converted back to the partition root's rowtype when the
 * tuple was routed, and filtered by the user's column-level privileges.
 */
void
ExecConstraints(ResultRelInfo *resultRelInfo,
				TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	TupleConstr *constr = tupdesc->constr;
	Bitmapset  *modifiedCols;
	Bitmapset  *insertedCols;
	Bitmapset  *updatedCols;

	/* Caller should not invoke us unless there's something to check */
	Assert(constr || resultRelInfo->ri_PartitionCheck);

	/* First, enforce any NOT NULL column constraints */
	if (constr && constr->has_not_null)
	{
		int			natts = tupdesc->natts;
		int			attrChk;

		/* attrChk is a 1-based attribute number, per slot_attisnull() */
		for (attrChk = 1; attrChk <= natts; attrChk++)
		{
			if (tupdesc->attrs[attrChk - 1]->attnotnull &&
				slot_attisnull(slot, attrChk))
			{
				char	   *val_desc;
				Relation	orig_rel = rel;
				TupleDesc	orig_tupdesc = RelationGetDescr(rel);

				/*
				 * If the tuple has been routed, it's been converted to the
				 * partition's rowtype, which might differ from the root
				 * table's.  We must convert it back to the root table's
				 * rowtype so that val_desc shown error message matches the
				 * input tuple.
				 */
				if (resultRelInfo->ri_PartitionRoot)
				{
					HeapTuple	tuple = ExecFetchSlotTuple(slot);
					TupleConversionMap *map;

					rel = resultRelInfo->ri_PartitionRoot;
					tupdesc = RelationGetDescr(rel);
					/* a reverse map */
					map = convert_tuples_by_name(orig_tupdesc, tupdesc,
												 gettext_noop("could not convert row type"));
					if (map != NULL)
					{
						tuple = do_convert_tuple(tuple, map);
						/* one off slot for building error message */
						slot = MakeTupleTableSlot();
						ExecSetSlotDescriptor(slot, tupdesc);
						ExecStoreTuple(tuple, slot, InvalidBuffer, false);
					}
				}

				/* Show only columns the user supplied or can SELECT */
				insertedCols = GetInsertedColumns(resultRelInfo, estate);
				updatedCols = GetUpdatedColumns(resultRelInfo, estate);
				modifiedCols = bms_union(insertedCols, updatedCols);
				val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
														 slot,
														 tupdesc,
														 modifiedCols,
														 64);

				/* column name/number reported against the original relation */
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
						 errmsg("null value in column \"%s\" violates not-null constraint",
								NameStr(orig_tupdesc->attrs[attrChk - 1]->attname)),
						 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
						 errtablecol(orig_rel, attrChk)));
			}
		}
	}

	/* Next, enforce any CHECK constraints */
	if (constr && constr->num_check > 0)
	{
		const char *failed;

		/* ExecRelCheck returns the name of the first failing constraint */
		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
		{
			char	   *val_desc;
			Relation	orig_rel = rel;

			/* See the comment above. */
			if (resultRelInfo->ri_PartitionRoot)
			{
				HeapTuple	tuple = ExecFetchSlotTuple(slot);
				TupleDesc	old_tupdesc = RelationGetDescr(rel);
				TupleConversionMap *map;

				rel = resultRelInfo->ri_PartitionRoot;
				tupdesc = RelationGetDescr(rel);
				/* a reverse map */
				map = convert_tuples_by_name(old_tupdesc, tupdesc,
											 gettext_noop("could not convert row type"));
				if (map != NULL)
				{
					tuple = do_convert_tuple(tuple, map);
					/* one off slot for building error message */
					slot = MakeTupleTableSlot();
					ExecSetSlotDescriptor(slot, tupdesc);
					ExecStoreTuple(tuple, slot, InvalidBuffer, false);
				}
			}

			insertedCols = GetInsertedColumns(resultRelInfo, estate);
			updatedCols = GetUpdatedColumns(resultRelInfo, estate);
			modifiedCols = bms_union(insertedCols, updatedCols);
			val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
													 slot,
													 tupdesc,
													 modifiedCols,
													 64);
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
							RelationGetRelationName(orig_rel), failed),
					 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
					 errtableconstraint(orig_rel, failed)));
		}
	}

	/* Finally, enforce the partition constraint, if any */
	if (resultRelInfo->ri_PartitionCheck)
		ExecPartitionCheck(resultRelInfo, slot, estate);
}
2077
2078
/*
 * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
 * of the specified kind.
 *
 * Note that this needs to be called multiple times to ensure that all kinds of
 * WITH CHECK OPTIONs are handled (both those from views which have the WITH
 * CHECK OPTION set and from row level security policies).  See ExecInsert()
 * and ExecUpdate().
 *
 * 'kind' selects which WCOs to evaluate this time around; WCOs of any other
 * kind attached to the relation are skipped.  On the first failing WCO we
 * throw ERROR; the errcode depends on the kind (check-option violation for
 * views, insufficient-privilege for RLS policies).
 */
void
ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
					 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	ExprContext *econtext;
	ListCell   *l1,
			   *l2;

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Check each of the constraints */
	forboth(l1, resultRelInfo->ri_WithCheckOptions,
			l2, resultRelInfo->ri_WithCheckOptionExprs)
	{
		WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
		ExprState  *wcoExpr = (ExprState *) lfirst(l2);

		/*
		 * Skip any WCOs which are not the kind we are looking for at this
		 * time.
		 */
		if (wco->kind != kind)
			continue;

		/*
		 * WITH CHECK OPTION checks are intended to ensure that the new tuple
		 * is visible (in the case of a view) or that it passes the
		 * 'with-check' policy (in the case of row security).  If the qual
		 * evaluates to NULL or FALSE, then the new tuple won't be included in
		 * the view or doesn't pass the 'with-check' policy for the table.
		 */
		if (!ExecQual(wcoExpr, econtext))
		{
			char	   *val_desc;
			Bitmapset  *modifiedCols;
			Bitmapset  *insertedCols;
			Bitmapset  *updatedCols;

			switch (wco->kind)
			{
					/*
					 * For WITH CHECK OPTIONs coming from views, we might be
					 * able to provide the details on the row, depending on
					 * the permissions on the relation (that is, if the user
					 * could view it directly anyway).  For RLS violations, we
					 * don't include the data since we don't know if the user
					 * should be able to view the tuple as that depends on the
					 * USING policy.
					 */
				case WCO_VIEW_CHECK:
					/* See the comment in ExecConstraints(). */
					if (resultRelInfo->ri_PartitionRoot)
					{
						HeapTuple	tuple = ExecFetchSlotTuple(slot);
						TupleDesc	old_tupdesc = RelationGetDescr(rel);
						TupleConversionMap *map;

						rel = resultRelInfo->ri_PartitionRoot;
						tupdesc = RelationGetDescr(rel);
						/* a reverse map */
						map = convert_tuples_by_name(old_tupdesc, tupdesc,
													 gettext_noop("could not convert row type"));
						if (map != NULL)
						{
							tuple = do_convert_tuple(tuple, map);
							/* one off slot for building error message */
							slot = MakeTupleTableSlot();
							ExecSetSlotDescriptor(slot, tupdesc);
							ExecStoreTuple(tuple, slot, InvalidBuffer, false);
						}
					}

					/* Show only columns the user supplied or can SELECT */
					insertedCols = GetInsertedColumns(resultRelInfo, estate);
					updatedCols = GetUpdatedColumns(resultRelInfo, estate);
					modifiedCols = bms_union(insertedCols, updatedCols);
					val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
															 slot,
															 tupdesc,
															 modifiedCols,
															 64);

					ereport(ERROR,
							(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
							 errmsg("new row violates check option for view \"%s\"",
									wco->relname),
							 val_desc ? errdetail("Failing row contains %s.",
												  val_desc) : 0));
					break;
				case WCO_RLS_INSERT_CHECK:
				case WCO_RLS_UPDATE_CHECK:
					/* Named policy if available, else the generic message */
					if (wco->polname != NULL)
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
										wco->polname, wco->relname)));
					else
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy for table \"%s\"",
										wco->relname)));
					break;
				case WCO_RLS_CONFLICT_CHECK:
					if (wco->polname != NULL)
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
										wco->polname, wco->relname)));
					else
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
										wco->relname)));
					break;
				default:
					elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
					break;
			}
		}
	}
}
2217
/*
 * ExecBuildSlotValueDescription -- construct a string representing a tuple
 *
 * This is intentionally very similar to BuildIndexValueDescription, but
 * unlike that function, we truncate long field values (to at most maxfieldlen
 * bytes).  That seems necessary here since heap field values could be very
 * long, whereas index entries typically aren't so wide.
 *
 * Also, unlike the case with index entries, we need to be prepared to ignore
 * dropped columns.  We used to use the slot's tuple descriptor to decode the
 * data, but the slot's descriptor doesn't identify dropped columns, so we
 * now need to be passed the relation's descriptor.
 *
 * Note that, like BuildIndexValueDescription, if the user does not have
 * permission to view any of the columns involved, a NULL is returned.  Unlike
 * BuildIndexValueDescription, if the user has access to view a subset of the
 * column involved, that subset will be returned with a key identifying which
 * columns they are.
 *
 * The result is "(v1, v2, ...)" when the user has table-level SELECT, or
 * "(col1, col2, ...) = (v1, v2, ...)" when only a subset of columns is
 * visible.  'modifiedCols' (attnums offset by FirstLowInvalidHeapAttributeNumber)
 * marks columns the user supplied data for, which are always shown.
 */
static char *
ExecBuildSlotValueDescription(Oid reloid,
							  TupleTableSlot *slot,
							  TupleDesc tupdesc,
							  Bitmapset *modifiedCols,
							  int maxfieldlen)
{
	StringInfoData buf;
	StringInfoData collist;
	bool		write_comma = false;
	bool		write_comma_collist = false;
	int			i;
	AclResult	aclresult;
	bool		table_perm = false;
	bool		any_perm = false;

	/*
	 * Check if RLS is enabled and should be active for the relation; if so,
	 * then don't return anything.  Otherwise, go through normal permission
	 * checks.
	 */
	if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	initStringInfo(&buf);

	appendStringInfoChar(&buf, '(');

	/*
	 * Check if the user has permissions to see the row.  Table-level SELECT
	 * allows access to all columns.  If the user does not have table-level
	 * SELECT then we check each column and include those the user has SELECT
	 * rights on.  Additionally, we always include columns the user provided
	 * data for.
	 */
	aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/* Set up the buffer for the column list */
		initStringInfo(&collist);
		appendStringInfoChar(&collist, '(');
	}
	else
		table_perm = any_perm = true;

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	for (i = 0; i < tupdesc->natts; i++)
	{
		bool		column_perm = false;
		char	   *val;
		int			vallen;

		/* ignore dropped columns */
		if (tupdesc->attrs[i]->attisdropped)
			continue;

		if (!table_perm)
		{
			/*
			 * No table-level SELECT, so need to make sure they either have
			 * SELECT rights on the column or that they have provided the data
			 * for the column.  If not, omit this column from the error
			 * message.
			 */
			aclresult = pg_attribute_aclcheck(reloid, tupdesc->attrs[i]->attnum,
											  GetUserId(), ACL_SELECT);
			if (bms_is_member(tupdesc->attrs[i]->attnum - FirstLowInvalidHeapAttributeNumber,
							  modifiedCols) || aclresult == ACLCHECK_OK)
			{
				column_perm = any_perm = true;

				/* add the column's name to the key list */
				if (write_comma_collist)
					appendStringInfoString(&collist, ", ");
				else
					write_comma_collist = true;

				appendStringInfoString(&collist, NameStr(tupdesc->attrs[i]->attname));
			}
		}

		if (table_perm || column_perm)
		{
			if (slot->tts_isnull[i])
				val = "null";
			else
			{
				Oid			foutoid;
				bool		typisvarlena;

				/* render the datum with the type's output function */
				getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
								  &foutoid, &typisvarlena);
				val = OidOutputFunctionCall(foutoid, slot->tts_values[i]);
			}

			if (write_comma)
				appendStringInfoString(&buf, ", ");
			else
				write_comma = true;

			/* truncate if needed, at a multibyte character boundary */
			vallen = strlen(val);
			if (vallen <= maxfieldlen)
				appendStringInfoString(&buf, val);
			else
			{
				vallen = pg_mbcliplen(val, vallen, maxfieldlen);
				appendBinaryStringInfo(&buf, val, vallen);
				appendStringInfoString(&buf, "...");
			}
		}
	}

	/* If we end up with zero columns being returned, then return NULL. */
	if (!any_perm)
		return NULL;

	appendStringInfoChar(&buf, ')');

	if (!table_perm)
	{
		/* only a subset visible: emit "(col, ...) = (val, ...)" */
		appendStringInfoString(&collist, ") = ");
		appendStringInfoString(&collist, buf.data);

		return collist.data;
	}

	return buf.data;
}
2367
2368
2369 /*
2370 * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
2371 * given ResultRelInfo
2372 */
2373 LockTupleMode
ExecUpdateLockMode(EState * estate,ResultRelInfo * relinfo)2374 ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
2375 {
2376 Bitmapset *keyCols;
2377 Bitmapset *updatedCols;
2378
2379 /*
2380 * Compute lock mode to use. If columns that are part of the key have not
2381 * been modified, then we can use a weaker lock, allowing for better
2382 * concurrency.
2383 */
2384 updatedCols = GetUpdatedColumns(relinfo, estate);
2385 keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
2386 INDEX_ATTR_BITMAP_KEY);
2387
2388 if (bms_overlap(keyCols, updatedCols))
2389 return LockTupleExclusive;
2390
2391 return LockTupleNoKeyExclusive;
2392 }
2393
2394 /*
2395 * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
2396 *
2397 * If no such struct, either return NULL or throw error depending on missing_ok
2398 */
2399 ExecRowMark *
ExecFindRowMark(EState * estate,Index rti,bool missing_ok)2400 ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
2401 {
2402 ListCell *lc;
2403
2404 foreach(lc, estate->es_rowMarks)
2405 {
2406 ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
2407
2408 if (erm->rti == rti)
2409 return erm;
2410 }
2411 if (!missing_ok)
2412 elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
2413 return NULL;
2414 }
2415
2416 /*
2417 * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
2418 *
2419 * Inputs are the underlying ExecRowMark struct and the targetlist of the
2420 * input plan node (not planstate node!). We need the latter to find out
2421 * the column numbers of the resjunk columns.
2422 */
2423 ExecAuxRowMark *
ExecBuildAuxRowMark(ExecRowMark * erm,List * targetlist)2424 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
2425 {
2426 ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
2427 char resname[32];
2428
2429 aerm->rowmark = erm;
2430
2431 /* Look up the resjunk columns associated with this rowmark */
2432 if (erm->markType != ROW_MARK_COPY)
2433 {
2434 /* need ctid for all methods other than COPY */
2435 snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
2436 aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2437 resname);
2438 if (!AttributeNumberIsValid(aerm->ctidAttNo))
2439 elog(ERROR, "could not find junk %s column", resname);
2440 }
2441 else
2442 {
2443 /* need wholerow if COPY */
2444 snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
2445 aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
2446 resname);
2447 if (!AttributeNumberIsValid(aerm->wholeAttNo))
2448 elog(ERROR, "could not find junk %s column", resname);
2449 }
2450
2451 /* if child rel, need tableoid */
2452 if (erm->rti != erm->prti)
2453 {
2454 snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
2455 aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2456 resname);
2457 if (!AttributeNumberIsValid(aerm->toidAttNo))
2458 elog(ERROR, "could not find junk %s column", resname);
2459 }
2460
2461 return aerm;
2462 }
2463
2464
2465 /*
2466 * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
2467 * process the updated version under READ COMMITTED rules.
2468 *
2469 * See backend/executor/README for some info about how this works.
2470 */
2471
2472
2473 /*
2474 * Check a modified tuple to see if we want to process its updated version
2475 * under READ COMMITTED rules.
2476 *
2477 * estate - outer executor state data
2478 * epqstate - state for EvalPlanQual rechecking
2479 * relation - table containing tuple
2480 * rti - rangetable index of table containing tuple
2481 * lockmode - requested tuple lock mode
2482 * *tid - t_ctid from the outdated tuple (ie, next updated version)
2483 * priorXmax - t_xmax from the outdated tuple
2484 *
2485 * *tid is also an output parameter: it's modified to hold the TID of the
2486 * latest version of the tuple (note this may be changed even on failure)
2487 *
2488 * Returns a slot containing the new candidate update/delete tuple, or
2489 * NULL if we determine we shouldn't process the row.
2490 *
2491 * Note: properly, lockmode should be declared as enum LockTupleMode,
2492 * but we use "int" to avoid having to include heapam.h in executor.h.
2493 */
2494 TupleTableSlot *
EvalPlanQual(EState * estate,EPQState * epqstate,Relation relation,Index rti,int lockmode,ItemPointer tid,TransactionId priorXmax)2495 EvalPlanQual(EState *estate, EPQState *epqstate,
2496 Relation relation, Index rti, int lockmode,
2497 ItemPointer tid, TransactionId priorXmax)
2498 {
2499 TupleTableSlot *slot;
2500 HeapTuple copyTuple;
2501
2502 Assert(rti > 0);
2503
2504 /*
2505 * Get and lock the updated version of the row; if fail, return NULL.
2506 */
2507 copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
2508 tid, priorXmax);
2509
2510 if (copyTuple == NULL)
2511 return NULL;
2512
2513 /*
2514 * For UPDATE/DELETE we have to return tid of actual row we're executing
2515 * PQ for.
2516 */
2517 *tid = copyTuple->t_self;
2518
2519 /*
2520 * Need to run a recheck subquery. Initialize or reinitialize EPQ state.
2521 */
2522 EvalPlanQualBegin(epqstate, estate);
2523
2524 /*
2525 * Free old test tuple, if any, and store new tuple where relation's scan
2526 * node will see it
2527 */
2528 EvalPlanQualSetTuple(epqstate, rti, copyTuple);
2529
2530 /*
2531 * Fetch any non-locked source rows
2532 */
2533 EvalPlanQualFetchRowMarks(epqstate);
2534
2535 /*
2536 * Run the EPQ query. We assume it will return at most one tuple.
2537 */
2538 slot = EvalPlanQualNext(epqstate);
2539
2540 /*
2541 * If we got a tuple, force the slot to materialize the tuple so that it
2542 * is not dependent on any local state in the EPQ query (in particular,
2543 * it's highly likely that the slot contains references to any pass-by-ref
2544 * datums that may be present in copyTuple). As with the next step, this
2545 * is to guard against early re-use of the EPQ query.
2546 */
2547 if (!TupIsNull(slot))
2548 (void) ExecMaterializeSlot(slot);
2549
2550 /*
2551 * Clear out the test tuple. This is needed in case the EPQ query is
2552 * re-used to test a tuple for a different relation. (Not clear that can
2553 * really happen, but let's be safe.)
2554 */
2555 EvalPlanQualSetTuple(epqstate, rti, NULL);
2556
2557 return slot;
2558 }
2559
/*
 * Fetch a copy of the newest version of an outdated tuple
 *
 * estate - executor state data
 * relation - table containing tuple
 * lockmode - requested tuple lock mode
 * wait_policy - requested lock wait policy
 * *tid - t_ctid from the outdated tuple (ie, next updated version)
 * priorXmax - t_xmax from the outdated tuple
 *
 * Returns a palloc'd copy of the newest tuple version, or NULL if we find
 * that there is no newest version (ie, the row was deleted not updated).
 * We also return NULL if the tuple is locked and the wait policy is to skip
 * such tuples.
 *
 * If successful, we have locked the newest tuple version, so caller does not
 * need to worry about it changing anymore.
 *
 * Note: properly, lockmode should be declared as enum LockTupleMode,
 * but we use "int" to avoid having to include heapam.h in executor.h.
 */
HeapTuple
EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
				  LockWaitPolicy wait_policy,
				  ItemPointer tid, TransactionId priorXmax)
{
	HeapTuple	copyTuple = NULL;
	HeapTupleData tuple;
	SnapshotData SnapshotDirty;

	/*
	 * fetch target tuple
	 *
	 * Loop here to deal with updated or busy tuples.  Each iteration either
	 * exits the function, or advances tuple.t_self/priorXmax one step along
	 * the update chain and retries.
	 */
	InitDirtySnapshot(SnapshotDirty);
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		/* heap_fetch pins the buffer on success; we must release it */
		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			HTSU_Result test;
			HeapUpdateFailureData hufd;

			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock.  We assume reading a TransactionId to be
			 * atomic, and Xmin never changes in an existing tuple, except to
			 * invalid or frozen, and neither of those can match priorXmax.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty.xmin))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
										 SnapshotDirty.xmin,
										 ItemPointerGetBlockNumber(&tuple.t_self),
										 ItemPointerGetOffsetNumber(&tuple.t_self),
										 RelationGetRelationName(relation))));

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort, or die trying.
			 */
			if (TransactionIdIsValid(SnapshotDirty.xmax))
			{
				ReleaseBuffer(buffer);
				switch (wait_policy)
				{
					case LockWaitBlock:
						XactLockTableWait(SnapshotDirty.xmax,
										  relation, &tuple.t_self,
										  XLTW_FetchUpdated);
						break;
					case LockWaitSkip:
						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
							return NULL;	/* skip instead of waiting */
						break;
					case LockWaitError:
						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
							ereport(ERROR,
									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
									 errmsg("could not obtain lock on row in relation \"%s\"",
											RelationGetRelationName(relation))));
						break;
				}
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against es_output_cid: cmin >= current CID means our
			 * command cannot see the tuple, so we should ignore it. Otherwise
			 * heap_lock_tuple() will throw an error, and so would any later
			 * attempt to update or delete the tuple.  (We need not check cmax
			 * because HeapTupleSatisfiesDirty will consider a tuple deleted
			 * by our transaction dead, regardless of cmax.) We just checked
			 * that priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * This is a live tuple, so now try to lock it.
			 */
			test = heap_lock_tuple(relation, &tuple,
								   estate->es_output_cid,
								   lockmode, wait_policy,
								   false, &buffer, &hufd);
			/* We now have two pins on the buffer, get rid of one */
			ReleaseBuffer(buffer);

			/* NB: one pin remains; each exit path below must release it */
			switch (test)
			{
				case HeapTupleSelfUpdated:

					/*
					 * The target tuple was already updated or deleted by the
					 * current command, or by a later command in the current
					 * transaction.  We *must* ignore the tuple in the former
					 * case, so as to avoid the "Halloween problem" of
					 * repeated update attempts.  In the latter case it might
					 * be sensible to fetch the updated tuple instead, but
					 * doing so would require changing heap_update and
					 * heap_delete to not complain about updating "invisible"
					 * tuples, which seems pretty scary (heap_lock_tuple will
					 * not complain, but few callers expect
					 * HeapTupleInvisible, and we're not one of them).  So for
					 * now, treat the tuple as deleted and do not process.
					 */
					ReleaseBuffer(buffer);
					return NULL;

				case HeapTupleMayBeUpdated:
					/* successfully locked */
					break;

				case HeapTupleUpdated:
					ReleaseBuffer(buffer);
					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/* Should not encounter speculative tuple on recheck */
					Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
					if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
					{
						/* it was updated, so look at the updated version */
						tuple.t_self = hufd.ctid;
						/* updated row should have xmin matching this xmax */
						priorXmax = hufd.xmax;
						continue;
					}
					/* tuple was deleted, so give up */
					return NULL;

				case HeapTupleWouldBlock:
					ReleaseBuffer(buffer);
					return NULL;

				case HeapTupleInvisible:
					elog(ERROR, "attempted to lock invisible tuple");

				default:
					ReleaseBuffer(buffer);
					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
						 test);
					return NULL;	/* keep compiler quiet */
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * process.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * Return the copied tuple
	 */
	return copyTuple;
}
2808
2809 /*
2810 * EvalPlanQualInit -- initialize during creation of a plan state node
2811 * that might need to invoke EPQ processing.
2812 *
2813 * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
2814 * with EvalPlanQualSetPlan.
2815 */
2816 void
EvalPlanQualInit(EPQState * epqstate,EState * estate,Plan * subplan,List * auxrowmarks,int epqParam)2817 EvalPlanQualInit(EPQState *epqstate, EState *estate,
2818 Plan *subplan, List *auxrowmarks, int epqParam)
2819 {
2820 /* Mark the EPQ state inactive */
2821 epqstate->estate = NULL;
2822 epqstate->planstate = NULL;
2823 epqstate->origslot = NULL;
2824 /* ... and remember data that EvalPlanQualBegin will need */
2825 epqstate->plan = subplan;
2826 epqstate->arowMarks = auxrowmarks;
2827 epqstate->epqParam = epqParam;
2828 }
2829
2830 /*
2831 * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
2832 *
2833 * We need this so that ModifyTable can deal with multiple subplans.
2834 */
2835 void
EvalPlanQualSetPlan(EPQState * epqstate,Plan * subplan,List * auxrowmarks)2836 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
2837 {
2838 /* If we have a live EPQ query, shut it down */
2839 EvalPlanQualEnd(epqstate);
2840 /* And set/change the plan pointer */
2841 epqstate->plan = subplan;
2842 /* The rowmarks depend on the plan, too */
2843 epqstate->arowMarks = auxrowmarks;
2844 }
2845
2846 /*
2847 * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
2848 *
2849 * NB: passed tuple must be palloc'd; it may get freed later
2850 */
2851 void
EvalPlanQualSetTuple(EPQState * epqstate,Index rti,HeapTuple tuple)2852 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
2853 {
2854 EState *estate = epqstate->estate;
2855
2856 Assert(rti > 0);
2857
2858 /*
2859 * free old test tuple, if any, and store new tuple where relation's scan
2860 * node will see it
2861 */
2862 if (estate->es_epqTuple[rti - 1] != NULL)
2863 heap_freetuple(estate->es_epqTuple[rti - 1]);
2864 estate->es_epqTuple[rti - 1] = tuple;
2865 estate->es_epqTupleSet[rti - 1] = true;
2866 }
2867
2868 /*
2869 * Fetch back the current test tuple (if any) for the specified RTI
2870 */
2871 HeapTuple
EvalPlanQualGetTuple(EPQState * epqstate,Index rti)2872 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
2873 {
2874 EState *estate = epqstate->estate;
2875
2876 Assert(rti > 0);
2877
2878 return estate->es_epqTuple[rti - 1];
2879 }
2880
2881 /*
2882 * Fetch the current row values for any non-locked relations that need
2883 * to be scanned by an EvalPlanQual operation. origslot must have been set
2884 * to contain the current result row (top-level row) that we need to recheck.
2885 */
2886 void
EvalPlanQualFetchRowMarks(EPQState * epqstate)2887 EvalPlanQualFetchRowMarks(EPQState *epqstate)
2888 {
2889 ListCell *l;
2890
2891 Assert(epqstate->origslot != NULL);
2892
2893 foreach(l, epqstate->arowMarks)
2894 {
2895 ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
2896 ExecRowMark *erm = aerm->rowmark;
2897 Datum datum;
2898 bool isNull;
2899 HeapTupleData tuple;
2900
2901 if (RowMarkRequiresRowShareLock(erm->markType))
2902 elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
2903
2904 /* clear any leftover test tuple for this rel */
2905 EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
2906
2907 /* if child rel, must check whether it produced this row */
2908 if (erm->rti != erm->prti)
2909 {
2910 Oid tableoid;
2911
2912 datum = ExecGetJunkAttribute(epqstate->origslot,
2913 aerm->toidAttNo,
2914 &isNull);
2915 /* non-locked rels could be on the inside of outer joins */
2916 if (isNull)
2917 continue;
2918 tableoid = DatumGetObjectId(datum);
2919
2920 Assert(OidIsValid(erm->relid));
2921 if (tableoid != erm->relid)
2922 {
2923 /* this child is inactive right now */
2924 continue;
2925 }
2926 }
2927
2928 if (erm->markType == ROW_MARK_REFERENCE)
2929 {
2930 HeapTuple copyTuple;
2931
2932 Assert(erm->relation != NULL);
2933
2934 /* fetch the tuple's ctid */
2935 datum = ExecGetJunkAttribute(epqstate->origslot,
2936 aerm->ctidAttNo,
2937 &isNull);
2938 /* non-locked rels could be on the inside of outer joins */
2939 if (isNull)
2940 continue;
2941
2942 /* fetch requests on foreign tables must be passed to their FDW */
2943 if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2944 {
2945 FdwRoutine *fdwroutine;
2946 bool updated = false;
2947
2948 fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
2949 /* this should have been checked already, but let's be safe */
2950 if (fdwroutine->RefetchForeignRow == NULL)
2951 ereport(ERROR,
2952 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2953 errmsg("cannot lock rows in foreign table \"%s\"",
2954 RelationGetRelationName(erm->relation))));
2955 copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
2956 erm,
2957 datum,
2958 &updated);
2959 if (copyTuple == NULL)
2960 elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2961
2962 /*
2963 * Ideally we'd insist on updated == false here, but that
2964 * assumes that FDWs can track that exactly, which they might
2965 * not be able to. So just ignore the flag.
2966 */
2967 }
2968 else
2969 {
2970 /* ordinary table, fetch the tuple */
2971 Buffer buffer;
2972
2973 tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
2974 if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
2975 false, NULL))
2976 elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2977
2978 /* successful, copy tuple */
2979 copyTuple = heap_copytuple(&tuple);
2980 ReleaseBuffer(buffer);
2981 }
2982
2983 /* store tuple */
2984 EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
2985 }
2986 else
2987 {
2988 HeapTupleHeader td;
2989
2990 Assert(erm->markType == ROW_MARK_COPY);
2991
2992 /* fetch the whole-row Var for the relation */
2993 datum = ExecGetJunkAttribute(epqstate->origslot,
2994 aerm->wholeAttNo,
2995 &isNull);
2996 /* non-locked rels could be on the inside of outer joins */
2997 if (isNull)
2998 continue;
2999 td = DatumGetHeapTupleHeader(datum);
3000
3001 /* build a temporary HeapTuple control structure */
3002 tuple.t_len = HeapTupleHeaderGetDatumLength(td);
3003 tuple.t_data = td;
3004 /* relation might be a foreign table, if so provide tableoid */
3005 tuple.t_tableOid = erm->relid;
3006 /* also copy t_ctid in case there's valid data there */
3007 tuple.t_self = td->t_ctid;
3008
3009 /* copy and store tuple */
3010 EvalPlanQualSetTuple(epqstate, erm->rti,
3011 heap_copytuple(&tuple));
3012 }
3013 }
3014 }
3015
3016 /*
3017 * Fetch the next row (if any) from EvalPlanQual testing
3018 *
3019 * (In practice, there should never be more than one row...)
3020 */
3021 TupleTableSlot *
EvalPlanQualNext(EPQState * epqstate)3022 EvalPlanQualNext(EPQState *epqstate)
3023 {
3024 MemoryContext oldcontext;
3025 TupleTableSlot *slot;
3026
3027 oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
3028 slot = ExecProcNode(epqstate->planstate);
3029 MemoryContextSwitchTo(oldcontext);
3030
3031 return slot;
3032 }
3033
3034 /*
3035 * Initialize or reset an EvalPlanQual state tree
3036 */
3037 void
EvalPlanQualBegin(EPQState * epqstate,EState * parentestate)3038 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
3039 {
3040 EState *estate = epqstate->estate;
3041
3042 if (estate == NULL)
3043 {
3044 /* First time through, so create a child EState */
3045 EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
3046 }
3047 else
3048 {
3049 /*
3050 * We already have a suitable child EPQ tree, so just reset it.
3051 */
3052 int rtsize = list_length(parentestate->es_range_table);
3053 PlanState *planstate = epqstate->planstate;
3054
3055 MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
3056
3057 /* Recopy current values of parent parameters */
3058 if (parentestate->es_plannedstmt->nParamExec > 0)
3059 {
3060 int i;
3061
3062 /*
3063 * Force evaluation of any InitPlan outputs that could be needed
3064 * by the subplan, just in case they got reset since
3065 * EvalPlanQualStart (see comments therein).
3066 */
3067 ExecSetParamPlanMulti(planstate->plan->extParam,
3068 GetPerTupleExprContext(parentestate));
3069
3070 i = parentestate->es_plannedstmt->nParamExec;
3071
3072 while (--i >= 0)
3073 {
3074 /* copy value if any, but not execPlan link */
3075 estate->es_param_exec_vals[i].value =
3076 parentestate->es_param_exec_vals[i].value;
3077 estate->es_param_exec_vals[i].isnull =
3078 parentestate->es_param_exec_vals[i].isnull;
3079 }
3080 }
3081
3082 /*
3083 * Mark child plan tree as needing rescan at all scan nodes. The
3084 * first ExecProcNode will take care of actually doing the rescan.
3085 */
3086 planstate->chgParam = bms_add_member(planstate->chgParam,
3087 epqstate->epqParam);
3088 }
3089 }
3090
3091 /*
3092 * Start execution of an EvalPlanQual plan tree.
3093 *
3094 * This is a cut-down version of ExecutorStart(): we copy some state from
3095 * the top-level estate rather than initializing it fresh.
3096 */
3097 static void
EvalPlanQualStart(EPQState * epqstate,EState * parentestate,Plan * planTree)3098 EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
3099 {
3100 EState *estate;
3101 int rtsize;
3102 MemoryContext oldcontext;
3103 ListCell *l;
3104
3105 rtsize = list_length(parentestate->es_range_table);
3106
3107 epqstate->estate = estate = CreateExecutorState();
3108
3109 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
3110
3111 /*
3112 * Child EPQ EStates share the parent's copy of unchanging state such as
3113 * the snapshot, rangetable, result-rel info, and external Param info.
3114 * They need their own copies of local state, including a tuple table,
3115 * es_param_exec_vals, etc.
3116 *
3117 * The ResultRelInfo array management is trickier than it looks. We
3118 * create fresh arrays for the child but copy all the content from the
3119 * parent. This is because it's okay for the child to share any
3120 * per-relation state the parent has already created --- but if the child
3121 * sets up any ResultRelInfo fields, such as its own junkfilter, that
3122 * state must *not* propagate back to the parent. (For one thing, the
3123 * pointed-to data is in a memory context that won't last long enough.)
3124 */
3125 estate->es_direction = ForwardScanDirection;
3126 estate->es_snapshot = parentestate->es_snapshot;
3127 estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
3128 estate->es_range_table = parentestate->es_range_table;
3129 estate->es_queryEnv = parentestate->es_queryEnv;
3130 estate->es_plannedstmt = parentestate->es_plannedstmt;
3131 estate->es_junkFilter = parentestate->es_junkFilter;
3132 estate->es_output_cid = parentestate->es_output_cid;
3133 if (parentestate->es_num_result_relations > 0)
3134 {
3135 int numResultRelations = parentestate->es_num_result_relations;
3136 int numRootResultRels = parentestate->es_num_root_result_relations;
3137 ResultRelInfo *resultRelInfos;
3138
3139 resultRelInfos = (ResultRelInfo *)
3140 palloc(numResultRelations * sizeof(ResultRelInfo));
3141 memcpy(resultRelInfos, parentestate->es_result_relations,
3142 numResultRelations * sizeof(ResultRelInfo));
3143 estate->es_result_relations = resultRelInfos;
3144 estate->es_num_result_relations = numResultRelations;
3145
3146 /* Also transfer partitioned root result relations. */
3147 if (numRootResultRels > 0)
3148 {
3149 resultRelInfos = (ResultRelInfo *)
3150 palloc(numRootResultRels * sizeof(ResultRelInfo));
3151 memcpy(resultRelInfos, parentestate->es_root_result_relations,
3152 numRootResultRels * sizeof(ResultRelInfo));
3153 estate->es_root_result_relations = resultRelInfos;
3154 estate->es_num_root_result_relations = numRootResultRels;
3155 }
3156 }
3157 /* es_result_relation_info must NOT be copied */
3158 /* es_trig_target_relations must NOT be copied */
3159 estate->es_rowMarks = parentestate->es_rowMarks;
3160 estate->es_top_eflags = parentestate->es_top_eflags;
3161 estate->es_instrument = parentestate->es_instrument;
3162 /* es_auxmodifytables must NOT be copied */
3163
3164 /*
3165 * The external param list is simply shared from parent. The internal
3166 * param workspace has to be local state, but we copy the initial values
3167 * from the parent, so as to have access to any param values that were
3168 * already set from other parts of the parent's plan tree.
3169 */
3170 estate->es_param_list_info = parentestate->es_param_list_info;
3171 if (parentestate->es_plannedstmt->nParamExec > 0)
3172 {
3173 int i;
3174
3175 /*
3176 * Force evaluation of any InitPlan outputs that could be needed by
3177 * the subplan. (With more complexity, maybe we could postpone this
3178 * till the subplan actually demands them, but it doesn't seem worth
3179 * the trouble; this is a corner case already, since usually the
3180 * InitPlans would have been evaluated before reaching EvalPlanQual.)
3181 *
3182 * This will not touch output params of InitPlans that occur somewhere
3183 * within the subplan tree, only those that are attached to the
3184 * ModifyTable node or above it and are referenced within the subplan.
3185 * That's OK though, because the planner would only attach such
3186 * InitPlans to a lower-level SubqueryScan node, and EPQ execution
3187 * will not descend into a SubqueryScan.
3188 *
3189 * The EState's per-output-tuple econtext is sufficiently short-lived
3190 * for this, since it should get reset before there is any chance of
3191 * doing EvalPlanQual again.
3192 */
3193 ExecSetParamPlanMulti(planTree->extParam,
3194 GetPerTupleExprContext(parentestate));
3195
3196 /* now make the internal param workspace ... */
3197 i = parentestate->es_plannedstmt->nParamExec;
3198 estate->es_param_exec_vals = (ParamExecData *)
3199 palloc0(i * sizeof(ParamExecData));
3200 /* ... and copy down all values, whether really needed or not */
3201 while (--i >= 0)
3202 {
3203 /* copy value if any, but not execPlan link */
3204 estate->es_param_exec_vals[i].value =
3205 parentestate->es_param_exec_vals[i].value;
3206 estate->es_param_exec_vals[i].isnull =
3207 parentestate->es_param_exec_vals[i].isnull;
3208 }
3209 }
3210
3211 /*
3212 * Each EState must have its own es_epqScanDone state, but if we have
3213 * nested EPQ checks they should share es_epqTuple arrays. This allows
3214 * sub-rechecks to inherit the values being examined by an outer recheck.
3215 */
3216 estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
3217 if (parentestate->es_epqTuple != NULL)
3218 {
3219 estate->es_epqTuple = parentestate->es_epqTuple;
3220 estate->es_epqTupleSet = parentestate->es_epqTupleSet;
3221 }
3222 else
3223 {
3224 estate->es_epqTuple = (HeapTuple *)
3225 palloc0(rtsize * sizeof(HeapTuple));
3226 estate->es_epqTupleSet = (bool *)
3227 palloc0(rtsize * sizeof(bool));
3228 }
3229
3230 /*
3231 * Each estate also has its own tuple table.
3232 */
3233 estate->es_tupleTable = NIL;
3234
3235 /*
3236 * Initialize private state information for each SubPlan. We must do this
3237 * before running ExecInitNode on the main query tree, since
3238 * ExecInitSubPlan expects to be able to find these entries. Some of the
3239 * SubPlans might not be used in the part of the plan tree we intend to
3240 * run, but since it's not easy to tell which, we just initialize them
3241 * all.
3242 */
3243 Assert(estate->es_subplanstates == NIL);
3244 foreach(l, parentestate->es_plannedstmt->subplans)
3245 {
3246 Plan *subplan = (Plan *) lfirst(l);
3247 PlanState *subplanstate;
3248
3249 subplanstate = ExecInitNode(subplan, estate, 0);
3250 estate->es_subplanstates = lappend(estate->es_subplanstates,
3251 subplanstate);
3252 }
3253
3254 /*
3255 * Initialize the private state information for all the nodes in the part
3256 * of the plan tree we need to run. This opens files, allocates storage
3257 * and leaves us ready to start processing tuples.
3258 */
3259 epqstate->planstate = ExecInitNode(planTree, estate, 0);
3260
3261 MemoryContextSwitchTo(oldcontext);
3262 }
3263
3264 /*
3265 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
3266 * or if we are done with the current EPQ child.
3267 *
3268 * This is a cut-down version of ExecutorEnd(); basically we want to do most
3269 * of the normal cleanup, but *not* close result relations (which we are
3270 * just sharing from the outer query). We do, however, have to close any
3271 * trigger target relations that got opened, since those are not shared.
3272 * (There probably shouldn't be any of the latter, but just in case...)
3273 */
3274 void
EvalPlanQualEnd(EPQState * epqstate)3275 EvalPlanQualEnd(EPQState *epqstate)
3276 {
3277 EState *estate = epqstate->estate;
3278 MemoryContext oldcontext;
3279 ListCell *l;
3280
3281 if (estate == NULL)
3282 return; /* idle, so nothing to do */
3283
3284 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
3285
3286 ExecEndNode(epqstate->planstate);
3287
3288 foreach(l, estate->es_subplanstates)
3289 {
3290 PlanState *subplanstate = (PlanState *) lfirst(l);
3291
3292 ExecEndNode(subplanstate);
3293 }
3294
3295 /* throw away the per-estate tuple table */
3296 ExecResetTupleTable(estate->es_tupleTable, false);
3297
3298 /* close any trigger target relations attached to this EState */
3299 ExecCleanUpTriggerState(estate);
3300
3301 MemoryContextSwitchTo(oldcontext);
3302
3303 FreeExecutorState(estate);
3304
3305 /* Mark EPQState idle */
3306 epqstate->estate = NULL;
3307 epqstate->planstate = NULL;
3308 epqstate->origslot = NULL;
3309 }
3310
3311 /*
3312 * ExecSetupPartitionTupleRouting - set up information needed during
3313 * tuple routing for partitioned tables
3314 *
3315 * Output arguments:
3316 * 'pd' receives an array of PartitionDispatch objects with one entry for
3317 * every partitioned table in the partition tree
3318 * 'partitions' receives an array of ResultRelInfo objects with one entry for
3319 * every leaf partition in the partition tree
3320 * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
3321 * entry for every leaf partition (required to convert input tuple based
3322 * on the root table's rowtype to a leaf partition's rowtype after tuple
3323 * routing is done)
3324 * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
3325 * to manipulate any given leaf partition's rowtype after that partition
3326 * is chosen by tuple-routing.
3327 * 'num_parted' receives the number of partitioned tables in the partition
3328 * tree (= the number of entries in the 'pd' output array)
3329 * 'num_partitions' receives the number of leaf partitions in the partition
3330 * tree (= the number of entries in the 'partitions' and 'tup_conv_maps'
3331 * output arrays
3332 *
3333 * Note that all the relations in the partition tree are locked using the
3334 * RowExclusiveLock mode upon return from this function.
3335 */
3336 void
ExecSetupPartitionTupleRouting(Relation rel,Index resultRTindex,EState * estate,PartitionDispatch ** pd,ResultRelInfo ** partitions,TupleConversionMap *** tup_conv_maps,TupleTableSlot ** partition_tuple_slot,int * num_parted,int * num_partitions)3337 ExecSetupPartitionTupleRouting(Relation rel,
3338 Index resultRTindex,
3339 EState *estate,
3340 PartitionDispatch **pd,
3341 ResultRelInfo **partitions,
3342 TupleConversionMap ***tup_conv_maps,
3343 TupleTableSlot **partition_tuple_slot,
3344 int *num_parted, int *num_partitions)
3345 {
3346 TupleDesc tupDesc = RelationGetDescr(rel);
3347 List *leaf_parts;
3348 ListCell *cell;
3349 int i;
3350 ResultRelInfo *leaf_part_rri;
3351
3352 /*
3353 * Get the information about the partition tree after locking all the
3354 * partitions.
3355 */
3356 (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
3357 *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
3358 *num_partitions = list_length(leaf_parts);
3359 *partitions = (ResultRelInfo *) palloc(*num_partitions *
3360 sizeof(ResultRelInfo));
3361 *tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
3362 sizeof(TupleConversionMap *));
3363
3364 /*
3365 * Initialize an empty slot that will be used to manipulate tuples of any
3366 * given partition's rowtype. It is attached to the caller-specified node
3367 * (such as ModifyTableState) and released when the node finishes
3368 * processing.
3369 */
3370 *partition_tuple_slot = MakeTupleTableSlot();
3371
3372 leaf_part_rri = *partitions;
3373 i = 0;
3374 foreach(cell, leaf_parts)
3375 {
3376 Relation partrel;
3377 TupleDesc part_tupdesc;
3378
3379 /*
3380 * We locked all the partitions above including the leaf partitions.
3381 * Note that each of the relations in *partitions are eventually
3382 * closed by the caller.
3383 */
3384 partrel = heap_open(lfirst_oid(cell), NoLock);
3385 part_tupdesc = RelationGetDescr(partrel);
3386
3387 /*
3388 * Save a tuple conversion map to convert a tuple routed to this
3389 * partition from the parent's type to the partition's.
3390 */
3391 (*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc,
3392 gettext_noop("could not convert row type"));
3393
3394 InitResultRelInfo(leaf_part_rri,
3395 partrel,
3396 resultRTindex,
3397 rel,
3398 estate->es_instrument);
3399
3400 /*
3401 * Verify result relation is a valid target for INSERT.
3402 */
3403 CheckValidResultRel(leaf_part_rri, CMD_INSERT);
3404
3405 /*
3406 * Open partition indices (remember we do not support ON CONFLICT in
3407 * case of partitioned tables, so we do not need support information
3408 * for speculative insertion)
3409 */
3410 if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
3411 leaf_part_rri->ri_IndexRelationDescs == NULL)
3412 ExecOpenIndices(leaf_part_rri, false);
3413
3414 estate->es_leaf_result_relations =
3415 lappend(estate->es_leaf_result_relations, leaf_part_rri);
3416
3417 leaf_part_rri++;
3418 i++;
3419 }
3420 }
3421
3422 /*
3423 * ExecFindPartition -- Find a leaf partition in the partition tree rooted
3424 * at parent, for the heap tuple contained in *slot
3425 *
3426 * estate must be non-NULL; we'll need it to compute any expressions in the
3427 * partition key(s)
3428 *
3429 * If no leaf partition is found, this routine errors out with the appropriate
3430 * error message, else it returns the leaf partition sequence number returned
3431 * by get_partition_for_tuple() unchanged.
3432 */
3433 int
ExecFindPartition(ResultRelInfo * resultRelInfo,PartitionDispatch * pd,TupleTableSlot * slot,EState * estate)3434 ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
3435 TupleTableSlot *slot, EState *estate)
3436 {
3437 int result;
3438 PartitionDispatchData *failed_at;
3439 TupleTableSlot *failed_slot;
3440
3441 /*
3442 * First check the root table's partition constraint, if any. No point in
3443 * routing the tuple if it doesn't belong in the root table itself.
3444 */
3445 if (resultRelInfo->ri_PartitionCheck)
3446 ExecPartitionCheck(resultRelInfo, slot, estate);
3447
3448 result = get_partition_for_tuple(pd, slot, estate,
3449 &failed_at, &failed_slot);
3450 if (result < 0)
3451 {
3452 Relation failed_rel;
3453 Datum key_values[PARTITION_MAX_KEYS];
3454 bool key_isnull[PARTITION_MAX_KEYS];
3455 char *val_desc;
3456 ExprContext *ecxt = GetPerTupleExprContext(estate);
3457
3458 failed_rel = failed_at->reldesc;
3459 ecxt->ecxt_scantuple = failed_slot;
3460 FormPartitionKeyDatum(failed_at, failed_slot, estate,
3461 key_values, key_isnull);
3462 val_desc = ExecBuildSlotPartitionKeyDescription(failed_rel,
3463 key_values,
3464 key_isnull,
3465 64);
3466 Assert(OidIsValid(RelationGetRelid(failed_rel)));
3467 ereport(ERROR,
3468 (errcode(ERRCODE_CHECK_VIOLATION),
3469 errmsg("no partition of relation \"%s\" found for row",
3470 RelationGetRelationName(failed_rel)),
3471 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
3472 }
3473
3474 return result;
3475 }
3476
3477 /*
3478 * BuildSlotPartitionKeyDescription
3479 *
3480 * This works very much like BuildIndexValueDescription() and is currently
3481 * used for building error messages when ExecFindPartition() fails to find
3482 * partition for a row.
3483 */
3484 static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,Datum * values,bool * isnull,int maxfieldlen)3485 ExecBuildSlotPartitionKeyDescription(Relation rel,
3486 Datum *values,
3487 bool *isnull,
3488 int maxfieldlen)
3489 {
3490 StringInfoData buf;
3491 PartitionKey key = RelationGetPartitionKey(rel);
3492 int partnatts = get_partition_natts(key);
3493 int i;
3494 Oid relid = RelationGetRelid(rel);
3495 AclResult aclresult;
3496
3497 if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
3498 return NULL;
3499
3500 /* If the user has table-level access, just go build the description. */
3501 aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
3502 if (aclresult != ACLCHECK_OK)
3503 {
3504 /*
3505 * Step through the columns of the partition key and make sure the
3506 * user has SELECT rights on all of them.
3507 */
3508 for (i = 0; i < partnatts; i++)
3509 {
3510 AttrNumber attnum = get_partition_col_attnum(key, i);
3511
3512 /*
3513 * If this partition key column is an expression, we return no
3514 * detail rather than try to figure out what column(s) the
3515 * expression includes and if the user has SELECT rights on them.
3516 */
3517 if (attnum == InvalidAttrNumber ||
3518 pg_attribute_aclcheck(relid, attnum, GetUserId(),
3519 ACL_SELECT) != ACLCHECK_OK)
3520 return NULL;
3521 }
3522 }
3523
3524 initStringInfo(&buf);
3525 appendStringInfo(&buf, "(%s) = (",
3526 pg_get_partkeydef_columns(relid, true));
3527
3528 for (i = 0; i < partnatts; i++)
3529 {
3530 char *val;
3531 int vallen;
3532
3533 if (isnull[i])
3534 val = "null";
3535 else
3536 {
3537 Oid foutoid;
3538 bool typisvarlena;
3539
3540 getTypeOutputInfo(get_partition_col_typid(key, i),
3541 &foutoid, &typisvarlena);
3542 val = OidOutputFunctionCall(foutoid, values[i]);
3543 }
3544
3545 if (i > 0)
3546 appendStringInfoString(&buf, ", ");
3547
3548 /* truncate if needed */
3549 vallen = strlen(val);
3550 if (vallen <= maxfieldlen)
3551 appendStringInfoString(&buf, val);
3552 else
3553 {
3554 vallen = pg_mbcliplen(val, vallen, maxfieldlen);
3555 appendBinaryStringInfo(&buf, val, vallen);
3556 appendStringInfoString(&buf, "...");
3557 }
3558 }
3559
3560 appendStringInfoChar(&buf, ')');
3561
3562 return buf.data;
3563 }
3564