/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorFinish()
 *	ExecutorEnd()
 *
 *	These four procedures are the external interface to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart must be called at the beginning of execution of any
 *	query plan and ExecutorEnd must always be called at the end of
 *	execution of a plan (unless it is aborted due to error).
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards or backwards, and for how many
 *	tuples.  In some cases ExecutorRun may be called multiple times to
 *	process all the tuples for a plan.  It is also acceptable to stop short
 *	of executing the whole plan (but only if it is a SELECT).
 *
 *	ExecutorFinish must be called after the final ExecutorRun call and
 *	before ExecutorEnd.  This can be omitted only in case of EXPLAIN,
 *	which should also omit ExecutorRun.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execMain.c
 *
 *-------------------------------------------------------------------------
 */
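
/*
 * Illustrative sketch (not part of this file): the canonical call sequence
 * a caller such as tcop/pquery.c follows for a one-shot query.  The
 * CreateQueryDesc() arguments shown are assumptions for the example; see
 * that function's declaration for the authoritative signature.
 *
 *	QueryDesc  *qd = CreateQueryDesc(plannedstmt, sourceText,
 *									 GetActiveSnapshot(), InvalidSnapshot,
 *									 dest, params, queryEnv, 0);
 *
 *	ExecutorStart(qd, 0);
 *	ExecutorRun(qd, ForwardScanDirection, 0L, true);	(count 0: run to end)
 *	ExecutorFinish(qd);
 *	ExecutorEnd(qd);
 *	FreeQueryDesc(qd);
 */
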
#include "postgres.h"

#include "access/htup_details.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_publication.h"
#include "commands/matview.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/nodeSubplan.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"


/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
ExecutorRun_hook_type ExecutorRun_hook = NULL;
ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;

/* Hook for plugin to get control in ExecCheckRTPerms() */
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
static void ExecPostprocessPlan(EState *estate);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static void ExecutePlan(EState *estate, PlanState *planstate,
			bool use_parallel_mode,
			CmdType operation,
			bool sendTuples,
			uint64 numberTuples,
			ScanDirection direction,
			DestReceiver *dest,
			bool execute_once);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
						  Bitmapset *modifiedCols,
						  AclMode requiredPerms);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static char *ExecBuildSlotValueDescription(Oid reloid,
							  TupleTableSlot *slot,
							  TupleDesc tupdesc,
							  Bitmapset *modifiedCols,
							  int maxfieldlen);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
									 Datum *values,
									 bool *isnull,
									 int maxfieldlen);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
				  Plan *planTree);
static void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
				   TupleTableSlot *slot, EState *estate);

/*
 * Note that GetUpdatedColumns() also exists in commands/trigger.c.  There does
 * not appear to be any good header to put it into, given the structures that
 * it uses, so we let them be duplicated.  Be sure to update both if one needs
 * to be changed, however.
 */
#define GetInsertedColumns(relinfo, estate) \
	(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols)
#define GetUpdatedColumns(relinfo, estate) \
	(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)

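/*
 * Example use (hypothetical variable names): fetch the set of columns a
 * query updates in a given result relation, e.g. for constraint-failure
 * reporting via ExecBuildSlotValueDescription().
 *
 *	Bitmapset  *updatedCols = GetUpdatedColumns(resultRelInfo, estate);
 */
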
/* end of local decls */


/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
 * only because some places use QueryDescs for utility commands).  The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 *
 * We provide a function hook variable that lets loadable plugins
 * get control when ExecutorStart is called.  Such a plugin would
 * normally call standard_ExecutorStart().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	if (ExecutorStart_hook)
		(*ExecutorStart_hook) (queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);
}
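
/*
 * A minimal sketch of how an extension would install this hook; the names
 * prev_ExecutorStart and my_ExecutorStart are hypothetical, but the
 * save-and-chain pattern is the conventional one (cf.
 * contrib/pg_stat_statements).
 *
 *	static ExecutorStart_hook_type prev_ExecutorStart = NULL;
 *
 *	static void
 *	my_ExecutorStart(QueryDesc *queryDesc, int eflags)
 *	{
 *		... do plugin work before plan startup ...
 *		if (prev_ExecutorStart)
 *			prev_ExecutorStart(queryDesc, eflags);
 *		else
 *			standard_ExecutorStart(queryDesc, eflags);
 *	}
 *
 *	In _PG_init():
 *		prev_ExecutorStart = ExecutorStart_hook;
 *		ExecutorStart_hook = my_ExecutorStart;
 */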

void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 *
	 * Don't allow writes in parallel mode.  Supporting UPDATE and DELETE
	 * would require (a) storing the combocid hash in shared memory, rather
	 * than synchronizing it just once at the start of parallelism, and (b) an
	 * alternative to heap_update()'s reliance on xmax for mutual exclusion.
	 * INSERT may have no such troubles, but we forbid it to simplify the
	 * checks.
	 *
	 * We have lower-level defenses in CommandCounterIncrement and elsewhere
	 * against performing unsafe operations in parallel mode, but this gives a
	 * more user-friendly error message.
	 */
	if ((XactReadOnly || IsInParallelMode()) &&
		!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	estate->es_sourceText = queryDesc->sourceText;

	/*
	 * Fill in the query environment, if any, from queryDesc.
	 */
	estate->es_queryEnv = queryDesc->queryEnv;

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
			 * tuples
			 */
			if (queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode. This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.  Also note that the count limit is only applied to
 *		retrieved tuples, not for instance to those inserted/updated/deleted
 *		by a ModifyTable plan node.
 *
 *		There is no return value, but output tuples (if any) are sent to
 *		the destination receiver specified in the QueryDesc; and the number
 *		of tuples processed at the top level can be found in
 *		estate->es_processed.
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorRun is called.  Such a plugin would
 *		normally call standard_ExecutorRun().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, uint64 count,
			bool execute_once)
{
	if (ExecutorRun_hook)
		(*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
	else
		standard_ExecutorRun(queryDesc, direction, count, execute_once);
}

void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, uint64 count, bool execute_once)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/*
	 * extract information from the query descriptor
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->hasReturning);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (!ScanDirectionIsNoMovement(direction))
	{
		if (execute_once && queryDesc->already_executed)
			elog(ERROR, "can't re-execute query flagged for single execution");
		queryDesc->already_executed = true;

		ExecutePlan(estate,
					queryDesc->planstate,
					queryDesc->plannedstmt->parallelModeNeeded,
					operation,
					sendTuples,
					count,
					direction,
					dest,
					execute_once);
	}

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, estate->es_processed);

	MemoryContextSwitchTo(oldcontext);
}
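
/*
 * Illustrative sketch: a cursor-style caller (e.g. the FETCH path in
 * tcop/pquery.c) may call ExecutorRun repeatedly, passing execute_once =
 * false; the batch size of 100 below is an arbitrary example value.
 *
 *	ExecutorRun(qd, ForwardScanDirection, 100, false);
 *	... tuples are delivered to qd->dest; es_processed says how many ...
 *	ExecutorRun(qd, ForwardScanDirection, 100, false);
 */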

/* ----------------------------------------------------------------
 *		ExecutorFinish
 *
 *		This routine must be called after the last ExecutorRun call.
 *		It performs cleanup such as firing AFTER triggers.  It is
 *		separate from ExecutorEnd because EXPLAIN ANALYZE needs to
 *		include these actions in the total runtime.
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorFinish is called.  Such a plugin would
 *		normally call standard_ExecutorFinish().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorFinish(QueryDesc *queryDesc)
{
	if (ExecutorFinish_hook)
		(*ExecutorFinish_hook) (queryDesc);
	else
		standard_ExecutorFinish(queryDesc);
}

void
standard_ExecutorFinish(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/* This should be run once and only once per Executor instance */
	Assert(!estate->es_finished);

	/* Switch into per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/* Run ModifyTable nodes to completion */
	ExecPostprocessPlan(estate);

	/* Execute queued AFTER triggers, unless told not to */
	if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
		AfterTriggerEndQuery(estate);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, 0);

	MemoryContextSwitchTo(oldcontext);

	estate->es_finished = true;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorEnd is called.  Such a plugin would
 *		normally call standard_ExecutorEnd().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	if (ExecutorEnd_hook)
		(*ExecutorEnd_hook) (queryDesc);
	else
		standard_ExecutorEnd(queryDesc);
}

void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/* do away with our snapshots */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}

/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate);

	MemoryContextSwitchTo(oldcontext);
}


/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 *
 * Returns true if permissions are adequate.  Otherwise, throws an appropriate
 * error if ereport_on_violation is true, or simply returns false.
 *
 * Note that this does NOT address row level security policies (aka: RLS).  If
 * rows will be returned to the user as a result of this permission check
 * passing, then RLS also needs to be consulted (see check_enable_rls()).
 *
 * See rewrite/rowsecurity.c.
 */
bool
ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
{
	ListCell   *l;
	bool		result = true;

	foreach(l, rangeTable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		result = ExecCheckRTEPerms(rte);
		if (!result)
		{
			Assert(rte->rtekind == RTE_RELATION);
			if (ereport_on_violation)
				aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
							   get_rel_name(rte->relid));
			return false;
		}
	}

	if (ExecutorCheckPerms_hook)
		result = (*ExecutorCheckPerms_hook) (rangeTable,
											 ereport_on_violation);
	return result;
}
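
/*
 * A sketch of how a mandatory-access-control extension (e.g. sepgsql)
 * might layer onto ExecutorCheckPerms_hook; my_check_perms is a
 * hypothetical name.  The hook can veto a query that passed the built-in
 * ACL checks, either by returning false or by throwing its own error when
 * ereport_on_violation is true.
 *
 *	static bool
 *	my_check_perms(List *rangeTable, bool ereport_on_violation)
 *	{
 *		... inspect each RangeTblEntry's relid and requiredPerms ...
 *		return true;
 *	}
 *
 *	In _PG_init():
 *		ExecutorCheckPerms_hook = my_check_perms;
 */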

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked when the function is prepared for execution.  Join, subquery,
	 * and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		int			col = -1;

		/*
		 * If we lack any permissions that exist only as relation permissions,
		 * we can fail straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			while ((col = bms_next_member(rte->selectedCols, col)) >= 0)
			{
				/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
				AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

				if (attno == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, attno, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
		}

		/*
		 * Basically the same for the mod columns, for both INSERT and UPDATE
		 * privilege as specified by remainingPerms.
		 */
		if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->insertedCols,
																	  ACL_INSERT))
			return false;

		if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->updatedCols,
																	  ACL_UPDATE))
			return false;
	}
	return true;
}

/*
 * ExecCheckRTEPermsModified
 *		Check INSERT or UPDATE access permissions for a single RTE (these
 *		are processed uniformly).
 */
static bool
ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols,
						  AclMode requiredPerms)
{
	int			col = -1;

	/*
	 * When the query doesn't explicitly update any columns, allow the query
	 * if we have permission on any column of the rel.  This is to handle
	 * SELECT FOR UPDATE as well as possible corner cases in UPDATE.
	 */
	if (bms_is_empty(modifiedCols))
	{
		if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms,
									  ACLMASK_ANY) != ACLCHECK_OK)
			return false;
	}

	while ((col = bms_next_member(modifiedCols, col)) >= 0)
	{
		/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
		AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

		if (attno == InvalidAttrNumber)
		{
			/* whole-row reference can't happen here */
			elog(ERROR, "whole-row update is not implemented");
		}
		else
		{
			if (pg_attribute_aclcheck(relOid, attno, userid,
									  requiredPerms) != ACLCHECK_OK)
				return false;
		}
	}
	return true;
}
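
/*
 * Worked example of the bitmap offset convention used above.  Bitmapsets
 * cannot hold negative members, so column numbers are shifted by
 * -FirstLowInvalidHeapAttributeNumber before being stored; that leaves
 * room for system attributes (negative attnos) and the whole-row
 * reference (attno 0 == InvalidAttrNumber).  For an ordinary column with
 * attno 3:
 *
 *	col = 3 - FirstLowInvalidHeapAttributeNumber;		stored bit number
 *	attno = col + FirstLowInvalidHeapAttributeNumber;	recovers attno 3
 *
 * and a whole-row reference is stored as bit
 * 0 - FirstLowInvalidHeapAttributeNumber, which decodes back to
 * InvalidAttrNumber and takes the whole-row branches above.
 */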

/*
 * Check that the query does not imply any writes to non-temp tables;
 * unless we're in parallel mode, in which case don't even allow writes
 * to temp tables.
 *
 * Note: in a Hot Standby this would need to reject writes to temp
 * tables just as we do in parallel mode; but an HS standby can't have created
 * any temp tables in the first place, so no need to check that.
 */
static void
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
{
	ListCell   *l;

	/*
	 * Fail if write permissions are requested in parallel mode for table
	 * (temp or non-temp), otherwise fail for any non-temp table.
	 */
	foreach(l, plannedstmt->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind != RTE_RELATION)
			continue;

		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;

		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
	}

	if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE)
		PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt));
}


/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks
	 */
	ExecCheckRTPerms(rangeTable, true);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;
	estate->es_plannedstmt = plannedstmt;

	/*
	 * initialize result relation stuff, and open/lock the result rels.
	 *
	 * We must do this before initializing the plan tree, else we might try to
	 * do a lock upgrade if a result rel is also a source rel.
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			Index		resultRelationIndex = lfirst_int(l);
			Oid			resultRelationOid;
			Relation	resultRelation;

			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);

			InitResultRelInfo(resultRelInfo,
							  resultRelation,
							  resultRelationIndex,
							  NULL,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* es_result_relation_info is NULL except when within ModifyTable */
		estate->es_result_relation_info = NULL;

		/*
		 * In the partitioned result relation case, lock the non-leaf result
		 * relations too.  A subset of these are the roots of respective
		 * partitioned tables, for which we also allocate ResultRelInfos.
		 */
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
		if (plannedstmt->nonleafResultRelations)
		{
			int			num_roots = list_length(plannedstmt->rootResultRelations);

			/*
			 * Firstly, build ResultRelInfos for all the partitioned table
			 * roots, because we will need them to fire the statement-level
			 * triggers, if any.
			 */
			resultRelInfos = (ResultRelInfo *)
				palloc(num_roots * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
			foreach(l, plannedstmt->rootResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);
				Oid			resultRelOid;
				Relation	resultRelDesc;

				resultRelOid = getrelid(resultRelIndex, rangeTable);
				resultRelDesc = heap_open(resultRelOid, RowExclusiveLock);
				InitResultRelInfo(resultRelInfo,
								  resultRelDesc,
								  lfirst_int(l),
								  NULL,
								  estate->es_instrument);
				resultRelInfo++;
			}

			estate->es_root_result_relations = resultRelInfos;
			estate->es_num_root_result_relations = num_roots;

			/* Simply lock the rest of them. */
			foreach(l, plannedstmt->nonleafResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);

				/* We locked the roots above. */
				if (!list_member_int(plannedstmt->rootResultRelations,
									 resultRelIndex))
					LockRelationOid(getrelid(resultRelIndex, rangeTable),
									RowExclusiveLock);
			}
		}
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
	}

	/*
	 * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
	 * before we initialize the plan tree, else we'd be risking lock upgrades.
	 * While we are at it, build the ExecRowMark list.  Any partitioned child
	 * tables are ignored here (because isParent=true) and will be locked by
	 * the first Append or MergeAppend node that references them.  (Note that
	 * the RowMarks corresponding to partitioned child tables are present in
	 * the same list as the rest, i.e., plannedstmt->rowMarks.)
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
		Oid			relid;
		Relation	relation;
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* get relation's OID (will produce InvalidOid if subquery) */
		relid = getrelid(rc->rti, rangeTable);

		/*
		 * If you change the conditions under which rel locks are acquired
		 * here, be sure to adjust ExecOpenScanRelation to match.
		 */
		switch (rc->markType)
		{
			case ROW_MARK_EXCLUSIVE:
			case ROW_MARK_NOKEYEXCLUSIVE:
			case ROW_MARK_SHARE:
			case ROW_MARK_KEYSHARE:
				relation = heap_open(relid, RowShareLock);
				break;
			case ROW_MARK_REFERENCE:
				relation = heap_open(relid, AccessShareLock);
				break;
			case ROW_MARK_COPY:
				/* no physical table access is required */
				relation = NULL;
				break;
			default:
				elog(ERROR, "unrecognized markType: %d", rc->markType);
				relation = NULL;	/* keep compiler quiet */
				break;
		}

		/* Check that relation is a legal target for marking */
		if (relation)
			CheckValidRowMarkRel(relation, rc->markType);

		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->relid = relid;
		erm->rti = rc->rti;
		erm->prti = rc->prti;
		erm->rowmarkId = rc->rowmarkId;
		erm->markType = rc->markType;
		erm->strength = rc->strength;
		erm->waitPolicy = rc->waitPolicy;
		erm->ermActive = false;
		ItemPointerSetInvalid(&(erm->curCtid));
		erm->ermExtra = NULL;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * Initialize the executor's tuple table to empty.
	 */
	estate->es_tupleTable = NIL;
	estate->es_trig_tuple_slot = NULL;
	estate->es_trig_oldtup_slot = NULL;
	estate->es_trig_newtup_slot = NULL;

	/* mark EvalPlanQual not active */
	estate->es_epqTuple = NULL;
	estate->es_epqTupleSet = NULL;
	estate->es_epqScanDone = NULL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;						/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;
		int			sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan or MARK/RESTORE. If
		 * it is a parameterless subplan (not initplan), we suggest that it be
		 * prepared to handle REWIND efficiently; otherwise there is no need.
		 */
		sp_eflags = eflags
			& (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return.
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT queries need a filter if
	 * there are any junk attrs in the top-level tlist.
	 */
	if (operation == CMD_SELECT)
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		foreach(tlist, plan->targetlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tlist);

			if (tle->resjunk)
			{
				junk_filter_needed = true;
				break;
			}
		}

		if (junk_filter_needed)
		{
			JunkFilter *j;

			j = ExecInitJunkFilter(planstate->plan->targetlist,
								   tupType->tdhasoid,
								   ExecInitExtraTupleSlot(estate));
			estate->es_junkFilter = j;

			/* Want to return the cleaned tuple type */
			tupType = j->jf_cleanTupType;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}

/*
 * Check that a proposed result relation is a legal target for the operation
 *
 * Generally the parser and/or planner should have noticed any such mistake
 * already, but let's make sure.
 *
 * Note: when changing this function, you probably also need to look at
 * CheckValidRowMarkRel.
 */
void
CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
{
	Relation	resultRel = resultRelInfo->ri_RelationDesc;
	TriggerDesc *trigDesc = resultRel->trigdesc;
	FdwRoutine *fdwroutine;

	switch (resultRel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			CheckCmdReplicaIdentity(resultRel, operation);
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_VIEW:

			/*
			 * Okay only if there's a suitable INSTEAD OF trigger.  Messages
			 * here should match rewriteHandler.c's rewriteTargetView and
			 * RewriteQuery, except that we omit errdetail because we haven't
			 * got the information handy (and given that we really shouldn't
			 * get here anyway, it's not worth great exertion to get).
			 */
			switch (operation)
			{
				case CMD_INSERT:
					if (!trigDesc || !trigDesc->trig_insert_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot insert into view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.")));
					break;
				case CMD_UPDATE:
					if (!trigDesc || !trigDesc->trig_update_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot update view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.")));
					break;
				case CMD_DELETE:
					if (!trigDesc || !trigDesc->trig_delete_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot delete from view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.")));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		case RELKIND_MATVIEW:
			if (!MatViewIncrementalMaintenanceIsEnabled())
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot change materialized view \"%s\"",
								RelationGetRelationName(resultRel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = resultRelInfo->ri_FdwRoutine;
			switch (operation)
			{
				case CMD_INSERT:

					/*
					 * If this is a foreign partition being used for tuple
					 * routing, skip the check; such an insert is disallowed
					 * elsewhere.
					 */
					if (resultRelInfo->ri_PartitionRoot)
						break;
					if (fdwroutine->ExecForeignInsert == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot insert into foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow inserts",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_UPDATE:
					if (fdwroutine->ExecForeignUpdate == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot update foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow updates",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_DELETE:
					if (fdwroutine->ExecForeignDelete == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot delete from foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow deletes",
										RelationGetRelationName(resultRel))));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
	}
}

/*
 * Check that a proposed rowmark target relation is a legal target
 *
 * In most cases parser and/or planner should have noticed this already, but
 * they don't cover all cases.
 */
static void
CheckValidRowMarkRel(Relation rel, RowMarkType markType)
{
	FdwRoutine *fdwroutine;

	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			/* OK */
			break;
		case RELKIND_SEQUENCE:
			/* Must disallow this because we don't vacuum sequences */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in sequence \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_TOASTVALUE:
			/* We could allow this, but there seems no good reason to */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in TOAST relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_VIEW:
			/* Should not get here; planner should have expanded the view */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in view \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_MATVIEW:
			/* Allow referencing a matview, but not actual locking clauses */
			if (markType != ROW_MARK_REFERENCE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot lock rows in materialized view \"%s\"",
								RelationGetRelationName(rel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = GetFdwRoutineForRelation(rel, false);
			if (fdwroutine->RefetchForeignRow == NULL)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot lock rows in foreign table \"%s\"",
								RelationGetRelationName(rel))));
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
	}
}

/*
 * Initialize ResultRelInfo data for one result relation
 *
 * Caution: before Postgres 9.1, this function included the relkind checking
 * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
 * appropriate.  Be sure callers cover those needs.
 */
void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
				  Relation resultRelationDesc,
				  Index resultRelationIndex,
				  Relation partition_root,
				  int instrument_options)
{
	List	   *partition_check = NIL;

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		resultRelInfo->ri_TrigWhenExprs = (ExprState **)
			palloc0(n * sizeof(ExprState *));
		if (instrument_options)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigWhenExprs = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
	else
		resultRelInfo->ri_FdwRoutine = NULL;
	resultRelInfo->ri_FdwState = NULL;
	resultRelInfo->ri_usesFdwDirectModify = false;
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;

	/*
	 * Partition constraint, which also includes the partition constraint of
	 * all the ancestors that are partitions.  Note that it will be checked
	 * even in the case of tuple-routing where this table is the target leaf
	 * partition, if there are any BR triggers defined on the table.  Although
	 * tuple-routing implicitly preserves the partition constraint of the
	 * target partition for a given row, the BR triggers may change the row
	 * such that the constraint is no longer satisfied, so we must catch that
	 * by checking the constraint explicitly.
	 *
	 * If this is a partitioned table, the partition constraint (if any) of a
	 * given row will be checked just before performing tuple-routing.
	 */
	partition_check = RelationGetPartitionQual(resultRelationDesc);

	resultRelInfo->ri_PartitionCheck = partition_check;
	resultRelInfo->ri_PartitionRoot = partition_root;
}

/*
 *		ExecGetTriggerResultRel
 *
 * Get a ResultRelInfo for a trigger target relation.  Most of the time,
 * triggers are fired on one of the result relations of the query, and so
 * we can just return a member of the es_result_relations array, the
 * es_root_result_relations array (if any), or the es_leaf_result_relations
 * list (if any).  (Note: in self-join situations there might be multiple
 * members with the same OID; if so it doesn't matter which one we pick.)
 * However, it is sometimes necessary to fire triggers on other relations;
 * this happens mainly when an RI update trigger queues additional triggers
 * on other relations, which will be processed in the context of the outer
 * query.  For efficiency's sake, we want to have a ResultRelInfo for those
 * triggers too; that can avoid repeated re-opening of the relation.  (It
 * also provides a way for EXPLAIN ANALYZE to report the runtimes of such
 * triggers.)  So we make additional ResultRelInfo's as needed, and save them
 * in es_trig_target_relations.
 */
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
{
	ResultRelInfo *rInfo;
	int			nr;
	ListCell   *l;
	Relation	rel;
	MemoryContext oldcontext;

	/* First, search through the query result relations */
	rInfo = estate->es_result_relations;
	nr = estate->es_num_result_relations;
	while (nr > 0)
	{
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
		rInfo++;
		nr--;
	}
	/* Second, search through the root result relations, if any */
	rInfo = estate->es_root_result_relations;
	nr = estate->es_num_root_result_relations;
	while (nr > 0)
	{
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
		rInfo++;
		nr--;
	}
	/* Third, search through the leaf result relations, if any */
	foreach(l, estate->es_leaf_result_relations)
	{
		rInfo = (ResultRelInfo *) lfirst(l);
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
	}
	/* Nope, but maybe we already made an extra ResultRelInfo for it */
	foreach(l, estate->es_trig_target_relations)
	{
		rInfo = (ResultRelInfo *) lfirst(l);
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
	}
	/* Nope, so we need a new one */

	/*
	 * Open the target relation's relcache entry.  We assume that an
	 * appropriate lock is still held by the backend from whenever the trigger
	 * event got queued, so we need take no new lock here.  Also, we need not
	 * recheck the relkind, so no need for CheckValidResultRel.
	 */
	rel = heap_open(relid, NoLock);

	/*
	 * Make the new entry in the right context.
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
	rInfo = makeNode(ResultRelInfo);
	InitResultRelInfo(rInfo,
					  rel,
					  0,		/* dummy rangetable index */
					  NULL,
					  estate->es_instrument);
	estate->es_trig_target_relations =
		lappend(estate->es_trig_target_relations, rInfo);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Currently, we don't need any index information in ResultRelInfos used
	 * only for triggers, so no need to call ExecOpenIndices.
	 */

	return rInfo;
}

/*
 * Close any relations that have been opened by ExecGetTriggerResultRel().
 */
void
ExecCleanUpTriggerState(EState *estate)
{
	ListCell   *l;

	foreach(l, estate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}
}

/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that if we are generating tuples for INSERT or UPDATE,
 * estate->es_result_relation_info is already set up to describe the target
 * relation.  Note that in an UPDATE that spans an inheritance tree, some of
 * the target relations may have OIDs and some not.  We have to make the
 * decisions on a per-relation basis as we initialize each of the subplans of
 * the ModifyTable node, so ModifyTable has to set es_result_relation_info
 * while initializing each subplan.
 *
 * CREATE TABLE AS is even uglier, because we don't have the target relation's
 * descriptor available when this code runs; we have to look aside at the
 * flags passed to ExecutorStart().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	ResultRelInfo *ri = planstate->state->es_result_relation_info;

	if (ri != NULL)
	{
		Relation	rel = ri->ri_RelationDesc;

		if (rel != NULL)
		{
			*hasoids = rel->rd_rel->relhasoids;
			return true;
		}
	}

	if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS)
	{
		*hasoids = true;
		return true;
	}
	if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS)
	{
		*hasoids = false;
		return true;
	}

	return false;
}
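
/*
 * Illustrative caller pattern (a sketch of what execUtils.c does when
 * building a node's result descriptor): if the context forces the choice,
 * obey it; otherwise default to tuples without OID space.
 *
 *	bool	hasoid;
 *
 *	if (!ExecContextForcesOids(planstate, &hasoid))
 *		hasoid = false;			(free to choose; don't reserve OID space)
 *	tupDesc = ExecTypeFromTL(planstate->plan->targetlist, hasoid);
 */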
1544 
1545 /* ----------------------------------------------------------------
1546  *		ExecPostprocessPlan
1547  *
1548  *		Give plan nodes a final chance to execute before shutdown
1549  * ----------------------------------------------------------------
1550  */
1551 static void
ExecPostprocessPlan(EState * estate)1552 ExecPostprocessPlan(EState *estate)
1553 {
1554 	ListCell   *lc;
1555 
1556 	/*
1557 	 * Make sure nodes run forward.
1558 	 */
1559 	estate->es_direction = ForwardScanDirection;
1560 
1561 	/*
1562 	 * Run any secondary ModifyTable nodes to completion, in case the main
1563 	 * query did not fetch all rows from them.  (We do this to ensure that
1564 	 * such nodes have predictable results.)
1565 	 */
1566 	foreach(lc, estate->es_auxmodifytables)
1567 	{
1568 		PlanState  *ps = (PlanState *) lfirst(lc);
1569 
1570 		for (;;)
1571 		{
1572 			TupleTableSlot *slot;
1573 
1574 			/* Reset the per-output-tuple exprcontext each time */
1575 			ResetPerTupleExprContext(estate);
1576 
1577 			slot = ExecProcNode(ps);
1578 
1579 			if (TupIsNull(slot))
1580 				break;
1581 		}
1582 	}
1583 }
1584 
1585 /* ----------------------------------------------------------------
1586  *		ExecEndPlan
1587  *
1588  *		Cleans up the query plan -- closes files and frees up storage
1589  *
1590  * NOTE: we are no longer very worried about freeing storage per se
1591  * in this code; FreeExecutorState should be guaranteed to release all
1592  * memory that needs to be released.  What we are worried about doing
1593  * is closing relations and dropping buffer pins.  Thus, for example,
1594  * tuple tables must be cleared or dropped to ensure pins are released.
1595  * ----------------------------------------------------------------
1596  */
1597 static void
1598 ExecEndPlan(PlanState *planstate, EState *estate)
1599 {
1600 	ResultRelInfo *resultRelInfo;
1601 	int			i;
1602 	ListCell   *l;
1603 
1604 	/*
1605 	 * shut down the node-type-specific query processing
1606 	 */
1607 	ExecEndNode(planstate);
1608 
1609 	/*
1610 	 * for subplans too
1611 	 */
1612 	foreach(l, estate->es_subplanstates)
1613 	{
1614 		PlanState  *subplanstate = (PlanState *) lfirst(l);
1615 
1616 		ExecEndNode(subplanstate);
1617 	}
1618 
1619 	/*
1620 	 * destroy the executor's tuple table.  Actually we only care about
1621 	 * releasing buffer pins and tupdesc refcounts; there's no need to pfree
1622 	 * the TupleTableSlots, since the containing memory context is about to go
1623 	 * away anyway.
1624 	 */
1625 	ExecResetTupleTable(estate->es_tupleTable, false);
1626 
1627 	/*
1628 	 * close the result relation(s) if any, but hold locks until xact commit.
1629 	 */
1630 	resultRelInfo = estate->es_result_relations;
1631 	for (i = estate->es_num_result_relations; i > 0; i--)
1632 	{
1633 		/* Close indices and then the relation itself */
1634 		ExecCloseIndices(resultRelInfo);
1635 		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1636 		resultRelInfo++;
1637 	}
1638 
1639 	/* Close the root target relation(s). */
1640 	resultRelInfo = estate->es_root_result_relations;
1641 	for (i = estate->es_num_root_result_relations; i > 0; i--)
1642 	{
1643 		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1644 		resultRelInfo++;
1645 	}
1646 
1647 	/* likewise close any trigger target relations */
1648 	ExecCleanUpTriggerState(estate);
1649 
1650 	/*
1651 	 * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping
1652 	 * locks
1653 	 */
1654 	foreach(l, estate->es_rowMarks)
1655 	{
1656 		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
1657 
1658 		if (erm->relation)
1659 			heap_close(erm->relation, NoLock);
1660 	}
1661 }
1662 
1663 /* ----------------------------------------------------------------
1664  *		ExecutePlan
1665  *
1666  *		Processes the query plan until we have retrieved 'numberTuples' tuples,
1667  *		moving in the specified direction.
1668  *
1669  *		Runs to completion if numberTuples is 0
1670  *
1671  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1672  * user can see it
1673  * ----------------------------------------------------------------
1674  */
1675 static void
1676 ExecutePlan(EState *estate,
1677 			PlanState *planstate,
1678 			bool use_parallel_mode,
1679 			CmdType operation,
1680 			bool sendTuples,
1681 			uint64 numberTuples,
1682 			ScanDirection direction,
1683 			DestReceiver *dest,
1684 			bool execute_once)
1685 {
1686 	TupleTableSlot *slot;
1687 	uint64		current_tuple_count;
1688 
1689 	/*
1690 	 * initialize local variables
1691 	 */
1692 	current_tuple_count = 0;
1693 
1694 	/*
1695 	 * Set the direction.
1696 	 */
1697 	estate->es_direction = direction;
1698 
1699 	/*
1700 	 * If the plan might be executed multiple times, we must force
1701 	 * it to run without parallelism, because we might exit early.  Also
1702 	 * disable parallelism when writing into a relation, because no database
1703 	 * changes are allowed in parallel mode.
1704 	 */
1705 	if (!execute_once || dest->mydest == DestIntoRel)
1706 		use_parallel_mode = false;
1707 
1708 	estate->es_use_parallel_mode = use_parallel_mode;
1709 	if (use_parallel_mode)
1710 		EnterParallelMode();
1711 
1712 	/*
1713 	 * Loop until we've processed the proper number of tuples from the plan.
1714 	 */
1715 	for (;;)
1716 	{
1717 		/* Reset the per-output-tuple exprcontext */
1718 		ResetPerTupleExprContext(estate);
1719 
1720 		/*
1721 		 * Execute the plan and obtain a tuple
1722 		 */
1723 		slot = ExecProcNode(planstate);
1724 
1725 		/*
1726 		 * if the tuple is null, then we assume there is nothing more to
1727 		 * process so we just end the loop...
1728 		 */
1729 		if (TupIsNull(slot))
1730 		{
1731 			/*
1732 			 * If we know we won't need to back up, we can release resources
1733 			 * at this point.
1734 			 */
1735 			if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD))
1736 				(void) ExecShutdownNode(planstate);
1737 			break;
1738 		}
1739 
1740 		/*
1741 		 * If we have a junk filter, then project a new tuple with the junk
1742 		 * removed.
1743 		 *
1744 		 * Store this new "clean" tuple in the junkfilter's resultSlot.
1745 		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1746 		 * because that tuple slot has the wrong descriptor.)
1747 		 */
1748 		if (estate->es_junkFilter != NULL)
1749 			slot = ExecFilterJunk(estate->es_junkFilter, slot);
1750 
1751 		/*
1752 		 * If we are supposed to send the tuple somewhere, do so. (In
1753 		 * practice, this is probably always the case at this point.)
1754 		 */
1755 		if (sendTuples)
1756 		{
1757 			/*
1758 			 * If we are not able to send the tuple, we assume the destination
1759 			 * has closed and no more tuples can be sent. If that's the case,
1760 			 * end the loop.
1761 			 */
1762 			if (!((*dest->receiveSlot) (slot, dest)))
1763 				break;
1764 		}
1765 
1766 		/*
1767 		 * Count tuples processed, if this is a SELECT.  (For other operation
1768 		 * types, the ModifyTable plan node must count the appropriate
1769 		 * events.)
1770 		 */
1771 		if (operation == CMD_SELECT)
1772 			(estate->es_processed)++;
1773 
1774 		/*
1775 		 * check our tuple count.  If we've processed the proper number then
1776 		 * quit, else loop again and process more tuples.  Zero numberTuples
1777 		 * means no limit.
1778 		 */
1779 		current_tuple_count++;
1780 		if (numberTuples && numberTuples == current_tuple_count)
1781 		{
1782 			/*
1783 			 * If we know we won't need to back up, we can release resources
1784 			 * at this point.
1785 			 */
1786 			if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD))
1787 				(void) ExecShutdownNode(planstate);
1788 			break;
1789 		}
1790 	}
1791 
1792 	if (use_parallel_mode)
1793 		ExitParallelMode();
1794 }
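
/*
 * Editorial sketch of the count contract, as seen by an ExecutorRun caller
 * (assuming queryDesc was prepared by ExecutorStart): the first call below
 * stops after at most 100 tuples, while a final call with a count of zero
 * runs the plan to completion.
 *
 *		ExecutorRun(queryDesc, ForwardScanDirection, 100, false);
 *		...
 *		ExecutorRun(queryDesc, ForwardScanDirection, 0, true);
 *
 * Note that execute_once must remain false on any call that may be
 * followed by another; only the last call may pass true.
 */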
1795 
1796 
1797 /*
1798  * ExecRelCheck --- check that tuple meets constraints for result relation
1799  *
1800  * Returns NULL if OK, else name of failed check constraint
1801  */
1802 static const char *
1803 ExecRelCheck(ResultRelInfo *resultRelInfo,
1804 			 TupleTableSlot *slot, EState *estate)
1805 {
1806 	Relation	rel = resultRelInfo->ri_RelationDesc;
1807 	int			ncheck = rel->rd_att->constr->num_check;
1808 	ConstrCheck *check = rel->rd_att->constr->check;
1809 	ExprContext *econtext;
1810 	MemoryContext oldContext;
1811 	int			i;
1812 
1813 	/*
1814 	 * If first time through for this result relation, build expression
1815 	 * node trees for rel's constraint expressions.  Keep them in the per-query
1816 	 * memory context so they'll survive throughout the query.
1817 	 */
1818 	if (resultRelInfo->ri_ConstraintExprs == NULL)
1819 	{
1820 		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1821 		resultRelInfo->ri_ConstraintExprs =
1822 			(ExprState **) palloc(ncheck * sizeof(ExprState *));
1823 		for (i = 0; i < ncheck; i++)
1824 		{
1825 			Expr	   *checkconstr;
1826 
1827 			checkconstr = stringToNode(check[i].ccbin);
1828 			resultRelInfo->ri_ConstraintExprs[i] =
1829 				ExecPrepareExpr(checkconstr, estate);
1830 		}
1831 		MemoryContextSwitchTo(oldContext);
1832 	}
1833 
1834 	/*
1835 	 * We will use the EState's per-tuple context for evaluating constraint
1836 	 * expressions (creating it if it's not already there).
1837 	 */
1838 	econtext = GetPerTupleExprContext(estate);
1839 
1840 	/* Arrange for econtext's scan tuple to be the tuple under test */
1841 	econtext->ecxt_scantuple = slot;
1842 
1843 	/* And evaluate the constraints */
1844 	for (i = 0; i < ncheck; i++)
1845 	{
1846 		ExprState  *checkconstr = resultRelInfo->ri_ConstraintExprs[i];
1847 
1848 		/*
1849 		 * NOTE: SQL specifies that a NULL result from a constraint expression
1850 		 * is not to be treated as a failure.  Therefore, use ExecCheck not
1851 		 * ExecQual.
1852 		 */
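		/*
		 * Editorial example: for a constraint like CHECK (price > 0), a row
		 * whose price is NULL makes the expression evaluate to NULL rather
		 * than false; per the SQL rule above that counts as passing, so
		 * ExecCheck returns true and no violation is reported.
		 */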
1853 		if (!ExecCheck(checkconstr, econtext))
1854 			return check[i].ccname;
1855 	}
1856 
1857 	/* NULL result means no error */
1858 	return NULL;
1859 }
1860 
1861 /*
1862  * ExecPartitionCheck --- check that tuple meets the partition constraint.
1863  */
1864 static void
1865 ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
1866 				   EState *estate)
1867 {
1868 	Relation	rel = resultRelInfo->ri_RelationDesc;
1869 	TupleDesc	tupdesc = RelationGetDescr(rel);
1870 	Bitmapset  *modifiedCols;
1871 	Bitmapset  *insertedCols;
1872 	Bitmapset  *updatedCols;
1873 	ExprContext *econtext;
1874 
1875 	/*
1876 	 * If first time through, build expression state tree for the partition
1877 	 * check expression.  Keep it in the per-query memory context so it will
1878 	 * survive throughout the query.
1879 	 */
1880 	if (resultRelInfo->ri_PartitionCheckExpr == NULL)
1881 	{
1882 		List	   *qual = resultRelInfo->ri_PartitionCheck;
1883 
1884 		resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
1885 	}
1886 
1887 	/*
1888 	 * We will use the EState's per-tuple context for evaluating constraint
1889 	 * expressions (creating it if it's not already there).
1890 	 */
1891 	econtext = GetPerTupleExprContext(estate);
1892 
1893 	/* Arrange for econtext's scan tuple to be the tuple under test */
1894 	econtext->ecxt_scantuple = slot;
1895 
1896 	/*
1897 	 * As in the case of the catalogued constraints, we treat a NULL result as
1898 	 * success here, not a failure.
1899 	 */
1900 	if (!ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext))
1901 	{
1902 		char	   *val_desc;
1903 		Relation	orig_rel = rel;
1904 
1905 		/* See the comment in ExecConstraints(). */
1906 		if (resultRelInfo->ri_PartitionRoot)
1907 		{
1908 			HeapTuple	tuple = ExecFetchSlotTuple(slot);
1909 			TupleDesc	old_tupdesc = RelationGetDescr(rel);
1910 			TupleConversionMap *map;
1911 
1912 			rel = resultRelInfo->ri_PartitionRoot;
1913 			tupdesc = RelationGetDescr(rel);
1914 			/* a reverse map */
1915 			map = convert_tuples_by_name(old_tupdesc, tupdesc,
1916 										 gettext_noop("could not convert row type"));
1917 			if (map != NULL)
1918 			{
1919 				tuple = do_convert_tuple(tuple, map);
1920 				/* one-off slot for building error message */
1921 				slot = MakeTupleTableSlot();
1922 				ExecSetSlotDescriptor(slot, tupdesc);
1923 				ExecStoreTuple(tuple, slot, InvalidBuffer, false);
1924 			}
1925 		}
1926 
1927 		insertedCols = GetInsertedColumns(resultRelInfo, estate);
1928 		updatedCols = GetUpdatedColumns(resultRelInfo, estate);
1929 		modifiedCols = bms_union(insertedCols, updatedCols);
1930 		val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
1931 												 slot,
1932 												 tupdesc,
1933 												 modifiedCols,
1934 												 64);
1935 		ereport(ERROR,
1936 				(errcode(ERRCODE_CHECK_VIOLATION),
1937 				 errmsg("new row for relation \"%s\" violates partition constraint",
1938 						RelationGetRelationName(orig_rel)),
1939 				 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
1940 	}
1941 }
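
/*
 * Editorial example: the partition constraint is implicit rather than
 * user-declared.  For a partition created with FOR VALUES FROM (1) TO (10),
 * a direct INSERT of a row whose key is 10 (the exclusive upper bound)
 * fails here with "new row for relation ... violates partition constraint",
 * since the row could never have been routed to this partition through the
 * parent.
 */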
1942 
1943 /*
1944  * ExecConstraints - check constraints of the tuple in 'slot'
1945  *
1946  * This checks the traditional NOT NULL and check constraints, as well as
1947  * the partition constraint, if any.
1948  *
1949  * Note: 'slot' contains the tuple to check the constraints of, which may
1950  * have been converted from the original input tuple after tuple routing.
1951  * 'resultRelInfo' is the original result relation, before tuple routing.
1952  */
1953 void
1954 ExecConstraints(ResultRelInfo *resultRelInfo,
1955 				TupleTableSlot *slot, EState *estate)
1956 {
1957 	Relation	rel = resultRelInfo->ri_RelationDesc;
1958 	TupleDesc	tupdesc = RelationGetDescr(rel);
1959 	TupleConstr *constr = tupdesc->constr;
1960 	Bitmapset  *modifiedCols;
1961 	Bitmapset  *insertedCols;
1962 	Bitmapset  *updatedCols;
1963 
1964 	Assert(constr || resultRelInfo->ri_PartitionCheck);
1965 
1966 	if (constr && constr->has_not_null)
1967 	{
1968 		int			natts = tupdesc->natts;
1969 		int			attrChk;
1970 
1971 		for (attrChk = 1; attrChk <= natts; attrChk++)
1972 		{
1973 			if (tupdesc->attrs[attrChk - 1]->attnotnull &&
1974 				slot_attisnull(slot, attrChk))
1975 			{
1976 				char	   *val_desc;
1977 				Relation	orig_rel = rel;
1978 				TupleDesc	orig_tupdesc = RelationGetDescr(rel);
1979 
1980 				/*
1981 				 * If the tuple has been routed, it's been converted to the
1982 				 * partition's rowtype, which might differ from the root
1983 				 * table's.  We must convert it back to the root table's
1984 				 * rowtype so that val_desc shown in the error message matches the
1985 				 * input tuple.
1986 				 */
1987 				if (resultRelInfo->ri_PartitionRoot)
1988 				{
1989 					HeapTuple	tuple = ExecFetchSlotTuple(slot);
1990 					TupleConversionMap *map;
1991 
1992 					rel = resultRelInfo->ri_PartitionRoot;
1993 					tupdesc = RelationGetDescr(rel);
1994 					/* a reverse map */
1995 					map = convert_tuples_by_name(orig_tupdesc, tupdesc,
1996 												 gettext_noop("could not convert row type"));
1997 					if (map != NULL)
1998 					{
1999 						tuple = do_convert_tuple(tuple, map);
2000 						/* one-off slot for building error message */
2001 						slot = MakeTupleTableSlot();
2002 						ExecSetSlotDescriptor(slot, tupdesc);
2003 						ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2004 					}
2005 				}
2006 
2007 				insertedCols = GetInsertedColumns(resultRelInfo, estate);
2008 				updatedCols = GetUpdatedColumns(resultRelInfo, estate);
2009 				modifiedCols = bms_union(insertedCols, updatedCols);
2010 				val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2011 														 slot,
2012 														 tupdesc,
2013 														 modifiedCols,
2014 														 64);
2015 
2016 				ereport(ERROR,
2017 						(errcode(ERRCODE_NOT_NULL_VIOLATION),
2018 						 errmsg("null value in column \"%s\" violates not-null constraint",
2019 								NameStr(orig_tupdesc->attrs[attrChk - 1]->attname)),
2020 						 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2021 						 errtablecol(orig_rel, attrChk)));
2022 			}
2023 		}
2024 	}
2025 
2026 	if (constr && constr->num_check > 0)
2027 	{
2028 		const char *failed;
2029 
2030 		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
2031 		{
2032 			char	   *val_desc;
2033 			Relation	orig_rel = rel;
2034 
2035 			/* See the comment above. */
2036 			if (resultRelInfo->ri_PartitionRoot)
2037 			{
2038 				HeapTuple	tuple = ExecFetchSlotTuple(slot);
2039 				TupleDesc	old_tupdesc = RelationGetDescr(rel);
2040 				TupleConversionMap *map;
2041 
2042 				rel = resultRelInfo->ri_PartitionRoot;
2043 				tupdesc = RelationGetDescr(rel);
2044 				/* a reverse map */
2045 				map = convert_tuples_by_name(old_tupdesc, tupdesc,
2046 											 gettext_noop("could not convert row type"));
2047 				if (map != NULL)
2048 				{
2049 					tuple = do_convert_tuple(tuple, map);
2050 					/* one-off slot for building error message */
2051 					slot = MakeTupleTableSlot();
2052 					ExecSetSlotDescriptor(slot, tupdesc);
2053 					ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2054 				}
2055 			}
2056 
2057 			insertedCols = GetInsertedColumns(resultRelInfo, estate);
2058 			updatedCols = GetUpdatedColumns(resultRelInfo, estate);
2059 			modifiedCols = bms_union(insertedCols, updatedCols);
2060 			val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2061 													 slot,
2062 													 tupdesc,
2063 													 modifiedCols,
2064 													 64);
2065 			ereport(ERROR,
2066 					(errcode(ERRCODE_CHECK_VIOLATION),
2067 					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
2068 							RelationGetRelationName(orig_rel), failed),
2069 					 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2070 					 errtableconstraint(orig_rel, failed)));
2071 		}
2072 	}
2073 
2074 	if (resultRelInfo->ri_PartitionCheck)
2075 		ExecPartitionCheck(resultRelInfo, slot, estate);
2076 }
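
/*
 * Editorial example (names hypothetical): given
 * CREATE TABLE t (a int NOT NULL, b int CHECK (b > 0)), inserting
 * (NULL, 1) is rejected by the attnotnull loop above, while (1, 0)
 * passes that loop and instead fails in the ExecRelCheck branch, with
 * errdetail such as "Failing row contains (1, 0)." when the user is
 * allowed to see the columns.
 */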
2077 
2078 
2079 /*
2080  * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
2081  * of the specified kind.
2082  *
2083  * Note that this needs to be called multiple times to ensure that all kinds of
2084  * WITH CHECK OPTIONs are handled (both those from views which have the WITH
2085  * CHECK OPTION set and from row level security policies).  See ExecInsert()
2086  * and ExecUpdate().
2087  */
2088 void
2089 ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
2090 					 TupleTableSlot *slot, EState *estate)
2091 {
2092 	Relation	rel = resultRelInfo->ri_RelationDesc;
2093 	TupleDesc	tupdesc = RelationGetDescr(rel);
2094 	ExprContext *econtext;
2095 	ListCell   *l1,
2096 			   *l2;
2097 
2098 	/*
2099 	 * We will use the EState's per-tuple context for evaluating constraint
2100 	 * expressions (creating it if it's not already there).
2101 	 */
2102 	econtext = GetPerTupleExprContext(estate);
2103 
2104 	/* Arrange for econtext's scan tuple to be the tuple under test */
2105 	econtext->ecxt_scantuple = slot;
2106 
2107 	/* Check each of the constraints */
2108 	forboth(l1, resultRelInfo->ri_WithCheckOptions,
2109 			l2, resultRelInfo->ri_WithCheckOptionExprs)
2110 	{
2111 		WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
2112 		ExprState  *wcoExpr = (ExprState *) lfirst(l2);
2113 
2114 		/*
2115 		 * Skip any WCOs which are not the kind we are looking for at this
2116 		 * time.
2117 		 */
2118 		if (wco->kind != kind)
2119 			continue;
2120 
2121 		/*
2122 		 * WITH CHECK OPTION checks are intended to ensure that the new tuple
2123 		 * is visible (in the case of a view) or that it passes the
2124 		 * 'with-check' policy (in the case of row security). If the qual
2125 		 * evaluates to NULL or FALSE, then the new tuple won't be included in
2126 		 * the view or doesn't pass the 'with-check' policy for the table.
2127 		 */
2128 		if (!ExecQual(wcoExpr, econtext))
2129 		{
2130 			char	   *val_desc;
2131 			Bitmapset  *modifiedCols;
2132 			Bitmapset  *insertedCols;
2133 			Bitmapset  *updatedCols;
2134 
2135 			switch (wco->kind)
2136 			{
2137 					/*
2138 					 * For WITH CHECK OPTIONs coming from views, we might be
2139 					 * able to provide the details on the row, depending on
2140 					 * the permissions on the relation (that is, if the user
2141 					 * could view it directly anyway).  For RLS violations, we
2142 					 * don't include the data since we don't know if the user
2143 					 * should be able to view the tuple as that depends on the
2144 					 * USING policy.
2145 					 */
2146 				case WCO_VIEW_CHECK:
2147 					/* See the comment in ExecConstraints(). */
2148 					if (resultRelInfo->ri_PartitionRoot)
2149 					{
2150 						HeapTuple	tuple = ExecFetchSlotTuple(slot);
2151 						TupleDesc	old_tupdesc = RelationGetDescr(rel);
2152 						TupleConversionMap *map;
2153 
2154 						rel = resultRelInfo->ri_PartitionRoot;
2155 						tupdesc = RelationGetDescr(rel);
2156 						/* a reverse map */
2157 						map = convert_tuples_by_name(old_tupdesc, tupdesc,
2158 													 gettext_noop("could not convert row type"));
2159 						if (map != NULL)
2160 						{
2161 							tuple = do_convert_tuple(tuple, map);
2162 							/* one-off slot for building error message */
2163 							slot = MakeTupleTableSlot();
2164 							ExecSetSlotDescriptor(slot, tupdesc);
2165 							ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2166 						}
2167 					}
2168 
2169 					insertedCols = GetInsertedColumns(resultRelInfo, estate);
2170 					updatedCols = GetUpdatedColumns(resultRelInfo, estate);
2171 					modifiedCols = bms_union(insertedCols, updatedCols);
2172 					val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2173 															 slot,
2174 															 tupdesc,
2175 															 modifiedCols,
2176 															 64);
2177 
2178 					ereport(ERROR,
2179 							(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
2180 							 errmsg("new row violates check option for view \"%s\"",
2181 									wco->relname),
2182 							 val_desc ? errdetail("Failing row contains %s.",
2183 												  val_desc) : 0));
2184 					break;
2185 				case WCO_RLS_INSERT_CHECK:
2186 				case WCO_RLS_UPDATE_CHECK:
2187 					if (wco->polname != NULL)
2188 						ereport(ERROR,
2189 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2190 								 errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
2191 										wco->polname, wco->relname)));
2192 					else
2193 						ereport(ERROR,
2194 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2195 								 errmsg("new row violates row-level security policy for table \"%s\"",
2196 										wco->relname)));
2197 					break;
2198 				case WCO_RLS_CONFLICT_CHECK:
2199 					if (wco->polname != NULL)
2200 						ereport(ERROR,
2201 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2202 								 errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
2203 										wco->polname, wco->relname)));
2204 					else
2205 						ereport(ERROR,
2206 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2207 								 errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
2208 										wco->relname)));
2209 					break;
2210 				default:
2211 					elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
2212 					break;
2213 			}
2214 		}
2215 	}
2216 }
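
/*
 * Editorial example (view and table names hypothetical): a WCO_VIEW_CHECK
 * comes from an auto-updatable view such as
 *
 *		CREATE VIEW pos AS SELECT * FROM t WHERE a > 0
 *			WITH CHECK OPTION;
 *
 * An INSERT through pos of a row with a <= 0 would be invisible in the
 * view, so the qual above evaluates to false and we raise "new row
 * violates check option for view ...".
 */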
2217 
2218 /*
2219  * ExecBuildSlotValueDescription -- construct a string representing a tuple
2220  *
2221  * This is intentionally very similar to BuildIndexValueDescription, but
2222  * unlike that function, we truncate long field values (to at most maxfieldlen
2223  * bytes).  That seems necessary here since heap field values could be very
2224  * long, whereas index entries typically aren't so wide.
2225  *
2226  * Also, unlike the case with index entries, we need to be prepared to ignore
2227  * dropped columns.  We used to use the slot's tuple descriptor to decode the
2228  * data, but the slot's descriptor doesn't identify dropped columns, so we
2229  * now need to be passed the relation's descriptor.
2230  *
2231  * Note that, like BuildIndexValueDescription, if the user does not have
2232  * permission to view any of the columns involved, a NULL is returned.  Unlike
2233  * BuildIndexValueDescription, if the user has access to view a subset of the
2234  * columns involved, that subset will be returned with a key identifying which
2235  * columns they are.
2236  */
2237 static char *
2238 ExecBuildSlotValueDescription(Oid reloid,
2239 							  TupleTableSlot *slot,
2240 							  TupleDesc tupdesc,
2241 							  Bitmapset *modifiedCols,
2242 							  int maxfieldlen)
2243 {
2244 	StringInfoData buf;
2245 	StringInfoData collist;
2246 	bool		write_comma = false;
2247 	bool		write_comma_collist = false;
2248 	int			i;
2249 	AclResult	aclresult;
2250 	bool		table_perm = false;
2251 	bool		any_perm = false;
2252 
2253 	/*
2254 	 * Check if RLS is enabled and should be active for the relation; if so,
2255 	 * then don't return anything.  Otherwise, go through normal permission
2256 	 * checks.
2257 	 */
2258 	if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED)
2259 		return NULL;
2260 
2261 	initStringInfo(&buf);
2262 
2263 	appendStringInfoChar(&buf, '(');
2264 
2265 	/*
2266 	 * Check if the user has permissions to see the row.  Table-level SELECT
2267 	 * allows access to all columns.  If the user does not have table-level
2268 	 * SELECT then we check each column and include those the user has SELECT
2269 	 * rights on.  Additionally, we always include columns the user provided
2270 	 * data for.
2271 	 */
2272 	aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT);
2273 	if (aclresult != ACLCHECK_OK)
2274 	{
2275 		/* Set up the buffer for the column list */
2276 		initStringInfo(&collist);
2277 		appendStringInfoChar(&collist, '(');
2278 	}
2279 	else
2280 		table_perm = any_perm = true;
2281 
2282 	/* Make sure the tuple is fully deconstructed */
2283 	slot_getallattrs(slot);
2284 
2285 	for (i = 0; i < tupdesc->natts; i++)
2286 	{
2287 		bool		column_perm = false;
2288 		char	   *val;
2289 		int			vallen;
2290 
2291 		/* ignore dropped columns */
2292 		if (tupdesc->attrs[i]->attisdropped)
2293 			continue;
2294 
2295 		if (!table_perm)
2296 		{
2297 			/*
2298 			 * No table-level SELECT, so need to make sure they either have
2299 			 * SELECT rights on the column or that they have provided the data
2300 			 * for the column.  If not, omit this column from the error
2301 			 * message.
2302 			 */
2303 			aclresult = pg_attribute_aclcheck(reloid, tupdesc->attrs[i]->attnum,
2304 											  GetUserId(), ACL_SELECT);
2305 			if (bms_is_member(tupdesc->attrs[i]->attnum - FirstLowInvalidHeapAttributeNumber,
2306 							  modifiedCols) || aclresult == ACLCHECK_OK)
2307 			{
2308 				column_perm = any_perm = true;
2309 
2310 				if (write_comma_collist)
2311 					appendStringInfoString(&collist, ", ");
2312 				else
2313 					write_comma_collist = true;
2314 
2315 				appendStringInfoString(&collist, NameStr(tupdesc->attrs[i]->attname));
2316 			}
2317 		}
2318 
2319 		if (table_perm || column_perm)
2320 		{
2321 			if (slot->tts_isnull[i])
2322 				val = "null";
2323 			else
2324 			{
2325 				Oid			foutoid;
2326 				bool		typisvarlena;
2327 
2328 				getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
2329 								  &foutoid, &typisvarlena);
2330 				val = OidOutputFunctionCall(foutoid, slot->tts_values[i]);
2331 			}
2332 
2333 			if (write_comma)
2334 				appendStringInfoString(&buf, ", ");
2335 			else
2336 				write_comma = true;
2337 
2338 			/* truncate if needed */
2339 			vallen = strlen(val);
2340 			if (vallen <= maxfieldlen)
2341 				appendStringInfoString(&buf, val);
2342 			else
2343 			{
2344 				vallen = pg_mbcliplen(val, vallen, maxfieldlen);
2345 				appendBinaryStringInfo(&buf, val, vallen);
2346 				appendStringInfoString(&buf, "...");
2347 			}
2348 		}
2349 	}
2350 
2351 	/* If we end up with zero columns being returned, then return NULL. */
2352 	if (!any_perm)
2353 		return NULL;
2354 
2355 	appendStringInfoChar(&buf, ')');
2356 
2357 	if (!table_perm)
2358 	{
2359 		appendStringInfoString(&collist, ") = ");
2360 		appendStringInfoString(&collist, buf.data);
2361 
2362 		return collist.data;
2363 	}
2364 
2365 	return buf.data;
2366 }
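
/*
 * Editorial illustration of the possible result shapes: with table-level
 * SELECT the caller gets just the values, e.g. "(1, null, foo)"; with only
 * column-level access, the visible columns are keyed, e.g.
 * "(a, c) = (1, foo)"; and NULL comes back when RLS is active or nothing
 * may be shown at all.
 */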
2367 
2368 
2369 /*
2370  * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
2371  * given ResultRelInfo
2372  */
2373 LockTupleMode
2374 ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
2375 {
2376 	Bitmapset  *keyCols;
2377 	Bitmapset  *updatedCols;
2378 
2379 	/*
2380 	 * Compute lock mode to use.  If columns that are part of the key have not
2381 	 * been modified, then we can use a weaker lock, allowing for better
2382 	 * concurrency.
2383 	 */
2384 	updatedCols = GetUpdatedColumns(relinfo, estate);
2385 	keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
2386 										 INDEX_ATTR_BITMAP_KEY);
2387 
2388 	if (bms_overlap(keyCols, updatedCols))
2389 		return LockTupleExclusive;
2390 
2391 	return LockTupleNoKeyExclusive;
2392 }
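
/*
 * Editorial example (table and column names hypothetical): an
 * UPDATE t SET note = '...' that touches no column in the
 * INDEX_ATTR_BITMAP_KEY set yields LockTupleNoKeyExclusive, which does not
 * conflict with concurrent FOR KEY SHARE lockers such as foreign-key
 * checks; an UPDATE that modifies a key column yields LockTupleExclusive
 * instead.
 */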
2393 
2394 /*
2395  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
2396  *
2397  * If no such struct, either return NULL or throw an error, depending on missing_ok
2398  */
2399 ExecRowMark *
2400 ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
2401 {
2402 	ListCell   *lc;
2403 
2404 	foreach(lc, estate->es_rowMarks)
2405 	{
2406 		ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
2407 
2408 		if (erm->rti == rti)
2409 			return erm;
2410 	}
2411 	if (!missing_ok)
2412 		elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
2413 	return NULL;
2414 }
2415 
2416 /*
2417  * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
2418  *
2419  * Inputs are the underlying ExecRowMark struct and the targetlist of the
2420  * input plan node (not planstate node!).  We need the latter to find out
2421  * the column numbers of the resjunk columns.
2422  */
2423 ExecAuxRowMark *
2424 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
2425 {
2426 	ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
2427 	char		resname[32];
2428 
2429 	aerm->rowmark = erm;
2430 
2431 	/* Look up the resjunk columns associated with this rowmark */
2432 	if (erm->markType != ROW_MARK_COPY)
2433 	{
2434 		/* need ctid for all methods other than COPY */
2435 		snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
2436 		aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2437 													   resname);
2438 		if (!AttributeNumberIsValid(aerm->ctidAttNo))
2439 			elog(ERROR, "could not find junk %s column", resname);
2440 	}
2441 	else
2442 	{
2443 		/* need wholerow if COPY */
2444 		snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
2445 		aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
2446 														resname);
2447 		if (!AttributeNumberIsValid(aerm->wholeAttNo))
2448 			elog(ERROR, "could not find junk %s column", resname);
2449 	}
2450 
2451 	/* if child rel, need tableoid */
2452 	if (erm->rti != erm->prti)
2453 	{
2454 		snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
2455 		aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2456 													   resname);
2457 		if (!AttributeNumberIsValid(aerm->toidAttNo))
2458 			elog(ERROR, "could not find junk %s column", resname);
2459 	}
2460 
2461 	return aerm;
2462 }
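
/*
 * Editorial example: for an ExecRowMark with rowmarkId 1, markType
 * ROW_MARK_COPY, and rti != prti (a child rel), the planner will have
 * added resjunk tlist entries named "wholerow1" and "tableoid1"; the
 * lookups above merely recover the attribute numbers behind those names.
 */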
2463 
2464 
2465 /*
2466  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
2467  * process the updated version under READ COMMITTED rules.
2468  *
2469  * See backend/executor/README for some info about how this works.
2470  */
2471 
2472 
2473 /*
2474  * Check a modified tuple to see if we want to process its updated version
2475  * under READ COMMITTED rules.
2476  *
2477  *	estate - outer executor state data
2478  *	epqstate - state for EvalPlanQual rechecking
2479  *	relation - table containing tuple
2480  *	rti - rangetable index of table containing tuple
2481  *	lockmode - requested tuple lock mode
2482  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
2483  *	priorXmax - t_xmax from the outdated tuple
2484  *
2485  * *tid is also an output parameter: it's modified to hold the TID of the
2486  * latest version of the tuple (note this may be changed even on failure)
2487  *
2488  * Returns a slot containing the new candidate update/delete tuple, or
2489  * NULL if we determine we shouldn't process the row.
2490  *
2491  * Note: properly, lockmode should be declared as enum LockTupleMode,
2492  * but we use "int" to avoid having to include heapam.h in executor.h.
2493  */
2494 TupleTableSlot *
2495 EvalPlanQual(EState *estate, EPQState *epqstate,
2496 			 Relation relation, Index rti, int lockmode,
2497 			 ItemPointer tid, TransactionId priorXmax)
2498 {
2499 	TupleTableSlot *slot;
2500 	HeapTuple	copyTuple;
2501 
2502 	Assert(rti > 0);
2503 
2504 	/*
2505 	 * Get and lock the updated version of the row; if fail, return NULL.
2506 	 */
2507 	copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
2508 								  tid, priorXmax);
2509 
2510 	if (copyTuple == NULL)
2511 		return NULL;
2512 
2513 	/*
2514 	 * For UPDATE/DELETE, we have to return the tid of the actual row we're
2515 	 * executing EPQ for.
2516 	 */
2517 	*tid = copyTuple->t_self;
2518 
2519 	/*
2520 	 * Need to run a recheck subquery.  Initialize or reinitialize EPQ state.
2521 	 */
2522 	EvalPlanQualBegin(epqstate, estate);
2523 
2524 	/*
2525 	 * Free old test tuple, if any, and store new tuple where relation's scan
2526 	 * node will see it
2527 	 */
2528 	EvalPlanQualSetTuple(epqstate, rti, copyTuple);
2529 
2530 	/*
2531 	 * Fetch any non-locked source rows
2532 	 */
2533 	EvalPlanQualFetchRowMarks(epqstate);
2534 
2535 	/*
2536 	 * Run the EPQ query.  We assume it will return at most one tuple.
2537 	 */
2538 	slot = EvalPlanQualNext(epqstate);
2539 
2540 	/*
2541 	 * If we got a tuple, force the slot to materialize the tuple so that it
2542 	 * is not dependent on any local state in the EPQ query (in particular,
2543 	 * it's highly likely that the slot contains references to any pass-by-ref
2544 	 * datums that may be present in copyTuple).  As with the next step, this
2545 	 * is to guard against early re-use of the EPQ query.
2546 	 */
2547 	if (!TupIsNull(slot))
2548 		(void) ExecMaterializeSlot(slot);
2549 
2550 	/*
2551 	 * Clear out the test tuple.  This is needed in case the EPQ query is
2552 	 * re-used to test a tuple for a different relation.  (Not clear that can
2553 	 * really happen, but let's be safe.)
2554 	 */
2555 	EvalPlanQualSetTuple(epqstate, rti, NULL);
2556 
2557 	return slot;
2558 }
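
/*
 * Editorial sketch of the caller's side (cf. ExecUpdate/ExecDelete in
 * nodeModifyTable.c): after heap_update returns HeapTupleUpdated under
 * READ COMMITTED, the caller rechecks roughly like this:
 *
 *		slot = EvalPlanQual(estate, epqstate, relation, rti,
 *							lockmode, &hufd.ctid, hufd.xmax);
 *		if (!TupIsNull(slot))
 *			... redo the operation using values from slot ...
 *		else
 *			... row is gone or no longer passes the quals; skip it ...
 */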
2559 
2560 /*
2561  * Fetch a copy of the newest version of an outdated tuple
2562  *
2563  *	estate - executor state data
2564  *	relation - table containing tuple
2565  *	lockmode - requested tuple lock mode
2566  *	wait_policy - requested lock wait policy
2567  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
2568  *	priorXmax - t_xmax from the outdated tuple
2569  *
2570  * Returns a palloc'd copy of the newest tuple version, or NULL if we find
2571  * that there is no newest version (ie, the row was deleted, not updated).
2572  * We also return NULL if the tuple is locked and the wait policy is to skip
2573  * such tuples.
2574  *
2575  * If successful, we have locked the newest tuple version, so caller does not
2576  * need to worry about it changing anymore.
2577  *
2578  * Note: properly, lockmode should be declared as enum LockTupleMode,
2579  * but we use "int" to avoid having to include heapam.h in executor.h.
2580  */
2581 HeapTuple
2582 EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
2583 				  LockWaitPolicy wait_policy,
2584 				  ItemPointer tid, TransactionId priorXmax)
2585 {
2586 	HeapTuple	copyTuple = NULL;
2587 	HeapTupleData tuple;
2588 	SnapshotData SnapshotDirty;
2589 
2590 	/*
2591 	 * fetch target tuple
2592 	 *
2593 	 * Loop here to deal with updated or busy tuples
2594 	 */
2595 	InitDirtySnapshot(SnapshotDirty);
2596 	tuple.t_self = *tid;
2597 	for (;;)
2598 	{
2599 		Buffer		buffer;
2600 
2601 		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2602 		{
2603 			HTSU_Result test;
2604 			HeapUpdateFailureData hufd;
2605 
2606 			/*
2607 			 * If xmin isn't what we're expecting, the slot must have been
2608 			 * recycled and reused for an unrelated tuple.  This implies that
2609 			 * the latest version of the row was deleted, so we need do
2610 			 * nothing.  (Should be safe to examine xmin without getting
2611 			 * buffer's content lock.  We assume reading a TransactionId to be
2612 			 * atomic, and Xmin never changes in an existing tuple, except to
2613 			 * invalid or frozen, and neither of those can match priorXmax.)
2614 			 */
2615 			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2616 									 priorXmax))
2617 			{
2618 				ReleaseBuffer(buffer);
2619 				return NULL;
2620 			}
2621 
2622 			/* otherwise xmin should not be dirty... */
2623 			if (TransactionIdIsValid(SnapshotDirty.xmin))
2624 				ereport(ERROR,
2625 						(errcode(ERRCODE_DATA_CORRUPTED),
2626 						 errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
2627 										 SnapshotDirty.xmin,
2628 										 ItemPointerGetBlockNumber(&tuple.t_self),
2629 										 ItemPointerGetOffsetNumber(&tuple.t_self),
2630 										 RelationGetRelationName(relation))));
2631 
2632 			/*
2633 			 * If the tuple is being updated by another transaction, we have to
2634 			 * wait for its commit/abort, or die trying.
2635 			 */
2636 			if (TransactionIdIsValid(SnapshotDirty.xmax))
2637 			{
2638 				ReleaseBuffer(buffer);
2639 				switch (wait_policy)
2640 				{
2641 					case LockWaitBlock:
2642 						XactLockTableWait(SnapshotDirty.xmax,
2643 										  relation, &tuple.t_self,
2644 										  XLTW_FetchUpdated);
2645 						break;
2646 					case LockWaitSkip:
2647 						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2648 							return NULL;	/* skip instead of waiting */
2649 						break;
2650 					case LockWaitError:
2651 						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2652 							ereport(ERROR,
2653 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
2654 									 errmsg("could not obtain lock on row in relation \"%s\"",
2655 											RelationGetRelationName(relation))));
2656 						break;
2657 				}
2658 				continue;		/* loop back to repeat heap_fetch */
2659 			}
2660 
2661 			/*
2662 			 * If tuple was inserted by our own transaction, we have to check
2663 			 * cmin against es_output_cid: cmin >= current CID means our
2664 			 * command cannot see the tuple, so we should ignore it. Otherwise
2665 			 * heap_lock_tuple() will throw an error, and so would any later
2666 			 * attempt to update or delete the tuple.  (We need not check cmax
2667 			 * because HeapTupleSatisfiesDirty will consider a tuple deleted
2668 			 * by our transaction dead, regardless of cmax.) We just checked
2669 			 * that priorXmax == xmin, so we can test that variable instead of
2670 			 * doing HeapTupleHeaderGetXmin again.
2671 			 */
2672 			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2673 				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2674 			{
2675 				ReleaseBuffer(buffer);
2676 				return NULL;
2677 			}
2678 
2679 			/*
2680 			 * This is a live tuple, so now try to lock it.
2681 			 */
2682 			test = heap_lock_tuple(relation, &tuple,
2683 								   estate->es_output_cid,
2684 								   lockmode, wait_policy,
2685 								   false, &buffer, &hufd);
2686 			/* We now have two pins on the buffer, get rid of one */
2687 			ReleaseBuffer(buffer);
2688 
2689 			switch (test)
2690 			{
2691 				case HeapTupleSelfUpdated:
2692 
2693 					/*
2694 					 * The target tuple was already updated or deleted by the
2695 					 * current command, or by a later command in the current
2696 					 * transaction.  We *must* ignore the tuple in the former
2697 					 * case, so as to avoid the "Halloween problem" of
2698 					 * repeated update attempts.  In the latter case it might
2699 					 * be sensible to fetch the updated tuple instead, but
2700 					 * doing so would require changing heap_update and
2701 					 * heap_delete to not complain about updating "invisible"
2702 					 * tuples, which seems pretty scary (heap_lock_tuple will
2703 					 * not complain, but few callers expect
2704 					 * HeapTupleInvisible, and we're not one of them).  So for
2705 					 * now, treat the tuple as deleted and do not process.
2706 					 */
2707 					ReleaseBuffer(buffer);
2708 					return NULL;
2709 
2710 				case HeapTupleMayBeUpdated:
2711 					/* successfully locked */
2712 					break;
2713 
2714 				case HeapTupleUpdated:
2715 					ReleaseBuffer(buffer);
2716 					if (IsolationUsesXactSnapshot())
2717 						ereport(ERROR,
2718 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2719 								 errmsg("could not serialize access due to concurrent update")));
2720 
2721 					/* Should not encounter speculative tuple on recheck */
2722 					Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
2723 					if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
2724 					{
2725 						/* it was updated, so look at the updated version */
2726 						tuple.t_self = hufd.ctid;
2727 						/* updated row should have xmin matching this xmax */
2728 						priorXmax = hufd.xmax;
2729 						continue;
2730 					}
2731 					/* tuple was deleted, so give up */
2732 					return NULL;
2733 
2734 				case HeapTupleWouldBlock:
2735 					ReleaseBuffer(buffer);
2736 					return NULL;
2737 
2738 				case HeapTupleInvisible:
2739 					elog(ERROR, "attempted to lock invisible tuple");
2740 
2741 				default:
2742 					ReleaseBuffer(buffer);
2743 					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
2744 						 test);
2745 					return NULL;	/* keep compiler quiet */
2746 			}
2747 
2748 			/*
2749 			 * We got the tuple - now copy it for use by the recheck query.
2750 			 */
2751 			copyTuple = heap_copytuple(&tuple);
2752 			ReleaseBuffer(buffer);
2753 			break;
2754 		}
2755 
2756 		/*
2757 		 * If the referenced slot was actually empty, the latest version of
2758 		 * the row must have been deleted, so we need do nothing.
2759 		 */
2760 		if (tuple.t_data == NULL)
2761 		{
2762 			ReleaseBuffer(buffer);
2763 			return NULL;
2764 		}
2765 
2766 		/*
2767 		 * As above, if xmin isn't what we're expecting, do nothing.
2768 		 */
2769 		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2770 								 priorXmax))
2771 		{
2772 			ReleaseBuffer(buffer);
2773 			return NULL;
2774 		}
2775 
2776 		/*
2777 		 * If we get here, the tuple was found but failed SnapshotDirty.
2778 		 * Assuming the xmin is either a committed xact or our own xact (as it
2779 		 * certainly should be if we're trying to modify the tuple), this must
2780 		 * mean that the row was updated or deleted by either a committed xact
2781 		 * or our own xact.  If it was deleted, we can ignore it; if it was
2782 		 * updated then chain up to the next version and repeat the whole
2783 		 * process.
2784 		 *
2785 		 * As above, it should be safe to examine xmax and t_ctid without the
2786 		 * buffer content lock, because they can't be changing.
2787 		 */
2788 		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2789 		{
2790 			/* deleted, so forget about it */
2791 			ReleaseBuffer(buffer);
2792 			return NULL;
2793 		}
2794 
2795 		/* updated, so look at the updated row */
2796 		tuple.t_self = tuple.t_data->t_ctid;
2797 		/* updated row should have xmin matching this xmax */
2798 		priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
2799 		ReleaseBuffer(buffer);
2800 		/* loop back to fetch next in chain */
2801 	}
2802 
2803 	/*
2804 	 * Return the copied tuple
2805 	 */
2806 	return copyTuple;
2807 }
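
/*
 * Editorial note on the chasing loop above: each iteration follows t_ctid
 * to the next tuple version and carries the expected xmin forward, e.g. an
 * update chain (0,1) -> (0,7) -> (2,3) is walked until heap_fetch returns
 * a version visible to SnapshotDirty, which is then locked and copied.
 * The priorXmax/xmin cross-check is what detects that a line pointer in
 * the chain was recycled for an unrelated tuple.
 */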
2808 
2809 /*
2810  * EvalPlanQualInit -- initialize during creation of a plan state node
2811  * that might need to invoke EPQ processing.
2812  *
2813  * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
2814  * with EvalPlanQualSetPlan.
2815  */
2816 void
2817 EvalPlanQualInit(EPQState *epqstate, EState *estate,
2818 				 Plan *subplan, List *auxrowmarks, int epqParam)
2819 {
2820 	/* Mark the EPQ state inactive */
2821 	epqstate->estate = NULL;
2822 	epqstate->planstate = NULL;
2823 	epqstate->origslot = NULL;
2824 	/* ... and remember data that EvalPlanQualBegin will need */
2825 	epqstate->plan = subplan;
2826 	epqstate->arowMarks = auxrowmarks;
2827 	epqstate->epqParam = epqParam;
2828 }
2829 
2830 /*
2831  * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
2832  *
2833  * We need this so that ModifyTable can deal with multiple subplans.
2834  */
2835 void
2836 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
2837 {
2838 	/* If we have a live EPQ query, shut it down */
2839 	EvalPlanQualEnd(epqstate);
2840 	/* And set/change the plan pointer */
2841 	epqstate->plan = subplan;
2842 	/* The rowmarks depend on the plan, too */
2843 	epqstate->arowMarks = auxrowmarks;
2844 }
2845 
2846 /*
2847  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
2848  *
2849  * NB: passed tuple must be palloc'd; it may get freed later
2850  */
2851 void
2852 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
2853 {
2854 	EState	   *estate = epqstate->estate;
2855 
2856 	Assert(rti > 0);
2857 
2858 	/*
2859 	 * free old test tuple, if any, and store new tuple where relation's scan
2860 	 * node will see it
2861 	 */
2862 	if (estate->es_epqTuple[rti - 1] != NULL)
2863 		heap_freetuple(estate->es_epqTuple[rti - 1]);
2864 	estate->es_epqTuple[rti - 1] = tuple;
2865 	estate->es_epqTupleSet[rti - 1] = true;
2866 }
2867 
2868 /*
2869  * Fetch back the current test tuple (if any) for the specified RTI
2870  */
2871 HeapTuple
2872 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
2873 {
2874 	EState	   *estate = epqstate->estate;
2875 
2876 	Assert(rti > 0);
2877 
2878 	return estate->es_epqTuple[rti - 1];
2879 }
2880 
2881 /*
2882  * Fetch the current row values for any non-locked relations that need
2883  * to be scanned by an EvalPlanQual operation.  origslot must have been set
2884  * to contain the current result row (top-level row) that we need to recheck.
2885  */
2886 void
2887 EvalPlanQualFetchRowMarks(EPQState *epqstate)
2888 {
2889 	ListCell   *l;
2890 
2891 	Assert(epqstate->origslot != NULL);
2892 
2893 	foreach(l, epqstate->arowMarks)
2894 	{
2895 		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
2896 		ExecRowMark *erm = aerm->rowmark;
2897 		Datum		datum;
2898 		bool		isNull;
2899 		HeapTupleData tuple;
2900 
2901 		if (RowMarkRequiresRowShareLock(erm->markType))
2902 			elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
2903 
2904 		/* clear any leftover test tuple for this rel */
2905 		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
2906 
2907 		/* if child rel, must check whether it produced this row */
2908 		if (erm->rti != erm->prti)
2909 		{
2910 			Oid			tableoid;
2911 
2912 			datum = ExecGetJunkAttribute(epqstate->origslot,
2913 										 aerm->toidAttNo,
2914 										 &isNull);
2915 			/* non-locked rels could be on the inside of outer joins */
2916 			if (isNull)
2917 				continue;
2918 			tableoid = DatumGetObjectId(datum);
2919 
2920 			Assert(OidIsValid(erm->relid));
2921 			if (tableoid != erm->relid)
2922 			{
2923 				/* this child is inactive right now */
2924 				continue;
2925 			}
2926 		}
2927 
2928 		if (erm->markType == ROW_MARK_REFERENCE)
2929 		{
2930 			HeapTuple	copyTuple;
2931 
2932 			Assert(erm->relation != NULL);
2933 
2934 			/* fetch the tuple's ctid */
2935 			datum = ExecGetJunkAttribute(epqstate->origslot,
2936 										 aerm->ctidAttNo,
2937 										 &isNull);
2938 			/* non-locked rels could be on the inside of outer joins */
2939 			if (isNull)
2940 				continue;
2941 
2942 			/* fetch requests on foreign tables must be passed to their FDW */
2943 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2944 			{
2945 				FdwRoutine *fdwroutine;
2946 				bool		updated = false;
2947 
2948 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
2949 				/* this should have been checked already, but let's be safe */
2950 				if (fdwroutine->RefetchForeignRow == NULL)
2951 					ereport(ERROR,
2952 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2953 							 errmsg("cannot lock rows in foreign table \"%s\"",
2954 									RelationGetRelationName(erm->relation))));
2955 				copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
2956 														  erm,
2957 														  datum,
2958 														  &updated);
2959 				if (copyTuple == NULL)
2960 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2961 
2962 				/*
2963 				 * Ideally we'd insist on updated == false here, but that
2964 				 * assumes that FDWs can track that exactly, which they might
2965 				 * not be able to.  So just ignore the flag.
2966 				 */
2967 			}
2968 			else
2969 			{
2970 				/* ordinary table, fetch the tuple */
2971 				Buffer		buffer;
2972 
2973 				tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
2974 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
2975 								false, NULL))
2976 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2977 
2978 				/* successful, copy tuple */
2979 				copyTuple = heap_copytuple(&tuple);
2980 				ReleaseBuffer(buffer);
2981 			}
2982 
2983 			/* store tuple */
2984 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
2985 		}
2986 		else
2987 		{
2988 			HeapTupleHeader td;
2989 
2990 			Assert(erm->markType == ROW_MARK_COPY);
2991 
2992 			/* fetch the whole-row Var for the relation */
2993 			datum = ExecGetJunkAttribute(epqstate->origslot,
2994 										 aerm->wholeAttNo,
2995 										 &isNull);
2996 			/* non-locked rels could be on the inside of outer joins */
2997 			if (isNull)
2998 				continue;
2999 			td = DatumGetHeapTupleHeader(datum);
3000 
3001 			/* build a temporary HeapTuple control structure */
3002 			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
3003 			tuple.t_data = td;
3004 			/* relation might be a foreign table, if so provide tableoid */
3005 			tuple.t_tableOid = erm->relid;
3006 			/* also copy t_ctid in case there's valid data there */
3007 			tuple.t_self = td->t_ctid;
3008 
3009 			/* copy and store tuple */
3010 			EvalPlanQualSetTuple(epqstate, erm->rti,
3011 								 heap_copytuple(&tuple));
3012 		}
3013 	}
3014 }
3015 
3016 /*
3017  * Fetch the next row (if any) from EvalPlanQual testing
3018  *
3019  * (In practice, there should never be more than one row...)
3020  */
3021 TupleTableSlot *
3022 EvalPlanQualNext(EPQState *epqstate)
3023 {
3024 	MemoryContext oldcontext;
3025 	TupleTableSlot *slot;
3026 
3027 	oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
3028 	slot = ExecProcNode(epqstate->planstate);
3029 	MemoryContextSwitchTo(oldcontext);
3030 
3031 	return slot;
3032 }
3033 
3034 /*
3035  * Initialize or reset an EvalPlanQual state tree
3036  */
3037 void
3038 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
3039 {
3040 	EState	   *estate = epqstate->estate;
3041 
3042 	if (estate == NULL)
3043 	{
3044 		/* First time through, so create a child EState */
3045 		EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
3046 	}
3047 	else
3048 	{
3049 		/*
3050 		 * We already have a suitable child EPQ tree, so just reset it.
3051 		 */
3052 		int			rtsize = list_length(parentestate->es_range_table);
3053 		PlanState  *planstate = epqstate->planstate;
3054 
3055 		MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
3056 
3057 		/* Recopy current values of parent parameters */
3058 		if (parentestate->es_plannedstmt->nParamExec > 0)
3059 		{
3060 			int			i;
3061 
3062 			/*
3063 			 * Force evaluation of any InitPlan outputs that could be needed
3064 			 * by the subplan, just in case they got reset since
3065 			 * EvalPlanQualStart (see comments therein).
3066 			 */
3067 			ExecSetParamPlanMulti(planstate->plan->extParam,
3068 								  GetPerTupleExprContext(parentestate));
3069 
3070 			i = parentestate->es_plannedstmt->nParamExec;
3071 
3072 			while (--i >= 0)
3073 			{
3074 				/* copy value if any, but not execPlan link */
3075 				estate->es_param_exec_vals[i].value =
3076 					parentestate->es_param_exec_vals[i].value;
3077 				estate->es_param_exec_vals[i].isnull =
3078 					parentestate->es_param_exec_vals[i].isnull;
3079 			}
3080 		}
3081 
3082 		/*
3083 		 * Mark child plan tree as needing rescan at all scan nodes.  The
3084 		 * first ExecProcNode will take care of actually doing the rescan.
3085 		 */
3086 		planstate->chgParam = bms_add_member(planstate->chgParam,
3087 											 epqstate->epqParam);
3088 	}
3089 }
3090 
3091 /*
3092  * Start execution of an EvalPlanQual plan tree.
3093  *
3094  * This is a cut-down version of ExecutorStart(): we copy some state from
3095  * the top-level estate rather than initializing it fresh.
3096  */
3097 static void
3098 EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
3099 {
3100 	EState	   *estate;
3101 	int			rtsize;
3102 	MemoryContext oldcontext;
3103 	ListCell   *l;
3104 
3105 	rtsize = list_length(parentestate->es_range_table);
3106 
3107 	epqstate->estate = estate = CreateExecutorState();
3108 
3109 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
3110 
3111 	/*
3112 	 * Child EPQ EStates share the parent's copy of unchanging state such as
3113 	 * the snapshot, rangetable, result-rel info, and external Param info.
3114 	 * They need their own copies of local state, including a tuple table,
3115 	 * es_param_exec_vals, etc.
3116 	 *
3117 	 * The ResultRelInfo array management is trickier than it looks.  We
3118 	 * create fresh arrays for the child but copy all the content from the
3119 	 * parent.  This is because it's okay for the child to share any
3120 	 * per-relation state the parent has already created --- but if the child
3121 	 * sets up any ResultRelInfo fields, such as its own junkfilter, that
3122 	 * state must *not* propagate back to the parent.  (For one thing, the
3123 	 * pointed-to data is in a memory context that won't last long enough.)
3124 	 */
3125 	estate->es_direction = ForwardScanDirection;
3126 	estate->es_snapshot = parentestate->es_snapshot;
3127 	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
3128 	estate->es_range_table = parentestate->es_range_table;
3129 	estate->es_queryEnv = parentestate->es_queryEnv;
3130 	estate->es_plannedstmt = parentestate->es_plannedstmt;
3131 	estate->es_junkFilter = parentestate->es_junkFilter;
3132 	estate->es_output_cid = parentestate->es_output_cid;
3133 	if (parentestate->es_num_result_relations > 0)
3134 	{
3135 		int			numResultRelations = parentestate->es_num_result_relations;
3136 		int			numRootResultRels = parentestate->es_num_root_result_relations;
3137 		ResultRelInfo *resultRelInfos;
3138 
3139 		resultRelInfos = (ResultRelInfo *)
3140 			palloc(numResultRelations * sizeof(ResultRelInfo));
3141 		memcpy(resultRelInfos, parentestate->es_result_relations,
3142 			   numResultRelations * sizeof(ResultRelInfo));
3143 		estate->es_result_relations = resultRelInfos;
3144 		estate->es_num_result_relations = numResultRelations;
3145 
3146 		/* Also transfer partitioned root result relations. */
3147 		if (numRootResultRels > 0)
3148 		{
3149 			resultRelInfos = (ResultRelInfo *)
3150 				palloc(numRootResultRels * sizeof(ResultRelInfo));
3151 			memcpy(resultRelInfos, parentestate->es_root_result_relations,
3152 				   numRootResultRels * sizeof(ResultRelInfo));
3153 			estate->es_root_result_relations = resultRelInfos;
3154 			estate->es_num_root_result_relations = numRootResultRels;
3155 		}
3156 	}
	/* es_result_relation_info must NOT be copied */
	/* es_trig_target_relations must NOT be copied */
	estate->es_rowMarks = parentestate->es_rowMarks;
	estate->es_top_eflags = parentestate->es_top_eflags;
	estate->es_instrument = parentestate->es_instrument;
	/* es_auxmodifytables must NOT be copied */

	/*
	 * The external param list is simply shared from parent.  The internal
	 * param workspace has to be local state, but we copy the initial values
	 * from the parent, so as to have access to any param values that were
	 * already set from other parts of the parent's plan tree.
	 */
	estate->es_param_list_info = parentestate->es_param_list_info;
	if (parentestate->es_plannedstmt->nParamExec > 0)
	{
		int			i;

		/*
		 * Force evaluation of any InitPlan outputs that could be needed by
		 * the subplan.  (With more complexity, maybe we could postpone this
		 * till the subplan actually demands them, but it doesn't seem worth
		 * the trouble; this is a corner case already, since usually the
		 * InitPlans would have been evaluated before reaching EvalPlanQual.)
		 *
		 * This will not touch output params of InitPlans that occur somewhere
		 * within the subplan tree, only those that are attached to the
		 * ModifyTable node or above it and are referenced within the subplan.
		 * That's OK though, because the planner would only attach such
		 * InitPlans to a lower-level SubqueryScan node, and EPQ execution
		 * will not descend into a SubqueryScan.
		 *
		 * The EState's per-output-tuple econtext is sufficiently short-lived
		 * for this, since it should get reset before there is any chance of
		 * doing EvalPlanQual again.
		 */
		ExecSetParamPlanMulti(planTree->extParam,
							  GetPerTupleExprContext(parentestate));

		/* now make the internal param workspace ... */
		i = parentestate->es_plannedstmt->nParamExec;
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(i * sizeof(ParamExecData));
		/* ... and copy down all values, whether really needed or not */
		while (--i >= 0)
		{
			/*
			 * Copy value if any, but not the execPlan link: if the child
			 * plan needs to recompute a param, its own ExecInitSubPlan will
			 * set that link in this workspace.
			 */
			estate->es_param_exec_vals[i].value =
				parentestate->es_param_exec_vals[i].value;
			estate->es_param_exec_vals[i].isnull =
				parentestate->es_param_exec_vals[i].isnull;
		}
	}

	/*
	 * Each EState must have its own es_epqScanDone state, but if we have
	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
	 * sub-rechecks to inherit the values being examined by an outer recheck.
	 */
	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
	if (parentestate->es_epqTuple != NULL)
	{
		estate->es_epqTuple = parentestate->es_epqTuple;
		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
	}
	else
	{
		estate->es_epqTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
		estate->es_epqTupleSet = (bool *)
			palloc0(rtsize * sizeof(bool));
	}
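
	/*
	 * Illustration only: if an outer recheck has stored its test tuple for
	 * rangetable index 2 in es_epqTuple[1] (the arrays are indexed by
	 * rti - 1), a nested recheck fired while re-evaluating that plan sees
	 * the very same tuple through the shared array, while still tracking
	 * its own progress in its private es_epqScanDone[].
	 */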

	/*
	 * Each estate also has its own tuple table.
	 */
	estate->es_tupleTable = NIL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries. Some of the
	 * SubPlans might not be used in the part of the plan tree we intend to
	 * run, but since it's not easy to tell which, we just initialize them
	 * all.
	 */
	Assert(estate->es_subplanstates == NIL);
	foreach(l, parentestate->es_plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;

		subplanstate = ExecInitNode(subplan, estate, 0);
		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);
	}

	/*
	 * Initialize the private state information for all the nodes in the part
	 * of the plan tree we need to run.  This opens files, allocates storage
	 * and leaves us ready to start processing tuples.
	 */
	epqstate->planstate = ExecInitNode(planTree, estate, 0);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndNode(epqstate->planstate);

	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/* close any trigger target relations attached to this EState */
	ExecCleanUpTriggerState(estate);

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
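
/*
 * As an illustration of how the EPQ machinery above fits together, a
 * typical caller-side sequence (condensed from ExecLockRows in
 * nodeLockRows.c; variable names and error handling elided) looks
 * roughly like:
 *
 *		EvalPlanQualBegin(&node->lr_epqstate, estate);
 *		EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
 *		slot = EvalPlanQualNext(&node->lr_epqstate);
 *		if (TupIsNull(slot))
 *			... the updated row fails the rechecked quals; skip it ...
 *
 * with EvalPlanQualEnd() invoked from the node's shutdown routine to
 * release the child EState built by EvalPlanQualStart.
 */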

/*
 * ExecSetupPartitionTupleRouting - set up information needed during
 * tuple routing for partitioned tables
 *
 * Output arguments:
 * 'pd' receives an array of PartitionDispatch objects with one entry for
 *		every partitioned table in the partition tree
 * 'partitions' receives an array of ResultRelInfo objects with one entry for
 *		every leaf partition in the partition tree
 * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
 *		entry for every leaf partition (required to convert an input tuple
 *		based on the root table's rowtype to a leaf partition's rowtype after
 *		tuple routing is done)
 * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
 *		to manipulate any given leaf partition's rowtype after that partition
 *		is chosen by tuple-routing.
 * 'num_parted' receives the number of partitioned tables in the partition
 *		tree (= the number of entries in the 'pd' output array)
 * 'num_partitions' receives the number of leaf partitions in the partition
 *		tree (= the number of entries in the 'partitions' and 'tup_conv_maps'
 *		output arrays)
 *
 * Note that all the relations in the partition tree are locked in
 * RowExclusiveLock mode upon return from this function.
 */
void
ExecSetupPartitionTupleRouting(Relation rel,
							   Index resultRTindex,
							   EState *estate,
							   PartitionDispatch **pd,
							   ResultRelInfo **partitions,
							   TupleConversionMap ***tup_conv_maps,
							   TupleTableSlot **partition_tuple_slot,
							   int *num_parted, int *num_partitions)
{
	TupleDesc	tupDesc = RelationGetDescr(rel);
	List	   *leaf_parts;
	ListCell   *cell;
	int			i;
	ResultRelInfo *leaf_part_rri;

	/*
	 * Get the information about the partition tree after locking all the
	 * partitions.
	 */
	(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
	*pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
	*num_partitions = list_length(leaf_parts);
	*partitions = (ResultRelInfo *) palloc(*num_partitions *
										   sizeof(ResultRelInfo));
	*tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
													 sizeof(TupleConversionMap *));

	/*
	 * Initialize an empty slot that will be used to manipulate tuples of any
	 * given partition's rowtype.  It is attached to the caller-specified node
	 * (such as ModifyTableState) and released when the node finishes
	 * processing.
	 */
	*partition_tuple_slot = MakeTupleTableSlot();

	leaf_part_rri = *partitions;
	i = 0;
	foreach(cell, leaf_parts)
	{
		Relation	partrel;
		TupleDesc	part_tupdesc;

		/*
		 * We locked all the partitions above, including the leaf partitions.
		 * Note that each of the relations in *partitions is eventually
		 * closed by the caller.
		 */
		partrel = heap_open(lfirst_oid(cell), NoLock);
		part_tupdesc = RelationGetDescr(partrel);

		/*
		 * Save a tuple conversion map to convert a tuple routed to this
		 * partition from the parent's type to the partition's.
		 */
		(*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc,
													 gettext_noop("could not convert row type"));

		InitResultRelInfo(leaf_part_rri,
						  partrel,
						  resultRTindex,
						  rel,
						  estate->es_instrument);

		/*
		 * Verify result relation is a valid target for INSERT.
		 */
		CheckValidResultRel(leaf_part_rri, CMD_INSERT);

		/*
		 * Open partition indices (remember we do not support ON CONFLICT in
		 * the case of partitioned tables, so we do not need support
		 * information for speculative insertion).
		 */
		if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
			leaf_part_rri->ri_IndexRelationDescs == NULL)
			ExecOpenIndices(leaf_part_rri, false);

		estate->es_leaf_result_relations =
			lappend(estate->es_leaf_result_relations, leaf_part_rri);

		leaf_part_rri++;
		i++;
	}
}
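
/*
 * A condensed sketch of caller-side usage (cf. ExecInitModifyTable and
 * CopyFrom; the local variable names here are illustrative only):
 *
 *		PartitionDispatch *pd;
 *		ResultRelInfo *partitions;
 *		TupleConversionMap **tup_conv_maps;
 *		TupleTableSlot *partition_tuple_slot;
 *		int			num_parted,
 *					num_partitions;
 *
 *		ExecSetupPartitionTupleRouting(rel, resultRTindex, estate,
 *									   &pd, &partitions, &tup_conv_maps,
 *									   &partition_tuple_slot,
 *									   &num_parted, &num_partitions);
 *
 * The caller is then responsible for closing each partitions[i]'s indexes
 * and relation during its own shutdown, per the note above.
 */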

/*
 * ExecFindPartition -- Find a leaf partition in the partition tree rooted
 * at parent, for the heap tuple contained in *slot
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition key(s)
 *
 * If no leaf partition is found, this routine errors out with an appropriate
 * error message; otherwise it returns, unchanged, the leaf partition
 * sequence number computed by get_partition_for_tuple().
 */
int
ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
				  TupleTableSlot *slot, EState *estate)
{
	int			result;
	PartitionDispatchData *failed_at;
	TupleTableSlot *failed_slot;

	/*
	 * First check the root table's partition constraint, if any.  No point in
	 * routing the tuple if it doesn't belong in the root table itself.
	 */
	if (resultRelInfo->ri_PartitionCheck)
		ExecPartitionCheck(resultRelInfo, slot, estate);

	result = get_partition_for_tuple(pd, slot, estate,
									 &failed_at, &failed_slot);
	if (result < 0)
	{
		Relation	failed_rel;
		Datum		key_values[PARTITION_MAX_KEYS];
		bool		key_isnull[PARTITION_MAX_KEYS];
		char	   *val_desc;
		ExprContext *ecxt = GetPerTupleExprContext(estate);

		failed_rel = failed_at->reldesc;
		ecxt->ecxt_scantuple = failed_slot;
		FormPartitionKeyDatum(failed_at, failed_slot, estate,
							  key_values, key_isnull);
		val_desc = ExecBuildSlotPartitionKeyDescription(failed_rel,
														key_values,
														key_isnull,
														64);
		Assert(OidIsValid(RelationGetRelid(failed_rel)));
		ereport(ERROR,
				(errcode(ERRCODE_CHECK_VIOLATION),
				 errmsg("no partition of relation \"%s\" found for row",
						RelationGetRelationName(failed_rel)),
				 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
	}

	return result;
}
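
/*
 * Illustrative sketch of how the result is consumed (condensed from the
 * insert paths in copy.c and nodeModifyTable.c; 'tuple' and the routing
 * arrays come from the caller's ExecSetupPartitionTupleRouting call):
 *
 *		int			leaf_part_index;
 *		TupleConversionMap *map;
 *
 *		leaf_part_index = ExecFindPartition(resultRelInfo, pd, slot, estate);
 *		map = tup_conv_maps[leaf_part_index];
 *		if (map != NULL)
 *		{
 *			tuple = do_convert_tuple(tuple, map);
 *			ExecStoreTuple(tuple, partition_tuple_slot, InvalidBuffer, true);
 *		}
 *
 * after which the tuple is inserted into partitions[leaf_part_index]'s
 * relation just as for an ordinary (non-partitioned) target table.
 */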

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find
 * a partition for a row.
 */
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
									 Datum *values,
									 bool *isnull,
									 int maxfieldlen)
{
	StringInfoData buf;
	PartitionKey key = RelationGetPartitionKey(rel);
	int			partnatts = get_partition_natts(key);
	int			i;
	Oid			relid = RelationGetRelid(rel);
	AclResult	aclresult;

	if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	/* If the user has table-level access, just go build the description. */
	aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/*
		 * Step through the columns of the partition key and make sure the
		 * user has SELECT rights on all of them.
		 */
		for (i = 0; i < partnatts; i++)
		{
			AttrNumber	attnum = get_partition_col_attnum(key, i);

			/*
			 * If this partition key column is an expression, we return no
			 * detail rather than try to figure out what column(s) the
			 * expression includes and if the user has SELECT rights on them.
			 */
			if (attnum == InvalidAttrNumber ||
				pg_attribute_aclcheck(relid, attnum, GetUserId(),
									  ACL_SELECT) != ACLCHECK_OK)
				return NULL;
		}
	}

	initStringInfo(&buf);
	appendStringInfo(&buf, "(%s) = (",
					 pg_get_partkeydef_columns(relid, true));

	for (i = 0; i < partnatts; i++)
	{
		char	   *val;
		int			vallen;

		if (isnull[i])
			val = "null";
		else
		{
			Oid			foutoid;
			bool		typisvarlena;

			getTypeOutputInfo(get_partition_col_typid(key, i),
							  &foutoid, &typisvarlena);
			val = OidOutputFunctionCall(foutoid, values[i]);
		}

		if (i > 0)
			appendStringInfoString(&buf, ", ");

		/* truncate if needed */
		vallen = strlen(val);
		if (vallen <= maxfieldlen)
			appendStringInfoString(&buf, val);
		else
		{
			vallen = pg_mbcliplen(val, vallen, maxfieldlen);
			appendBinaryStringInfo(&buf, val, vallen);
			appendStringInfoString(&buf, "...");
		}
	}

	appendStringInfoChar(&buf, ')');

	return buf.data;
}
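
/*
 * For instance (illustration only), given a table partitioned BY RANGE
 * (a, b), a row with no matching partition might yield the description
 * "(a, b) = (15, 20)", which ExecFindPartition above embeds in its
 * errdetail as "Partition key of the failing row contains (a, b) = (15, 20)."
 */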