/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorFinish()
 *	ExecutorEnd()
 *
 *	These four procedures are the external interface to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart must be called at the beginning of execution of any
 *	query plan and ExecutorEnd must always be called at the end of
 *	execution of a plan (unless it is aborted due to error).
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards or backwards, and for how many tuples.
 *	In some cases ExecutorRun may be called multiple times to process all
 *	the tuples for a plan.  It is also acceptable to stop short of executing
 *	the whole plan (but only if it is a SELECT).
 *
 *	ExecutorFinish must be called after the final ExecutorRun call and
 *	before ExecutorEnd.  This can be omitted only in case of EXPLAIN,
 *	which should also omit ExecutorRun.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execMain.c
 *
 *-------------------------------------------------------------------------
 */
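
/*
 * A minimal sketch of the canonical call sequence, roughly as driven by the
 * portal code (error handling omitted; the eflags value 0 and count 0 shown
 * here are illustrative, meaning "no special flags" and "run to completion"):
 *
 *	ExecutorStart(queryDesc, 0);
 *	ExecutorRun(queryDesc, ForwardScanDirection, 0, true);
 *	ExecutorFinish(queryDesc);
 *	ExecutorEnd(queryDesc);
 */
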
#include "postgres.h"

#include "access/htup_details.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/pg_publication.h"
#include "commands/matview.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/nodeSubplan.h"
#include "foreign/fdwapi.h"
#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"


/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
ExecutorRun_hook_type ExecutorRun_hook = NULL;
ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;

/* Hook for plugin to get control in ExecCheckRTPerms() */
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;
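
/*
 * A sketch of how a loadable module would typically install one of these
 * hooks from its _PG_init(), saving and chaining to any hook installed
 * earlier (the names prev_ExecutorStart and my_ExecutorStart are
 * illustrative, not part of this file):
 *
 *	static ExecutorStart_hook_type prev_ExecutorStart = NULL;
 *
 *	static void
 *	my_ExecutorStart(QueryDesc *queryDesc, int eflags)
 *	{
 *		if (prev_ExecutorStart)
 *			prev_ExecutorStart(queryDesc, eflags);
 *		else
 *			standard_ExecutorStart(queryDesc, eflags);
 *	}
 *
 *	void
 *	_PG_init(void)
 *	{
 *		prev_ExecutorStart = ExecutorStart_hook;
 *		ExecutorStart_hook = my_ExecutorStart;
 *	}
 */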

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
static void ExecPostprocessPlan(EState *estate);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static void ExecutePlan(EState *estate, PlanState *planstate,
			bool use_parallel_mode,
			CmdType operation,
			bool sendTuples,
			uint64 numberTuples,
			ScanDirection direction,
			DestReceiver *dest,
			bool execute_once);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
						  Bitmapset *modifiedCols,
						  AclMode requiredPerms);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static char *ExecBuildSlotValueDescription(Oid reloid,
							  TupleTableSlot *slot,
							  TupleDesc tupdesc,
							  Bitmapset *modifiedCols,
							  int maxfieldlen);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
				  Plan *planTree);

/* end of local decls */


/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
 * only because some places use QueryDescs for utility commands).  The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 *
 * We provide a function hook variable that lets loadable plugins
 * get control when ExecutorStart is called.  Such a plugin would
 * normally call standard_ExecutorStart().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	if (ExecutorStart_hook)
		(*ExecutorStart_hook) (queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);
}

void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 *
	 * Don't allow writes in parallel mode.  Supporting UPDATE and DELETE
	 * would require (a) storing the combocid hash in shared memory, rather
	 * than synchronizing it just once at the start of parallelism, and (b) an
	 * alternative to heap_update()'s reliance on xmax for mutual exclusion.
	 * INSERT may have no such troubles, but we forbid it to simplify the
	 * checks.
	 *
	 * We have lower-level defenses in CommandCounterIncrement and elsewhere
	 * against performing unsafe operations in parallel mode, but this gives a
	 * more user-friendly error message.
	 */
	if ((XactReadOnly || IsInParallelMode()) &&
		!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->paramExecTypes != NIL)
	{
		int			nParamExec;

		nParamExec = list_length(queryDesc->plannedstmt->paramExecTypes);
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(nParamExec * sizeof(ParamExecData));
	}

	estate->es_sourceText = queryDesc->sourceText;

	/*
	 * Fill in the query environment, if any, from queryDesc.
	 */
	estate->es_queryEnv = queryDesc->queryEnv;

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
			 * tuples
			 */
			if (queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode. This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;
	estate->es_jit_flags = queryDesc->plannedstmt->jitFlags;

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.  Also note that the count limit is only applied to
 *		retrieved tuples, not for instance to those inserted/updated/deleted
 *		by a ModifyTable plan node.
 *
 *		There is no return value, but output tuples (if any) are sent to
 *		the destination receiver specified in the QueryDesc; and the number
 *		of tuples processed at the top level can be found in
 *		estate->es_processed.
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorRun is called.  Such a plugin would
 *		normally call standard_ExecutorRun().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, uint64 count,
			bool execute_once)
{
	if (ExecutorRun_hook)
		(*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
	else
		standard_ExecutorRun(queryDesc, direction, count, execute_once);
}

void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, uint64 count, bool execute_once)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/*
	 * extract information from the query descriptor
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->hasReturning);

	if (sendTuples)
		dest->rStartup(dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (!ScanDirectionIsNoMovement(direction))
	{
		if (execute_once && queryDesc->already_executed)
			elog(ERROR, "can't re-execute query flagged for single execution");
		queryDesc->already_executed = true;

		ExecutePlan(estate,
					queryDesc->planstate,
					queryDesc->plannedstmt->parallelModeNeeded,
					operation,
					sendTuples,
					count,
					direction,
					dest,
					execute_once);
	}

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		dest->rShutdown(dest);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, estate->es_processed);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorFinish
 *
 *		This routine must be called after the last ExecutorRun call.
 *		It performs cleanup such as firing AFTER triggers.  It is
 *		separate from ExecutorEnd because EXPLAIN ANALYZE needs to
 *		include these actions in the total runtime.
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorFinish is called.  Such a plugin would
 *		normally call standard_ExecutorFinish().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorFinish(QueryDesc *queryDesc)
{
	if (ExecutorFinish_hook)
		(*ExecutorFinish_hook) (queryDesc);
	else
		standard_ExecutorFinish(queryDesc);
}

void
standard_ExecutorFinish(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/* This should be run once and only once per Executor instance */
	Assert(!estate->es_finished);

	/* Switch into per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/* Run ModifyTable nodes to completion */
	ExecPostprocessPlan(estate);

	/* Execute queued AFTER triggers, unless told not to */
	if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
		AfterTriggerEndQuery(estate);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, 0);

	MemoryContextSwitchTo(oldcontext);

	estate->es_finished = true;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorEnd is called.  Such a plugin would
 *		normally call standard_ExecutorEnd().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	if (ExecutorEnd_hook)
		(*ExecutorEnd_hook) (queryDesc);
	else
		standard_ExecutorEnd(queryDesc);
}

void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/* do away with our snapshots */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}

/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate);

	MemoryContextSwitchTo(oldcontext);
}


/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 *
 * Returns true if permissions are adequate.  If not, throws an appropriate
 * error when ereport_on_violation is true, or simply returns false.
 *
 * Note that this does NOT address row level security policies (aka: RLS).  If
 * rows will be returned to the user as a result of this permission check
 * passing, then RLS also needs to be consulted (see check_enable_rls()).
 *
 * See rewrite/rowsecurity.c.
 */
bool
ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
{
	ListCell   *l;
	bool		result = true;

	foreach(l, rangeTable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		result = ExecCheckRTEPerms(rte);
		if (!result)
		{
			Assert(rte->rtekind == RTE_RELATION);
			if (ereport_on_violation)
				aclcheck_error(ACLCHECK_NO_PRIV, get_relkind_objtype(get_rel_relkind(rte->relid)),
							   get_rel_name(rte->relid));
			return false;
		}
	}

	if (ExecutorCheckPerms_hook)
		result = (*ExecutorCheckPerms_hook) (rangeTable,
											 ereport_on_violation);
	return result;
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked when the function is prepared for execution.  Join, subquery,
	 * and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		int			col = -1;

		/*
		 * If we lack any permissions that exist only as relation permissions,
		 * we can fail straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			while ((col = bms_next_member(rte->selectedCols, col)) >= 0)
			{
				/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
				AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

				if (attno == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, attno, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
		}

		/*
		 * Basically the same for the mod columns, for both INSERT and UPDATE
		 * privilege as specified by remainingPerms.
		 */
		if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->insertedCols,
																	  ACL_INSERT))
			return false;

		if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->updatedCols,
																	  ACL_UPDATE))
			return false;
	}
	return true;
}

/*
 * ExecCheckRTEPermsModified
 *		Check INSERT or UPDATE access permissions for a single RTE (these
 *		are processed uniformly).
 */
static bool
ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols,
						  AclMode requiredPerms)
{
	int			col = -1;

	/*
	 * When the query doesn't explicitly update any columns, allow the query
	 * if we have permission on any column of the rel.  This is to handle
	 * SELECT FOR UPDATE as well as possible corner cases in UPDATE.
	 */
	if (bms_is_empty(modifiedCols))
	{
		if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms,
									  ACLMASK_ANY) != ACLCHECK_OK)
			return false;
	}

	while ((col = bms_next_member(modifiedCols, col)) >= 0)
	{
		/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
		AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

		if (attno == InvalidAttrNumber)
		{
			/* whole-row reference can't happen here */
			elog(ERROR, "whole-row update is not implemented");
		}
		else
		{
			if (pg_attribute_aclcheck(relOid, attno, userid,
									  requiredPerms) != ACLCHECK_OK)
				return false;
		}
	}
	return true;
}
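
/*
 * Worked example of the bitmapset offset convention used above (a sketch,
 * assuming FirstLowInvalidHeapAttributeNumber is -8, its value in sysattr.h
 * as of this code): attribute numbers are shifted so that system attributes
 * and the whole-row reference (attno 0) become non-negative set members.
 *
 *	AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;
 *	... col = 9  =>  attno = 1 (first user column)
 *	... col = 8  =>  attno = 0 (InvalidAttrNumber: whole-row reference)
 */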

/*
 * Check that the query does not imply any writes to non-temp tables;
 * unless we're in parallel mode, in which case don't even allow writes
 * to temp tables.
 *
 * Note: in a Hot Standby this would need to reject writes to temp
 * tables just as we do in parallel mode; but an HS standby can't have created
 * any temp tables in the first place, so no need to check that.
 */
static void
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
{
	ListCell   *l;

	/*
	 * Fail if write permissions are requested in parallel mode for any table
	 * (temp or non-temp); otherwise fail only for non-temp tables.
	 */
	foreach(l, plannedstmt->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind != RTE_RELATION)
			continue;

		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;

		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
	}

	if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE)
		PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt));
}


/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks
	 */
	ExecCheckRTPerms(rangeTable, true);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;
	estate->es_plannedstmt = plannedstmt;

	/*
	 * initialize result relation stuff, and open/lock the result rels.
	 *
	 * We must do this before initializing the plan tree, else we might try to
	 * do a lock upgrade if a result rel is also a source rel.
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			Index		resultRelationIndex = lfirst_int(l);
			Oid			resultRelationOid;
			Relation	resultRelation;

			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);

			InitResultRelInfo(resultRelInfo,
							  resultRelation,
							  resultRelationIndex,
							  NULL,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* es_result_relation_info is NULL except when within ModifyTable */
		estate->es_result_relation_info = NULL;

		/*
		 * In the partitioned result relation case, lock the non-leaf result
		 * relations too.  A subset of these are the roots of respective
		 * partitioned tables, for which we also allocate ResultRelInfos.
		 */
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
		if (plannedstmt->nonleafResultRelations)
		{
			int			num_roots = list_length(plannedstmt->rootResultRelations);

			/*
			 * Firstly, build ResultRelInfos for all the partitioned table
			 * roots, because we will need them to fire the statement-level
			 * triggers, if any.
			 */
			resultRelInfos = (ResultRelInfo *)
				palloc(num_roots * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
			foreach(l, plannedstmt->rootResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);
				Oid			resultRelOid;
				Relation	resultRelDesc;

				resultRelOid = getrelid(resultRelIndex, rangeTable);
				resultRelDesc = heap_open(resultRelOid, RowExclusiveLock);
				InitResultRelInfo(resultRelInfo,
								  resultRelDesc,
								  lfirst_int(l),
								  NULL,
								  estate->es_instrument);
				resultRelInfo++;
			}

			estate->es_root_result_relations = resultRelInfos;
			estate->es_num_root_result_relations = num_roots;

			/* Simply lock the rest of them. */
			foreach(l, plannedstmt->nonleafResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);

				/* We locked the roots above. */
				if (!list_member_int(plannedstmt->rootResultRelations,
									 resultRelIndex))
					LockRelationOid(getrelid(resultRelIndex, rangeTable),
									RowExclusiveLock);
			}
		}
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
	}

	/*
	 * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
	 * before we initialize the plan tree, else we'd be risking lock upgrades.
	 * While we are at it, build the ExecRowMark list.  Any partitioned child
	 * tables are ignored here (because isParent=true) and will be locked by
	 * the first Append or MergeAppend node that references them.  (Note that
	 * the RowMarks corresponding to partitioned child tables are present in
	 * the same list as the rest, i.e., plannedstmt->rowMarks.)
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
		Oid			relid;
		Relation	relation;
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* get relation's OID (will produce InvalidOid if subquery) */
		relid = getrelid(rc->rti, rangeTable);

		/*
		 * If you change the conditions under which rel locks are acquired
		 * here, be sure to adjust ExecOpenScanRelation to match.
		 */
		switch (rc->markType)
		{
			case ROW_MARK_EXCLUSIVE:
			case ROW_MARK_NOKEYEXCLUSIVE:
			case ROW_MARK_SHARE:
			case ROW_MARK_KEYSHARE:
				relation = heap_open(relid, RowShareLock);
				break;
			case ROW_MARK_REFERENCE:
				relation = heap_open(relid, AccessShareLock);
				break;
			case ROW_MARK_COPY:
				/* no physical table access is required */
				relation = NULL;
				break;
			default:
				elog(ERROR, "unrecognized markType: %d", rc->markType);
				relation = NULL;	/* keep compiler quiet */
				break;
		}

		/* Check that relation is a legal target for marking */
		if (relation)
			CheckValidRowMarkRel(relation, rc->markType);

		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->relid = relid;
		erm->rti = rc->rti;
		erm->prti = rc->prti;
		erm->rowmarkId = rc->rowmarkId;
		erm->markType = rc->markType;
		erm->strength = rc->strength;
		erm->waitPolicy = rc->waitPolicy;
		erm->ermActive = false;
		ItemPointerSetInvalid(&(erm->curCtid));
		erm->ermExtra = NULL;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * Initialize the executor's tuple table to empty.
	 */
	estate->es_tupleTable = NIL;
	estate->es_trig_tuple_slot = NULL;
	estate->es_trig_oldtup_slot = NULL;
	estate->es_trig_newtup_slot = NULL;

	/* mark EvalPlanQual not active */
	estate->es_epqTuple = NULL;
	estate->es_epqTupleSet = NULL;
	estate->es_epqScanDone = NULL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;						/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;
		int			sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
		 * it is a parameterless subplan (not initplan), we suggest that it be
		 * prepared to handle REWIND efficiently; otherwise there is no need.
		 */
		sp_eflags = eflags
			& (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return.
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT queries need a filter if
	 * there are any junk attrs in the top-level tlist.
	 */
	if (operation == CMD_SELECT)
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		foreach(tlist, plan->targetlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tlist);

			if (tle->resjunk)
			{
				junk_filter_needed = true;
				break;
			}
		}

		if (junk_filter_needed)
		{
			JunkFilter *j;

			j = ExecInitJunkFilter(planstate->plan->targetlist,
								   tupType->tdhasoid,
								   ExecInitExtraTupleSlot(estate, NULL));
			estate->es_junkFilter = j;

			/* Want to return the cleaned tuple type */
			tupType = j->jf_cleanTupType;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}

/*
 * Check that a proposed result relation is a legal target for the operation
 *
 * Generally the parser and/or planner should have noticed any such mistake
 * already, but let's make sure.
 *
 * Note: when changing this function, you probably also need to look at
 * CheckValidRowMarkRel.
 */
void
CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
{
	Relation	resultRel = resultRelInfo->ri_RelationDesc;
	TriggerDesc *trigDesc = resultRel->trigdesc;
	FdwRoutine *fdwroutine;

	switch (resultRel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			CheckCmdReplicaIdentity(resultRel, operation);
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_VIEW:

			/*
			 * Okay only if there's a suitable INSTEAD OF trigger.  Messages
			 * here should match rewriteHandler.c's rewriteTargetView and
			 * RewriteQuery, except that we omit errdetail because we haven't
			 * got the information handy (and given that we really shouldn't
			 * get here anyway, it's not worth great exertion to get).
			 */
			switch (operation)
			{
				case CMD_INSERT:
					if (!trigDesc || !trigDesc->trig_insert_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot insert into view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.")));
					break;
				case CMD_UPDATE:
					if (!trigDesc || !trigDesc->trig_update_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot update view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.")));
					break;
				case CMD_DELETE:
					if (!trigDesc || !trigDesc->trig_delete_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot delete from view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.")));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		case RELKIND_MATVIEW:
			if (!MatViewIncrementalMaintenanceIsEnabled())
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot change materialized view \"%s\"",
								RelationGetRelationName(resultRel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = resultRelInfo->ri_FdwRoutine;
			switch (operation)
			{
				case CMD_INSERT:
					if (fdwroutine->ExecForeignInsert == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot insert into foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow inserts",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_UPDATE:
					if (fdwroutine->ExecForeignUpdate == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot update foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow updates",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_DELETE:
					if (fdwroutine->ExecForeignDelete == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot delete from foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow deletes",
										RelationGetRelationName(resultRel))));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
	}
}

/*
 * Check that a proposed rowmark target relation is a legal target
 *
 * In most cases parser and/or planner should have noticed this already, but
 * they don't cover all cases.
 */
static void
CheckValidRowMarkRel(Relation rel, RowMarkType markType)
{
	FdwRoutine *fdwroutine;

	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			/* OK */
			break;
		case RELKIND_SEQUENCE:
			/* Must disallow this because we don't vacuum sequences */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in sequence \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_TOASTVALUE:
			/* We could allow this, but there seems no good reason to */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in TOAST relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_VIEW:
			/* Should not get here; planner should have expanded the view */
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in view \"%s\"",
							RelationGetRelationName(rel))));
			break;
		case RELKIND_MATVIEW:
			/* Allow referencing a matview, but not actual locking clauses */
			if (markType != ROW_MARK_REFERENCE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot lock rows in materialized view \"%s\"",
								RelationGetRelationName(rel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = GetFdwRoutineForRelation(rel, false);
			if (fdwroutine->RefetchForeignRow == NULL)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot lock rows in foreign table \"%s\"",
								RelationGetRelationName(rel))));
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot lock rows in relation \"%s\"",
							RelationGetRelationName(rel))));
			break;
	}
}

/*
 * Initialize ResultRelInfo data for one result relation
 *
 * Caution: before Postgres 9.1, this function included the relkind checking
 * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
 * appropriate.  Be sure callers cover those needs.
 */
void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
				  Relation resultRelationDesc,
				  Index resultRelationIndex,
				  ResultRelInfo *partition_root_rri,
				  int instrument_options)
{
	List	   *partition_check = NIL;

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		resultRelInfo->ri_TrigWhenExprs = (ExprState **)
			palloc0(n * sizeof(ExprState *));
		if (instrument_options)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigWhenExprs = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
	else
		resultRelInfo->ri_FdwRoutine = NULL;

	/* The following fields are set later if needed */
	resultRelInfo->ri_FdwState = NULL;
	resultRelInfo->ri_usesFdwDirectModify = false;
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;
	resultRelInfo->ri_onConflictArbiterIndexes = NIL;
	resultRelInfo->ri_onConflict = NULL;

	/*
	 * Partition constraint, which also includes the partition constraint of
	 * all the ancestors that are partitions.  Note that it will be checked
	 * even in the case of tuple-routing where this table is the target leaf
	 * partition, if there are any BR triggers defined on the table.  Although
	 * tuple-routing implicitly preserves the partition constraint of the
	 * target partition for a given row, the BR triggers may change the row
	 * such that the constraint is no longer satisfied, so we must check it
	 * explicitly and fail in that case.
	 *
	 * If this is a partitioned table, the partition constraint (if any) of a
	 * given row will be checked just before performing tuple-routing.
	 */
	partition_check = RelationGetPartitionQual(resultRelationDesc);

	resultRelInfo->ri_PartitionCheck = partition_check;
	resultRelInfo->ri_RootResultRelInfo = partition_root_rri;
	resultRelInfo->ri_PartitionReadyForRouting = false;
}

/*
 *		ExecGetTriggerResultRel
 *
 * Get a ResultRelInfo for a trigger target relation.  Most of the time,
 * triggers are fired on one of the result relations of the query, and so
 * we can just return a member of the es_result_relations array, the
 * es_root_result_relations array (if any), or the
 * es_tuple_routing_result_relations list (if any).  (Note: in self-join
 * situations there might be multiple members with the same OID; if so it
 * doesn't matter which one we pick.)
 * However, it is sometimes necessary to fire triggers on other relations;
 * this happens mainly when an RI update trigger queues additional triggers
 * on other relations, which will be processed in the context of the outer
 * query.  For efficiency's sake, we want to have a ResultRelInfo for those
 * triggers too; that can avoid repeated re-opening of the relation.  (It
 * also provides a way for EXPLAIN ANALYZE to report the runtimes of such
 * triggers.)  So we make additional ResultRelInfo's as needed, and save them
 * in es_trig_target_relations.
 */
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
{
	ResultRelInfo *rInfo;
	int			nr;
	ListCell   *l;
	Relation	rel;
	MemoryContext oldcontext;

	/* First, search through the query result relations */
	rInfo = estate->es_result_relations;
	nr = estate->es_num_result_relations;
	while (nr > 0)
	{
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
		rInfo++;
		nr--;
	}
	/* Second, search through the root result relations, if any */
	rInfo = estate->es_root_result_relations;
	nr = estate->es_num_root_result_relations;
	while (nr > 0)
	{
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
		rInfo++;
		nr--;
	}

	/*
	 * Third, search through the result relations that were created during
	 * tuple routing, if any.
	 */
	foreach(l, estate->es_tuple_routing_result_relations)
	{
		rInfo = (ResultRelInfo *) lfirst(l);
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
	}
	/* Nope, but maybe we already made an extra ResultRelInfo for it */
	foreach(l, estate->es_trig_target_relations)
	{
		rInfo = (ResultRelInfo *) lfirst(l);
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
	}
	/* Nope, so we need a new one */

	/*
	 * Open the target relation's relcache entry.  We assume that an
	 * appropriate lock is still held by the backend from whenever the trigger
	 * event got queued, so we need not take a new lock here.  Also, we need
	 * not recheck the relkind, so no need for CheckValidResultRel.
	 */
	rel = heap_open(relid, NoLock);

	/*
	 * Make the new entry in the right context.
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
	rInfo = makeNode(ResultRelInfo);
	InitResultRelInfo(rInfo,
					  rel,
					  0,		/* dummy rangetable index */
					  NULL,
					  estate->es_instrument);
	estate->es_trig_target_relations =
		lappend(estate->es_trig_target_relations, rInfo);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Currently, we don't need any index information in ResultRelInfos used
	 * only for triggers, so no need to call ExecOpenIndices.
	 */

	return rInfo;
}

/*
 * Close any relations that have been opened by ExecGetTriggerResultRel().
 */
void
ExecCleanUpTriggerState(EState *estate)
{
	ListCell   *l;

	foreach(l, estate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}
}

/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns true if the
 * choice is forced, false if the choice is not forced.  In the true case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that if we are generating tuples for INSERT or UPDATE,
 * estate->es_result_relation_info is already set up to describe the target
 * relation.  Note that in an UPDATE that spans an inheritance tree, some of
 * the target relations may have OIDs and some not.  We have to make the
 * decisions on a per-relation basis as we initialize each of the subplans of
 * the ModifyTable node, so ModifyTable has to set es_result_relation_info
 * while initializing each subplan.
 *
 * CREATE TABLE AS is even uglier, because we don't have the target relation's
 * descriptor available when this code runs; we have to look aside at the
 * flags passed to ExecutorStart().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	ResultRelInfo *ri = planstate->state->es_result_relation_info;

	if (ri != NULL)
	{
		Relation	rel = ri->ri_RelationDesc;

		if (rel != NULL)
		{
			*hasoids = rel->rd_rel->relhasoids;
			return true;
		}
	}

	if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS)
	{
		*hasoids = true;
		return true;
	}
	if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS)
	{
		*hasoids = false;
		return true;
	}

	return false;
}

/* ----------------------------------------------------------------
 *		ExecPostprocessPlan
 *
 *		Give plan nodes a final chance to execute before shutdown
 * ----------------------------------------------------------------
 */
static void
ExecPostprocessPlan(EState *estate)
{
	ListCell   *lc;

	/*
	 * Make sure nodes run forward.
	 */
	estate->es_direction = ForwardScanDirection;

	/*
	 * Run any secondary ModifyTable nodes to completion, in case the main
	 * query did not fetch all rows from them.  (We do this to ensure that
	 * such nodes have predictable results.)
	 */
	foreach(lc, estate->es_auxmodifytables)
	{
		PlanState  *ps = (PlanState *) lfirst(lc);

		for (;;)
		{
			TupleTableSlot *slot;

			/* Reset the per-output-tuple exprcontext each time */
			ResetPerTupleExprContext(estate);

			slot = ExecProcNode(ps);

			if (TupIsNull(slot))
				break;
		}
	}
}

/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * for subplans too
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/*
	 * destroy the executor's tuple table.  Actually we only care about
	 * releasing buffer pins and tupdesc refcounts; there's no need to pfree
	 * the TupleTableSlots, since the containing memory context is about to go
	 * away anyway.
	 */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/* Close the root target relation(s). */
	resultRelInfo = estate->es_root_result_relations;
	for (i = estate->es_num_root_result_relations; i > 0; i--)
	{
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/* likewise close any trigger target relations */
	ExecCleanUpTriggerState(estate);

	/*
	 * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping
	 * locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = (ExecRowMark *) lfirst(l);

		if (erm->relation)
			heap_close(erm->relation, NoLock);
	}
}
1653 
1654 /* ----------------------------------------------------------------
1655  *		ExecutePlan
1656  *
1657  *		Processes the query plan until we have retrieved 'numberTuples' tuples,
1658  *		moving in the specified direction.
1659  *
1660  *		Runs to completion if numberTuples is 0
1661  *
1662  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1663  * user can see it
1664  * ----------------------------------------------------------------
1665  */
1666 static void
1667 ExecutePlan(EState *estate,
1668 			PlanState *planstate,
1669 			bool use_parallel_mode,
1670 			CmdType operation,
1671 			bool sendTuples,
1672 			uint64 numberTuples,
1673 			ScanDirection direction,
1674 			DestReceiver *dest,
1675 			bool execute_once)
1676 {
1677 	TupleTableSlot *slot;
1678 	uint64		current_tuple_count;
1679 
1680 	/*
1681 	 * initialize local variables
1682 	 */
1683 	current_tuple_count = 0;
1684 
1685 	/*
1686 	 * Set the direction.
1687 	 */
1688 	estate->es_direction = direction;
1689 
1690 	/*
1691 	 * If the plan might be executed multiple times, we must force it to run
1692 	 * without parallelism, because we might exit early.
1693 	 */
1694 	if (!execute_once)
1695 		use_parallel_mode = false;
1696 
1697 	estate->es_use_parallel_mode = use_parallel_mode;
1698 	if (use_parallel_mode)
1699 		EnterParallelMode();
1700 
1701 	/*
1702 	 * Loop until we've processed the proper number of tuples from the plan.
1703 	 */
1704 	for (;;)
1705 	{
1706 		/* Reset the per-output-tuple exprcontext */
1707 		ResetPerTupleExprContext(estate);
1708 
1709 		/*
1710 		 * Execute the plan and obtain a tuple
1711 		 */
1712 		slot = ExecProcNode(planstate);
1713 
1714 		/*
1715 		 * if the tuple is null, then we assume there is nothing more to
1716 		 * process so we just end the loop...
1717 		 */
1718 		if (TupIsNull(slot))
1719 			break;
1720 
1721 		/*
1722 		 * If we have a junk filter, then project a new tuple with the junk
1723 		 * removed.
1724 		 *
1725 		 * Store this new "clean" tuple in the junkfilter's resultSlot.
1726 		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1727 		 * because that tuple slot has the wrong descriptor.)
1728 		 */
1729 		if (estate->es_junkFilter != NULL)
1730 			slot = ExecFilterJunk(estate->es_junkFilter, slot);
1731 
1732 		/*
1733 		 * If we are supposed to send the tuple somewhere, do so. (In
1734 		 * practice, this is probably always the case at this point.)
1735 		 */
1736 		if (sendTuples)
1737 		{
1738 			/*
1739 			 * If we are not able to send the tuple, we assume the destination
1740 			 * has closed and no more tuples can be sent. If that's the case,
1741 			 * end the loop.
1742 			 */
1743 			if (!dest->receiveSlot(slot, dest))
1744 				break;
1745 		}
1746 
1747 		/*
1748 		 * Count tuples processed, if this is a SELECT.  (For other operation
1749 		 * types, the ModifyTable plan node must count the appropriate
1750 		 * events.)
1751 		 */
1752 		if (operation == CMD_SELECT)
1753 			(estate->es_processed)++;
1754 
1755 		/*
1756 		 * check our tuple count... if we've processed the proper number then
1757 		 * quit, else loop again and process more tuples.  Zero numberTuples
1758 		 * means no limit.
1759 		 */
1760 		current_tuple_count++;
1761 		if (numberTuples && numberTuples == current_tuple_count)
1762 			break;
1763 	}
1764 
1765 	/*
1766 	 * If we know we won't need to back up, we can release resources at this
1767 	 * point.
1768 	 */
1769 	if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD))
1770 		(void) ExecShutdownNode(planstate);
1771 
1772 	if (use_parallel_mode)
1773 		ExitParallelMode();
1774 }
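
/*
 * For illustration (a sketch of the typical call from standard_ExecutorRun;
 * see that function for the authoritative usage):
 *
 *		ExecutePlan(estate, queryDesc->planstate,
 *					queryDesc->plannedstmt->parallelModeNeeded,
 *					operation, sendTuples, count,
 *					direction, dest, execute_once);
 *
 * where count == 0 asks for the plan to be run to completion.
 */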
1775 
1776 
1777 /*
1778  * ExecRelCheck --- check that tuple meets constraints for result relation
1779  *
1780  * Returns NULL if OK, else name of failed check constraint
1781  */
1782 static const char *
1783 ExecRelCheck(ResultRelInfo *resultRelInfo,
1784 			 TupleTableSlot *slot, EState *estate)
1785 {
1786 	Relation	rel = resultRelInfo->ri_RelationDesc;
1787 	int			ncheck = rel->rd_att->constr->num_check;
1788 	ConstrCheck *check = rel->rd_att->constr->check;
1789 	ExprContext *econtext;
1790 	MemoryContext oldContext;
1791 	int			i;
1792 
1793 	/*
1794 	 * If first time through for this result relation, build expression
1795 	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
1796 	 * memory context so they'll survive throughout the query.
1797 	 */
1798 	if (resultRelInfo->ri_ConstraintExprs == NULL)
1799 	{
1800 		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1801 		resultRelInfo->ri_ConstraintExprs =
1802 			(ExprState **) palloc(ncheck * sizeof(ExprState *));
1803 		for (i = 0; i < ncheck; i++)
1804 		{
1805 			Expr	   *checkconstr;
1806 
1807 			checkconstr = stringToNode(check[i].ccbin);
1808 			resultRelInfo->ri_ConstraintExprs[i] =
1809 				ExecPrepareExpr(checkconstr, estate);
1810 		}
1811 		MemoryContextSwitchTo(oldContext);
1812 	}
1813 
1814 	/*
1815 	 * We will use the EState's per-tuple context for evaluating constraint
1816 	 * expressions (creating it if it's not already there).
1817 	 */
1818 	econtext = GetPerTupleExprContext(estate);
1819 
1820 	/* Arrange for econtext's scan tuple to be the tuple under test */
1821 	econtext->ecxt_scantuple = slot;
1822 
1823 	/* And evaluate the constraints */
1824 	for (i = 0; i < ncheck; i++)
1825 	{
1826 		ExprState  *checkconstr = resultRelInfo->ri_ConstraintExprs[i];
1827 
1828 		/*
1829 		 * NOTE: SQL specifies that a NULL result from a constraint expression
1830 		 * is not to be treated as a failure.  Therefore, use ExecCheck not
1831 		 * ExecQual.
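		 * For example, a constraint declared as CHECK (price > 0) is
		 * considered satisfied by a row whose price is NULL.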
1832 		 */
1833 		if (!ExecCheck(checkconstr, econtext))
1834 			return check[i].ccname;
1835 	}
1836 
1837 	/* NULL result means no error */
1838 	return NULL;
1839 }
1840 
1841 /*
1842  * ExecPartitionCheck --- check that tuple meets the partition constraint.
1843  *
1844  * Returns true if it meets the partition constraint.  If the constraint
1845  * fails and we're asked to emit an error, do so and don't return; otherwise
1846  * return false.
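 *
 * For example, for a range partition created with FOR VALUES FROM (1) TO
 * (10) on column a, the constraint tested here is effectively
 * "a IS NOT NULL AND a >= 1 AND a < 10".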
1847  */
1848 bool
1849 ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
1850 				   EState *estate, bool emitError)
1851 {
1852 	ExprContext *econtext;
1853 	bool		success;
1854 
1855 	/*
1856 	 * If first time through, build expression state tree for the partition
1857 	 * check expression.  Keep it in the per-query memory context so it will
1858 	 * survive throughout the query.
1859 	 */
1860 	if (resultRelInfo->ri_PartitionCheckExpr == NULL)
1861 	{
1862 		List	   *qual = resultRelInfo->ri_PartitionCheck;
1863 
1864 		resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
1865 	}
1866 
1867 	/*
1868 	 * We will use the EState's per-tuple context for evaluating constraint
1869 	 * expressions (creating it if it's not already there).
1870 	 */
1871 	econtext = GetPerTupleExprContext(estate);
1872 
1873 	/* Arrange for econtext's scan tuple to be the tuple under test */
1874 	econtext->ecxt_scantuple = slot;
1875 
1876 	/*
1877 	 * As in case of the catalogued constraints, we treat a NULL result as
1878 	 * success here, not a failure.
1879 	 */
1880 	success = ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext);
1881 
1882 	/* if asked to emit error, don't actually return on failure */
1883 	if (!success && emitError)
1884 		ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1885 
1886 	return success;
1887 }
1888 
1889 /*
1890  * ExecPartitionCheckEmitError - Form and emit an error message after a failed
1891  * partition constraint check.
1892  */
1893 void
1894 ExecPartitionCheckEmitError(ResultRelInfo *resultRelInfo,
1895 							TupleTableSlot *slot,
1896 							EState *estate)
1897 {
1898 	Oid			root_relid;
1899 	Relation	rel = resultRelInfo->ri_RelationDesc;
1900 	Relation	orig_rel = rel;
1901 	TupleDesc	tupdesc = RelationGetDescr(rel);
1902 	char	   *val_desc;
1903 	Bitmapset  *modifiedCols;
1904 
1905 	/*
1906 	 * Need to first convert the tuple to the root partitioned table's row
1907 	 * type. For details, check similar comments in ExecConstraints().
1908 	 */
1909 	if (resultRelInfo->ri_RootResultRelInfo)
1910 	{
1911 		HeapTuple	tuple = ExecFetchSlotTuple(slot);
1912 		ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo;
1913 		TupleDesc	old_tupdesc;
1914 		TupleConversionMap *map;
1915 
1916 		root_relid = RelationGetRelid(rootrel->ri_RelationDesc);
1917 		tupdesc = RelationGetDescr(rootrel->ri_RelationDesc);
1918 
1919 		old_tupdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
1920 		/* a reverse map */
1921 		map = convert_tuples_by_name(old_tupdesc, tupdesc,
1922 									 gettext_noop("could not convert row type"));
1923 		if (map != NULL)
1924 		{
1925 			tuple = do_convert_tuple(tuple, map);
1926 			/* one-off slot for building error message */
1927 			slot = MakeTupleTableSlot(tupdesc);
1928 			ExecStoreTuple(tuple, slot, InvalidBuffer, false);
1929 		}
1930 		modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate),
1931 								 ExecGetUpdatedCols(rootrel, estate));
1932 	}
1933 	else
1934 	{
1935 		root_relid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
1936 		tupdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
1937 		modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate),
1938 								 ExecGetUpdatedCols(resultRelInfo, estate));
1939 	}
1940 
1941 	val_desc = ExecBuildSlotValueDescription(root_relid,
1942 											 slot,
1943 											 tupdesc,
1944 											 modifiedCols,
1945 											 64);
1946 	ereport(ERROR,
1947 			(errcode(ERRCODE_CHECK_VIOLATION),
1948 			 errmsg("new row for relation \"%s\" violates partition constraint",
1949 					RelationGetRelationName(orig_rel)),
1950 			 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
1951 }
1952 
1953 /*
1954  * ExecConstraints - check constraints of the tuple in 'slot'
1955  *
1956  * This checks the traditional NOT NULL and check constraints.
1957  *
1958  * The partition constraint is *NOT* checked.
1959  *
1960  * Note: 'slot' contains the tuple to check the constraints of, which may
1961  * have been converted from the original input tuple after tuple routing.
1962  * 'resultRelInfo' is the final result relation, after tuple routing.
1963  */
1964 void
1965 ExecConstraints(ResultRelInfo *resultRelInfo,
1966 				TupleTableSlot *slot, EState *estate)
1967 {
1968 	Relation	rel = resultRelInfo->ri_RelationDesc;
1969 	TupleDesc	tupdesc = RelationGetDescr(rel);
1970 	TupleConstr *constr = tupdesc->constr;
1971 	Bitmapset  *modifiedCols;
1972 
1973 	Assert(constr || resultRelInfo->ri_PartitionCheck);
1974 
1975 	if (constr && constr->has_not_null)
1976 	{
1977 		int			natts = tupdesc->natts;
1978 		int			attrChk;
1979 
1980 		for (attrChk = 1; attrChk <= natts; attrChk++)
1981 		{
1982 			Form_pg_attribute att = TupleDescAttr(tupdesc, attrChk - 1);
1983 
1984 			if (att->attnotnull && slot_attisnull(slot, attrChk))
1985 			{
1986 				char	   *val_desc;
1987 				Relation	orig_rel = rel;
1988 				TupleDesc	orig_tupdesc = RelationGetDescr(rel);
1989 
1990 				/*
1991 				 * If the tuple has been routed, it's been converted to the
1992 				 * partition's rowtype, which might differ from the root
1993 				 * table's.  We must convert it back to the root table's
1994 				 * rowtype so that the val_desc shown in the error message
1995 				 * matches the input tuple.
1996 				 */
1997 				if (resultRelInfo->ri_RootResultRelInfo)
1998 				{
1999 					HeapTuple	tuple = ExecFetchSlotTuple(slot);
2000 					ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo;
2001 					TupleConversionMap *map;
2002 
2003 					tupdesc = RelationGetDescr(rootrel->ri_RelationDesc);
2004 					/* a reverse map */
2005 					map = convert_tuples_by_name(orig_tupdesc, tupdesc,
2006 												 gettext_noop("could not convert row type"));
2007 					if (map != NULL)
2008 					{
2009 						tuple = do_convert_tuple(tuple, map);
2010 					/* one-off slot for building error message */
2011 						slot = MakeTupleTableSlot(tupdesc);
2012 						ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2013 					}
2014 					modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate),
2015 											 ExecGetUpdatedCols(rootrel, estate));
2016 					rel = rootrel->ri_RelationDesc;
2017 				}
2018 				else
2019 					modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate),
2020 											 ExecGetUpdatedCols(resultRelInfo, estate));
2021 				val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2022 														 slot,
2023 														 tupdesc,
2024 														 modifiedCols,
2025 														 64);
2026 
2027 				ereport(ERROR,
2028 						(errcode(ERRCODE_NOT_NULL_VIOLATION),
2029 						 errmsg("null value in column \"%s\" violates not-null constraint",
2030 								NameStr(att->attname)),
2031 						 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2032 						 errtablecol(orig_rel, attrChk)));
2033 			}
2034 		}
2035 	}
2036 
2037 	if (constr && constr->num_check > 0)
2038 	{
2039 		const char *failed;
2040 
2041 		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
2042 		{
2043 			char	   *val_desc;
2044 			Relation	orig_rel = rel;
2045 
2046 			/* See the comment above. */
2047 			if (resultRelInfo->ri_RootResultRelInfo)
2048 			{
2049 				HeapTuple	tuple = ExecFetchSlotTuple(slot);
2050 				ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo;
2051 				TupleDesc	old_tupdesc = RelationGetDescr(rel);
2052 				TupleConversionMap *map;
2053 
2054 				tupdesc = RelationGetDescr(rootrel->ri_RelationDesc);
2055 				/* a reverse map */
2056 				map = convert_tuples_by_name(old_tupdesc, tupdesc,
2057 											 gettext_noop("could not convert row type"));
2058 				if (map != NULL)
2059 				{
2060 					tuple = do_convert_tuple(tuple, map);
2061 					/* one-off slot for building error message */
2062 					slot = MakeTupleTableSlot(tupdesc);
2063 					ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2064 				}
2065 				modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate),
2066 										 ExecGetUpdatedCols(rootrel, estate));
2067 				rel = rootrel->ri_RelationDesc;
2068 			}
2069 			else
2070 				modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate),
2071 										 ExecGetUpdatedCols(resultRelInfo, estate));
2072 			val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2073 													 slot,
2074 													 tupdesc,
2075 													 modifiedCols,
2076 													 64);
2077 			ereport(ERROR,
2078 					(errcode(ERRCODE_CHECK_VIOLATION),
2079 					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
2080 							RelationGetRelationName(orig_rel), failed),
2081 					 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2082 					 errtableconstraint(orig_rel, failed)));
2083 		}
2084 	}
2085 }
2086 
2087 /*
2088  * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
2089  * of the specified kind.
2090  *
2091  * Note that this needs to be called multiple times to ensure that all kinds of
2092  * WITH CHECK OPTIONs are handled (both those from views which have the WITH
2093  * CHECK OPTION set and from row level security policies).  See ExecInsert()
2094  * and ExecUpdate().
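 *
 * For example, given a (hypothetical) view defined as
 *		CREATE VIEW pos_t AS SELECT * FROM t WHERE x > 0
 *			WITH CHECK OPTION;
 * an INSERT through the view of a row with x <= 0 fails the WCO_VIEW_CHECK
 * qual evaluated here and is rejected.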
2095  */
2096 void
2097 ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
2098 					 TupleTableSlot *slot, EState *estate)
2099 {
2100 	Relation	rel = resultRelInfo->ri_RelationDesc;
2101 	TupleDesc	tupdesc = RelationGetDescr(rel);
2102 	ExprContext *econtext;
2103 	ListCell   *l1,
2104 			   *l2;
2105 
2106 	/*
2107 	 * We will use the EState's per-tuple context for evaluating constraint
2108 	 * expressions (creating it if it's not already there).
2109 	 */
2110 	econtext = GetPerTupleExprContext(estate);
2111 
2112 	/* Arrange for econtext's scan tuple to be the tuple under test */
2113 	econtext->ecxt_scantuple = slot;
2114 
2115 	/* Check each of the constraints */
2116 	forboth(l1, resultRelInfo->ri_WithCheckOptions,
2117 			l2, resultRelInfo->ri_WithCheckOptionExprs)
2118 	{
2119 		WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
2120 		ExprState  *wcoExpr = (ExprState *) lfirst(l2);
2121 
2122 		/*
2123 		 * Skip any WCOs which are not the kind we are looking for at this
2124 		 * time.
2125 		 */
2126 		if (wco->kind != kind)
2127 			continue;
2128 
2129 		/*
2130 		 * WITH CHECK OPTION checks are intended to ensure that the new tuple
2131 		 * is visible (in the case of a view) or that it passes the
2132 		 * 'with-check' policy (in the case of row security). If the qual
2133 		 * evaluates to NULL or FALSE, then the new tuple won't be included in
2134 		 * the view or doesn't pass the 'with-check' policy for the table.
2135 		 */
2136 		if (!ExecQual(wcoExpr, econtext))
2137 		{
2138 			char	   *val_desc;
2139 			Bitmapset  *modifiedCols;
2140 
2141 			switch (wco->kind)
2142 			{
2143 					/*
2144 					 * For WITH CHECK OPTIONs coming from views, we might be
2145 					 * able to provide the details on the row, depending on
2146 					 * the permissions on the relation (that is, if the user
2147 					 * could view it directly anyway).  For RLS violations, we
2148 					 * don't include the data since we don't know if the user
2149 					 * should be able to view the tuple as that depends on the
2150 					 * USING policy.
2151 					 */
2152 				case WCO_VIEW_CHECK:
2153 					/* See the comment in ExecConstraints(). */
2154 					if (resultRelInfo->ri_RootResultRelInfo)
2155 					{
2156 						HeapTuple	tuple = ExecFetchSlotTuple(slot);
2157 						ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo;
2158 						TupleDesc	old_tupdesc = RelationGetDescr(rel);
2159 						TupleConversionMap *map;
2160 
2161 						tupdesc = RelationGetDescr(rootrel->ri_RelationDesc);
2162 						/* a reverse map */
2163 						map = convert_tuples_by_name(old_tupdesc, tupdesc,
2164 													 gettext_noop("could not convert row type"));
2165 						if (map != NULL)
2166 						{
2167 							tuple = do_convert_tuple(tuple, map);
2168 							/* one-off slot for building error message */
2169 							slot = MakeTupleTableSlot(tupdesc);
2170 							ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2171 						}
2172 						modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate),
2173 												 ExecGetUpdatedCols(rootrel, estate));
2174 						rel = rootrel->ri_RelationDesc;
2175 					}
2176 					else
2177 						modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate),
2178 												 ExecGetUpdatedCols(resultRelInfo, estate));
2179 					val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2180 															 slot,
2181 															 tupdesc,
2182 															 modifiedCols,
2183 															 64);
2184 
2185 					ereport(ERROR,
2186 							(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
2187 							 errmsg("new row violates check option for view \"%s\"",
2188 									wco->relname),
2189 							 val_desc ? errdetail("Failing row contains %s.",
2190 												  val_desc) : 0));
2191 					break;
2192 				case WCO_RLS_INSERT_CHECK:
2193 				case WCO_RLS_UPDATE_CHECK:
2194 					if (wco->polname != NULL)
2195 						ereport(ERROR,
2196 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2197 								 errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
2198 										wco->polname, wco->relname)));
2199 					else
2200 						ereport(ERROR,
2201 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2202 								 errmsg("new row violates row-level security policy for table \"%s\"",
2203 										wco->relname)));
2204 					break;
2205 				case WCO_RLS_CONFLICT_CHECK:
2206 					if (wco->polname != NULL)
2207 						ereport(ERROR,
2208 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2209 								 errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
2210 										wco->polname, wco->relname)));
2211 					else
2212 						ereport(ERROR,
2213 								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2214 								 errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
2215 										wco->relname)));
2216 					break;
2217 				default:
2218 					elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
2219 					break;
2220 			}
2221 		}
2222 	}
2223 }
2224 
2225 /*
2226  * ExecBuildSlotValueDescription -- construct a string representing a tuple
2227  *
2228  * This is intentionally very similar to BuildIndexValueDescription, but
2229  * unlike that function, we truncate long field values (to at most maxfieldlen
2230  * bytes).  That seems necessary here since heap field values could be very
2231  * long, whereas index entries typically aren't so wide.
2232  *
2233  * Also, unlike the case with index entries, we need to be prepared to ignore
2234  * dropped columns.  We used to use the slot's tuple descriptor to decode the
2235  * data, but the slot's descriptor doesn't identify dropped columns, so we
2236  * now need to be passed the relation's descriptor.
2237  *
2238  * Note that, like BuildIndexValueDescription, if the user does not have
2239  * permission to view any of the columns involved, a NULL is returned.  Unlike
2240  * BuildIndexValueDescription, if the user has access to view a subset of the
2241  * columns involved, that subset will be returned with a key identifying which
2242  * columns they are.
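 *
 * For example, with table-level SELECT privilege the result looks like
 *		(val1, val2, val3)
 * while with SELECT rights on only a subset of the columns it looks like
 *		(col1, col3) = (val1, val3)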
2243  */
2244 static char *
2245 ExecBuildSlotValueDescription(Oid reloid,
2246 							  TupleTableSlot *slot,
2247 							  TupleDesc tupdesc,
2248 							  Bitmapset *modifiedCols,
2249 							  int maxfieldlen)
2250 {
2251 	StringInfoData buf;
2252 	StringInfoData collist;
2253 	bool		write_comma = false;
2254 	bool		write_comma_collist = false;
2255 	int			i;
2256 	AclResult	aclresult;
2257 	bool		table_perm = false;
2258 	bool		any_perm = false;
2259 
2260 	/*
2261 	 * Check if RLS is enabled and should be active for the relation; if so,
2262 	 * then don't return anything.  Otherwise, go through normal permission
2263 	 * checks.
2264 	 */
2265 	if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED)
2266 		return NULL;
2267 
2268 	initStringInfo(&buf);
2269 
2270 	appendStringInfoChar(&buf, '(');
2271 
2272 	/*
2273 	 * Check if the user has permissions to see the row.  Table-level SELECT
2274 	 * allows access to all columns.  If the user does not have table-level
2275 	 * SELECT then we check each column and include those the user has SELECT
2276 	 * rights on.  Additionally, we always include columns the user provided
2277 	 * data for.
2278 	 */
2279 	aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT);
2280 	if (aclresult != ACLCHECK_OK)
2281 	{
2282 		/* Set up the buffer for the column list */
2283 		initStringInfo(&collist);
2284 		appendStringInfoChar(&collist, '(');
2285 	}
2286 	else
2287 		table_perm = any_perm = true;
2288 
2289 	/* Make sure the tuple is fully deconstructed */
2290 	slot_getallattrs(slot);
2291 
2292 	for (i = 0; i < tupdesc->natts; i++)
2293 	{
2294 		bool		column_perm = false;
2295 		char	   *val;
2296 		int			vallen;
2297 		Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2298 
2299 		/* ignore dropped columns */
2300 		if (att->attisdropped)
2301 			continue;
2302 
2303 		if (!table_perm)
2304 		{
2305 			/*
2306 			 * No table-level SELECT, so need to make sure they either have
2307 			 * SELECT rights on the column or that they have provided the data
2308 			 * for the column.  If not, omit this column from the error
2309 			 * message.
2310 			 */
2311 			aclresult = pg_attribute_aclcheck(reloid, att->attnum,
2312 											  GetUserId(), ACL_SELECT);
2313 			if (bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
2314 							  modifiedCols) || aclresult == ACLCHECK_OK)
2315 			{
2316 				column_perm = any_perm = true;
2317 
2318 				if (write_comma_collist)
2319 					appendStringInfoString(&collist, ", ");
2320 				else
2321 					write_comma_collist = true;
2322 
2323 				appendStringInfoString(&collist, NameStr(att->attname));
2324 			}
2325 		}
2326 
2327 		if (table_perm || column_perm)
2328 		{
2329 			if (slot->tts_isnull[i])
2330 				val = "null";
2331 			else
2332 			{
2333 				Oid			foutoid;
2334 				bool		typisvarlena;
2335 
2336 				getTypeOutputInfo(att->atttypid,
2337 								  &foutoid, &typisvarlena);
2338 				val = OidOutputFunctionCall(foutoid, slot->tts_values[i]);
2339 			}
2340 
2341 			if (write_comma)
2342 				appendStringInfoString(&buf, ", ");
2343 			else
2344 				write_comma = true;
2345 
2346 			/* truncate if needed */
2347 			vallen = strlen(val);
2348 			if (vallen <= maxfieldlen)
2349 				appendStringInfoString(&buf, val);
2350 			else
2351 			{
2352 				vallen = pg_mbcliplen(val, vallen, maxfieldlen);
2353 				appendBinaryStringInfo(&buf, val, vallen);
2354 				appendStringInfoString(&buf, "...");
2355 			}
2356 		}
2357 	}
2358 
2359 	/* If we end up with zero columns being returned, then return NULL. */
2360 	if (!any_perm)
2361 		return NULL;
2362 
2363 	appendStringInfoChar(&buf, ')');
2364 
2365 	if (!table_perm)
2366 	{
2367 		appendStringInfoString(&collist, ") = ");
2368 		appendStringInfoString(&collist, buf.data);
2369 
2370 		return collist.data;
2371 	}
2372 
2373 	return buf.data;
2374 }
2375 
2376 
2377 /*
2378  * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
2379  * given ResultRelInfo
2380  */
2381 LockTupleMode
2382 ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
2383 {
2384 	Bitmapset  *keyCols;
2385 	Bitmapset  *updatedCols;
2386 
2387 	/*
2388 	 * Compute lock mode to use.  If columns that are part of the key have not
2389 	 * been modified, then we can use a weaker lock, allowing for better
2390 	 * concurrency.
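	 * For example, an UPDATE that changes only non-key columns can use
	 * LockTupleNoKeyExclusive, which does not conflict with concurrent
	 * SELECT ... FOR KEY SHARE lockers such as foreign-key checks.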
2391 	 */
2392 	updatedCols = ExecGetUpdatedCols(relinfo, estate);
2393 	keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
2394 										 INDEX_ATTR_BITMAP_KEY);
2395 
2396 	if (bms_overlap(keyCols, updatedCols))
2397 		return LockTupleExclusive;
2398 
2399 	return LockTupleNoKeyExclusive;
2400 }
2401 
2402 /*
2403  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
2404  *
2405  * If no such struct exists, either return NULL or throw an error, depending on missing_ok
2406  */
2407 ExecRowMark *
2408 ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
2409 {
2410 	ListCell   *lc;
2411 
2412 	foreach(lc, estate->es_rowMarks)
2413 	{
2414 		ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
2415 
2416 		if (erm->rti == rti)
2417 			return erm;
2418 	}
2419 	if (!missing_ok)
2420 		elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
2421 	return NULL;
2422 }
2423 
2424 /*
2425  * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
2426  *
2427  * Inputs are the underlying ExecRowMark struct and the targetlist of the
2428  * input plan node (not planstate node!).  We need the latter to find out
2429  * the column numbers of the resjunk columns.
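 *
 * For example, for a rowmark whose rowmarkId is 1, the planner emits a junk
 * column named "ctid1" (or "wholerow1" for ROW_MARK_COPY), plus "tableoid1"
 * if the rel is an inheritance child.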
2430  */
2431 ExecAuxRowMark *
2432 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
2433 {
2434 	ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
2435 	char		resname[32];
2436 
2437 	aerm->rowmark = erm;
2438 
2439 	/* Look up the resjunk columns associated with this rowmark */
2440 	if (erm->markType != ROW_MARK_COPY)
2441 	{
2442 		/* need ctid for all methods other than COPY */
2443 		snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
2444 		aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2445 													   resname);
2446 		if (!AttributeNumberIsValid(aerm->ctidAttNo))
2447 			elog(ERROR, "could not find junk %s column", resname);
2448 	}
2449 	else
2450 	{
2451 		/* need wholerow if COPY */
2452 		snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
2453 		aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
2454 														resname);
2455 		if (!AttributeNumberIsValid(aerm->wholeAttNo))
2456 			elog(ERROR, "could not find junk %s column", resname);
2457 	}
2458 
2459 	/* if child rel, need tableoid */
2460 	if (erm->rti != erm->prti)
2461 	{
2462 		snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
2463 		aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2464 													   resname);
2465 		if (!AttributeNumberIsValid(aerm->toidAttNo))
2466 			elog(ERROR, "could not find junk %s column", resname);
2467 	}
2468 
2469 	return aerm;
2470 }
2471 
2472 
2473 /*
2474  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
2475  * process the updated version under READ COMMITTED rules.
2476  *
2477  * See backend/executor/README for some info about how this works.
2478  */
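
/*
 * For example (illustrative only; "accounts" is a hypothetical table):
 * under READ COMMITTED, if
 *
 *		UPDATE accounts SET bal = bal - 100 WHERE bal >= 100;
 *
 * blocks on a row that another transaction has just updated, then once
 * that transaction commits we fetch the newest version of the row and
 * re-evaluate "bal >= 100" against it before applying our update.
 */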
2479 
2480 
2481 /*
2482  * Check a modified tuple to see if we want to process its updated version
2483  * under READ COMMITTED rules.
2484  *
2485  *	estate - outer executor state data
2486  *	epqstate - state for EvalPlanQual rechecking
2487  *	relation - table containing tuple
2488  *	rti - rangetable index of table containing tuple
2489  *	lockmode - requested tuple lock mode
2490  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
2491  *	priorXmax - t_xmax from the outdated tuple
2492  *
2493  * *tid is also an output parameter: it's modified to hold the TID of the
2494  * latest version of the tuple (note this may be changed even on failure)
2495  *
2496  * Returns a slot containing the new candidate update/delete tuple, or
2497  * NULL if we determine we shouldn't process the row.
2498  *
2499  * Note: properly, lockmode should be declared as enum LockTupleMode,
2500  * but we use "int" to avoid having to include heapam.h in executor.h.
2501  */
2502 TupleTableSlot *
2503 EvalPlanQual(EState *estate, EPQState *epqstate,
2504 			 Relation relation, Index rti, int lockmode,
2505 			 ItemPointer tid, TransactionId priorXmax)
2506 {
2507 	TupleTableSlot *slot;
2508 	HeapTuple	copyTuple;
2509 
2510 	Assert(rti > 0);
2511 
2512 	/*
2513 	 * Get and lock the updated version of the row; if fail, return NULL.
2514 	 */
2515 	copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
2516 								  tid, priorXmax);
2517 
2518 	if (copyTuple == NULL)
2519 		return NULL;
2520 
2521 	/*
2522 	 * For UPDATE/DELETE we have to return the tid of the actual row we're
2523 	 * executing EPQ for.
2524 	 */
2525 	*tid = copyTuple->t_self;
2526 
2527 	/*
2528 	 * Need to run a recheck subquery.  Initialize or reinitialize EPQ state.
2529 	 */
2530 	EvalPlanQualBegin(epqstate, estate);
2531 
2532 	/*
2533 	 * Free old test tuple, if any, and store new tuple where relation's scan
2534 	 * node will see it
2535 	 */
2536 	EvalPlanQualSetTuple(epqstate, rti, copyTuple);
2537 
2538 	/*
2539 	 * Fetch any non-locked source rows
2540 	 */
2541 	EvalPlanQualFetchRowMarks(epqstate);
2542 
2543 	/*
2544 	 * Run the EPQ query.  We assume it will return at most one tuple.
2545 	 */
2546 	slot = EvalPlanQualNext(epqstate);
2547 
2548 	/*
2549 	 * If we got a tuple, force the slot to materialize the tuple so that it
2550 	 * is not dependent on any local state in the EPQ query (in particular,
2551 	 * it's highly likely that the slot contains references to any pass-by-ref
2552 	 * datums that may be present in copyTuple).  As with the next step, this
2553 	 * is to guard against early re-use of the EPQ query.
2554 	 */
2555 	if (!TupIsNull(slot))
2556 		(void) ExecMaterializeSlot(slot);
2557 
2558 	/*
2559 	 * Clear out the test tuple.  This is needed in case the EPQ query is
2560 	 * re-used to test a tuple for a different relation.  (Not clear that can
2561 	 * really happen, but let's be safe.)
2562 	 */
2563 	EvalPlanQualSetTuple(epqstate, rti, NULL);
2564 
2565 	return slot;
2566 }
2567 
2568 /*
2569  * Fetch a copy of the newest version of an outdated tuple
2570  *
2571  *	estate - executor state data
2572  *	relation - table containing tuple
2573  *	lockmode - requested tuple lock mode
2574  *	wait_policy - requested lock wait policy
2575  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
2576  *	priorXmax - t_xmax from the outdated tuple
2577  *
2578  * Returns a palloc'd copy of the newest tuple version, or NULL if we find
2579  * that there is no newest version (ie, the row was deleted not updated).
2580  * We also return NULL if the tuple is locked and the wait policy is to skip
2581  * such tuples.
2582  *
2583  * If successful, we have locked the newest tuple version, so caller does not
2584  * need to worry about it changing anymore.
2585  *
2586  * Note: properly, lockmode should be declared as enum LockTupleMode,
2587  * but we use "int" to avoid having to include heapam.h in executor.h.
2588  */
2589 HeapTuple
2590 EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
2591 				  LockWaitPolicy wait_policy,
2592 				  ItemPointer tid, TransactionId priorXmax)
2593 {
2594 	HeapTuple	copyTuple = NULL;
2595 	HeapTupleData tuple;
2596 	SnapshotData SnapshotDirty;
2597 
2598 	/*
2599 	 * fetch target tuple
2600 	 *
2601 	 * Loop here to deal with updated or busy tuples
2602 	 */
2603 	InitDirtySnapshot(SnapshotDirty);
2604 	tuple.t_self = *tid;
2605 	for (;;)
2606 	{
2607 		Buffer		buffer;
2608 
2609 		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2610 		{
2611 			HTSU_Result test;
2612 			HeapUpdateFailureData hufd;
2613 
2614 			/*
2615 			 * If xmin isn't what we're expecting, the slot must have been
2616 			 * recycled and reused for an unrelated tuple.  This implies that
2617 			 * the latest version of the row was deleted, so we need do
2618 			 * nothing.  (Should be safe to examine xmin without getting
2619 			 * buffer's content lock.  We assume reading a TransactionId to be
2620 			 * atomic, and Xmin never changes in an existing tuple, except to
2621 			 * invalid or frozen, and neither of those can match priorXmax.)
2622 			 */
2623 			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2624 									 priorXmax))
2625 			{
2626 				ReleaseBuffer(buffer);
2627 				return NULL;
2628 			}
2629 
2630 			/* otherwise xmin should not be dirty... */
2631 			if (TransactionIdIsValid(SnapshotDirty.xmin))
2632 				ereport(ERROR,
2633 						(errcode(ERRCODE_DATA_CORRUPTED),
2634 						 errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
2635 										 SnapshotDirty.xmin,
2636 										 ItemPointerGetBlockNumber(&tuple.t_self),
2637 										 ItemPointerGetOffsetNumber(&tuple.t_self),
2638 										 RelationGetRelationName(relation))));
2639 
2640 			/*
2641 			 * If tuple is being updated by other transaction then we have to
2642 			 * wait for its commit/abort, or die trying.
2643 			 */
2644 			if (TransactionIdIsValid(SnapshotDirty.xmax))
2645 			{
2646 				ReleaseBuffer(buffer);
2647 				switch (wait_policy)
2648 				{
2649 					case LockWaitBlock:
2650 						XactLockTableWait(SnapshotDirty.xmax,
2651 										  relation, &tuple.t_self,
2652 										  XLTW_FetchUpdated);
2653 						break;
2654 					case LockWaitSkip:
2655 						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2656 							return NULL;	/* skip instead of waiting */
2657 						break;
2658 					case LockWaitError:
2659 						if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2660 							ereport(ERROR,
2661 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
2662 									 errmsg("could not obtain lock on row in relation \"%s\"",
2663 											RelationGetRelationName(relation))));
2664 						break;
2665 				}
2666 				continue;		/* loop back to repeat heap_fetch */
2667 			}
2668 
2669 			/*
2670 			 * If tuple was inserted by our own transaction, we have to check
2671 			 * cmin against es_output_cid: cmin >= current CID means our
2672 			 * command cannot see the tuple, so we should ignore it. Otherwise
2673 			 * heap_lock_tuple() will throw an error, and so would any later
2674 			 * attempt to update or delete the tuple.  (We need not check cmax
2675 			 * because HeapTupleSatisfiesDirty will consider a tuple deleted
2676 			 * by our transaction dead, regardless of cmax.) We just checked
2677 			 * that priorXmax == xmin, so we can test that variable instead of
2678 			 * doing HeapTupleHeaderGetXmin again.
2679 			 */
2680 			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2681 				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2682 			{
2683 				ReleaseBuffer(buffer);
2684 				return NULL;
2685 			}
2686 
2687 			/*
2688 			 * This is a live tuple, so now try to lock it.
2689 			 */
2690 			test = heap_lock_tuple(relation, &tuple,
2691 								   estate->es_output_cid,
2692 								   lockmode, wait_policy,
2693 								   false, &buffer, &hufd);
2694 			/* We now have two pins on the buffer, get rid of one */
2695 			ReleaseBuffer(buffer);
2696 
2697 			switch (test)
2698 			{
2699 				case HeapTupleSelfUpdated:
2700 
2701 					/*
2702 					 * The target tuple was already updated or deleted by the
2703 					 * current command, or by a later command in the current
2704 					 * transaction.  We *must* ignore the tuple in the former
2705 					 * case, so as to avoid the "Halloween problem" of
2706 					 * repeated update attempts.  In the latter case it might
2707 					 * be sensible to fetch the updated tuple instead, but
2708 					 * doing so would require changing heap_update and
2709 					 * heap_delete to not complain about updating "invisible"
2710 					 * tuples, which seems pretty scary (heap_lock_tuple will
2711 					 * not complain, but few callers expect
2712 					 * HeapTupleInvisible, and we're not one of them).  So for
2713 					 * now, treat the tuple as deleted and do not process.
2714 					 */
2715 					ReleaseBuffer(buffer);
2716 					return NULL;
2717 
2718 				case HeapTupleMayBeUpdated:
2719 					/* successfully locked */
2720 					break;
2721 
2722 				case HeapTupleUpdated:
2723 					ReleaseBuffer(buffer);
2724 					if (IsolationUsesXactSnapshot())
2725 						ereport(ERROR,
2726 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2727 								 errmsg("could not serialize access due to concurrent update")));
2728 					if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
2729 						ereport(ERROR,
2730 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2731 								 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
2732 
2733 					/* Should not encounter speculative tuple on recheck */
2734 					Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
2735 					if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
2736 					{
2737 						/* it was updated, so look at the updated version */
2738 						tuple.t_self = hufd.ctid;
2739 						/* updated row should have xmin matching this xmax */
2740 						priorXmax = hufd.xmax;
2741 						continue;
2742 					}
2743 					/* tuple was deleted, so give up */
2744 					return NULL;
2745 
2746 				case HeapTupleWouldBlock:
2747 					ReleaseBuffer(buffer);
2748 					return NULL;
2749 
2750 				case HeapTupleInvisible:
2751 					elog(ERROR, "attempted to lock invisible tuple");
2752 					break;
2753 
2754 				default:
2755 					ReleaseBuffer(buffer);
2756 					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
2757 						 test);
2758 					return NULL;	/* keep compiler quiet */
2759 			}
2760 
2761 			/*
2762 			 * We got tuple - now copy it for use by recheck query.
2763 			 */
2764 			copyTuple = heap_copytuple(&tuple);
2765 			ReleaseBuffer(buffer);
2766 			break;
2767 		}
2768 
2769 		/*
2770 		 * If the referenced slot was actually empty, the latest version of
2771 		 * the row must have been deleted, so we need do nothing.
2772 		 */
2773 		if (tuple.t_data == NULL)
2774 		{
2775 			ReleaseBuffer(buffer);
2776 			return NULL;
2777 		}
2778 
2779 		/*
2780 		 * As above, if xmin isn't what we're expecting, do nothing.
2781 		 */
2782 		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2783 								 priorXmax))
2784 		{
2785 			ReleaseBuffer(buffer);
2786 			return NULL;
2787 		}
2788 
2789 		/*
2790 		 * If we get here, the tuple was found but failed SnapshotDirty.
2791 		 * Assuming the xmin is either a committed xact or our own xact (as it
2792 		 * certainly should be if we're trying to modify the tuple), this must
2793 		 * mean that the row was updated or deleted by either a committed xact
2794 		 * or our own xact.  If it was deleted, we can ignore it; if it was
2795 		 * updated then chain up to the next version and repeat the whole
2796 		 * process.
2797 		 *
2798 		 * As above, it should be safe to examine xmax and t_ctid without the
2799 		 * buffer content lock, because they can't be changing.
2800 		 */
2801 
2802 		/* check whether next version would be in a different partition */
2803 		if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
2804 			ereport(ERROR,
2805 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2806 					 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
2807 
2808 		/* check whether tuple has been deleted */
2809 		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2810 		{
2811 			/* deleted, so forget about it */
2812 			ReleaseBuffer(buffer);
2813 			return NULL;
2814 		}
2815 
2816 		/* updated, so look at the updated row */
2817 		tuple.t_self = tuple.t_data->t_ctid;
2818 		/* updated row should have xmin matching this xmax */
2819 		priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
2820 		ReleaseBuffer(buffer);
2821 		/* loop back to fetch next in chain */
2822 	}
2823 
2824 	/*
2825 	 * Return the copied tuple
2826 	 */
2827 	return copyTuple;
2828 }
2829 
2830 /*
2831  * EvalPlanQualInit -- initialize during creation of a plan state node
2832  * that might need to invoke EPQ processing.
2833  *
2834  * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
2835  * with EvalPlanQualSetPlan.
2836  */
2837 void
2838 EvalPlanQualInit(EPQState *epqstate, EState *estate,
2839 				 Plan *subplan, List *auxrowmarks, int epqParam)
2840 {
2841 	/* Mark the EPQ state inactive */
2842 	epqstate->estate = NULL;
2843 	epqstate->planstate = NULL;
2844 	epqstate->origslot = NULL;
2845 	/* ... and remember data that EvalPlanQualBegin will need */
2846 	epqstate->plan = subplan;
2847 	epqstate->arowMarks = auxrowmarks;
2848 	epqstate->epqParam = epqParam;
2849 }
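
/*
 * For illustration (a sketch of typical usage; exact call sites may vary):
 * ModifyTable initializes its EPQ state before the subplans are known and
 * installs each subplan later, roughly:
 *
 *		EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL,
 *						 node->epqParam);
 *		...
 *		EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, auxrowmarks);
 */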
2850 
2851 /*
2852  * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
2853  *
2854  * We need this so that ModifyTable can deal with multiple subplans.
2855  */
2856 void
2857 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
2858 {
2859 	/* If we have a live EPQ query, shut it down */
2860 	EvalPlanQualEnd(epqstate);
2861 	/* And set/change the plan pointer */
2862 	epqstate->plan = subplan;
2863 	/* The rowmarks depend on the plan, too */
2864 	epqstate->arowMarks = auxrowmarks;
2865 }
2866 
2867 /*
2868  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
2869  *
2870  * NB: passed tuple must be palloc'd; it may get freed later
2871  */
2872 void
2873 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
2874 {
2875 	EState	   *estate = epqstate->estate;
2876 
2877 	Assert(rti > 0);
2878 
2879 	/*
2880 	 * free old test tuple, if any, and store new tuple where relation's scan
2881 	 * node will see it
2882 	 */
2883 	if (estate->es_epqTuple[rti - 1] != NULL)
2884 		heap_freetuple(estate->es_epqTuple[rti - 1]);
2885 	estate->es_epqTuple[rti - 1] = tuple;
2886 	estate->es_epqTupleSet[rti - 1] = true;
2887 }
2888 
2889 /*
2890  * Fetch back the current test tuple (if any) for the specified RTI
2891  */
2892 HeapTuple
2893 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
2894 {
2895 	EState	   *estate = epqstate->estate;
2896 
2897 	Assert(rti > 0);
2898 
2899 	return estate->es_epqTuple[rti - 1];
2900 }
2901 
2902 /*
2903  * Fetch the current row values for any non-locked relations that need
2904  * to be scanned by an EvalPlanQual operation.  origslot must have been set
2905  * to contain the current result row (top-level row) that we need to recheck.
2906  */
2907 void
2908 EvalPlanQualFetchRowMarks(EPQState *epqstate)
2909 {
2910 	ListCell   *l;
2911 
2912 	Assert(epqstate->origslot != NULL);
2913 
2914 	foreach(l, epqstate->arowMarks)
2915 	{
2916 		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
2917 		ExecRowMark *erm = aerm->rowmark;
2918 		Datum		datum;
2919 		bool		isNull;
2920 		HeapTupleData tuple;
2921 
2922 		if (RowMarkRequiresRowShareLock(erm->markType))
2923 			elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
2924 
2925 		/* clear any leftover test tuple for this rel */
2926 		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
2927 
2928 		/* if child rel, must check whether it produced this row */
2929 		if (erm->rti != erm->prti)
2930 		{
2931 			Oid			tableoid;
2932 
2933 			datum = ExecGetJunkAttribute(epqstate->origslot,
2934 										 aerm->toidAttNo,
2935 										 &isNull);
2936 			/* non-locked rels could be on the inside of outer joins */
2937 			if (isNull)
2938 				continue;
2939 			tableoid = DatumGetObjectId(datum);
2940 
2941 			Assert(OidIsValid(erm->relid));
2942 			if (tableoid != erm->relid)
2943 			{
2944 				/* this child is inactive right now */
2945 				continue;
2946 			}
2947 		}
2948 
2949 		if (erm->markType == ROW_MARK_REFERENCE)
2950 		{
2951 			HeapTuple	copyTuple;
2952 
2953 			Assert(erm->relation != NULL);
2954 
2955 			/* fetch the tuple's ctid */
2956 			datum = ExecGetJunkAttribute(epqstate->origslot,
2957 										 aerm->ctidAttNo,
2958 										 &isNull);
2959 			/* non-locked rels could be on the inside of outer joins */
2960 			if (isNull)
2961 				continue;
2962 
2963 			/* fetch requests on foreign tables must be passed to their FDW */
2964 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2965 			{
2966 				FdwRoutine *fdwroutine;
2967 				bool		updated = false;
2968 
2969 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
2970 				/* this should have been checked already, but let's be safe */
2971 				if (fdwroutine->RefetchForeignRow == NULL)
2972 					ereport(ERROR,
2973 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2974 							 errmsg("cannot lock rows in foreign table \"%s\"",
2975 									RelationGetRelationName(erm->relation))));
2976 				copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
2977 														  erm,
2978 														  datum,
2979 														  &updated);
2980 				if (copyTuple == NULL)
2981 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2982 
2983 				/*
2984 				 * Ideally we'd insist on updated == false here, but that
2985 				 * assumes that FDWs can track that exactly, which they might
2986 				 * not be able to.  So just ignore the flag.
2987 				 */
2988 			}
2989 			else
2990 			{
2991 				/* ordinary table, fetch the tuple */
2992 				Buffer		buffer;
2993 
2994 				tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
2995 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
2996 								false, NULL))
2997 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
2998 
2999 				/* successful, copy tuple */
3000 				copyTuple = heap_copytuple(&tuple);
3001 				ReleaseBuffer(buffer);
3002 			}
3003 
3004 			/* store tuple */
3005 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
3006 		}
3007 		else
3008 		{
3009 			HeapTupleHeader td;
3010 
3011 			Assert(erm->markType == ROW_MARK_COPY);
3012 
3013 			/* fetch the whole-row Var for the relation */
3014 			datum = ExecGetJunkAttribute(epqstate->origslot,
3015 										 aerm->wholeAttNo,
3016 										 &isNull);
3017 			/* non-locked rels could be on the inside of outer joins */
3018 			if (isNull)
3019 				continue;
3020 			td = DatumGetHeapTupleHeader(datum);
3021 
3022 			/* build a temporary HeapTuple control structure */
3023 			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
3024 			tuple.t_data = td;
3025 			/* relation might be a foreign table, if so provide tableoid */
3026 			tuple.t_tableOid = erm->relid;
3027 			/* also copy t_ctid in case there's valid data there */
3028 			tuple.t_self = td->t_ctid;
3029 
3030 			/* copy and store tuple */
3031 			EvalPlanQualSetTuple(epqstate, erm->rti,
3032 								 heap_copytuple(&tuple));
3033 		}
3034 	}
3035 }
3036 
3037 /*
3038  * Fetch the next row (if any) from EvalPlanQual testing
3039  *
3040  * (In practice, there should never be more than one row...)
3041  */
3042 TupleTableSlot *
3043 EvalPlanQualNext(EPQState *epqstate)
3044 {
3045 	MemoryContext oldcontext;
3046 	TupleTableSlot *slot;
3047 
3048 	oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
3049 	slot = ExecProcNode(epqstate->planstate);
3050 	MemoryContextSwitchTo(oldcontext);
3051 
3052 	return slot;
3053 }
3054 
3055 /*
3056  * Initialize or reset an EvalPlanQual state tree
3057  */
3058 void
3059 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
3060 {
3061 	EState	   *estate = epqstate->estate;
3062 
3063 	if (estate == NULL)
3064 	{
3065 		/* First time through, so create a child EState */
3066 		EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
3067 	}
3068 	else
3069 	{
3070 		/*
3071 		 * We already have a suitable child EPQ tree, so just reset it.
3072 		 */
3073 		int			rtsize = list_length(parentestate->es_range_table);
3074 		PlanState  *planstate = epqstate->planstate;
3075 
3076 		MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
3077 
3078 		/* Recopy current values of parent parameters */
3079 		if (parentestate->es_plannedstmt->paramExecTypes != NIL)
3080 		{
3081 			int			i;
3082 
3083 			/*
3084 			 * Force evaluation of any InitPlan outputs that could be needed
3085 			 * by the subplan, just in case they got reset since
3086 			 * EvalPlanQualStart (see comments therein).
3087 			 */
3088 			ExecSetParamPlanMulti(planstate->plan->extParam,
3089 								  GetPerTupleExprContext(parentestate));
3090 
3091 			i = list_length(parentestate->es_plannedstmt->paramExecTypes);
3092 
3093 			while (--i >= 0)
3094 			{
3095 				/* copy value if any, but not execPlan link */
3096 				estate->es_param_exec_vals[i].value =
3097 					parentestate->es_param_exec_vals[i].value;
3098 				estate->es_param_exec_vals[i].isnull =
3099 					parentestate->es_param_exec_vals[i].isnull;
3100 			}
3101 		}
3102 
3103 		/*
3104 		 * Mark child plan tree as needing rescan at all scan nodes.  The
3105 		 * first ExecProcNode will take care of actually doing the rescan.
3106 		 */
3107 		planstate->chgParam = bms_add_member(planstate->chgParam,
3108 											 epqstate->epqParam);
3109 	}
3110 }
3111 
3112 /*
3113  * Start execution of an EvalPlanQual plan tree.
3114  *
3115  * This is a cut-down version of ExecutorStart(): we copy some state from
3116  * the top-level estate rather than initializing it fresh.
3117  */
3118 static void
3119 EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
3120 {
3121 	EState	   *estate;
3122 	int			rtsize;
3123 	MemoryContext oldcontext;
3124 	ListCell   *l;
3125 
3126 	rtsize = list_length(parentestate->es_range_table);
3127 
3128 	epqstate->estate = estate = CreateExecutorState();
3129 
3130 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
3131 
3132 	/*
3133 	 * Child EPQ EStates share the parent's copy of unchanging state such as
3134 	 * the snapshot, rangetable, result-rel info, and external Param info.
3135 	 * They need their own copies of local state, including a tuple table,
3136 	 * es_param_exec_vals, etc.
3137 	 *
3138 	 * The ResultRelInfo array management is trickier than it looks.  We
3139 	 * create fresh arrays for the child but copy all the content from the
3140 	 * parent.  This is because it's okay for the child to share any
3141 	 * per-relation state the parent has already created --- but if the child
3142 	 * sets up any ResultRelInfo fields, such as its own junkfilter, that
3143 	 * state must *not* propagate back to the parent.  (For one thing, the
3144 	 * pointed-to data is in a memory context that won't last long enough.)
3145 	 */
3146 	estate->es_direction = ForwardScanDirection;
3147 	estate->es_snapshot = parentestate->es_snapshot;
3148 	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
3149 	estate->es_range_table = parentestate->es_range_table;
3150 	estate->es_queryEnv = parentestate->es_queryEnv;
3151 	estate->es_plannedstmt = parentestate->es_plannedstmt;
3152 	estate->es_junkFilter = parentestate->es_junkFilter;
3153 	estate->es_output_cid = parentestate->es_output_cid;
3154 	if (parentestate->es_num_result_relations > 0)
3155 	{
3156 		int			numResultRelations = parentestate->es_num_result_relations;
3157 		int			numRootResultRels = parentestate->es_num_root_result_relations;
3158 		ResultRelInfo *resultRelInfos;
3159 
3160 		resultRelInfos = (ResultRelInfo *)
3161 			palloc(numResultRelations * sizeof(ResultRelInfo));
3162 		memcpy(resultRelInfos, parentestate->es_result_relations,
3163 			   numResultRelations * sizeof(ResultRelInfo));
3164 		estate->es_result_relations = resultRelInfos;
3165 		estate->es_num_result_relations = numResultRelations;
3166 
3167 		/* Also transfer partitioned root result relations. */
3168 		if (numRootResultRels > 0)
3169 		{
3170 			resultRelInfos = (ResultRelInfo *)
3171 				palloc(numRootResultRels * sizeof(ResultRelInfo));
3172 			memcpy(resultRelInfos, parentestate->es_root_result_relations,
3173 				   numRootResultRels * sizeof(ResultRelInfo));
3174 			estate->es_root_result_relations = resultRelInfos;
3175 			estate->es_num_root_result_relations = numRootResultRels;
3176 		}
3177 	}
3178 	/* es_result_relation_info must NOT be copied */
3179 	/* es_trig_target_relations must NOT be copied */
3180 	estate->es_rowMarks = parentestate->es_rowMarks;
3181 	estate->es_top_eflags = parentestate->es_top_eflags;
3182 	estate->es_instrument = parentestate->es_instrument;
3183 	/* es_auxmodifytables must NOT be copied */
3184 
3185 	/*
3186 	 * The external param list is simply shared from parent.  The internal
3187 	 * param workspace has to be local state, but we copy the initial values
3188 	 * from the parent, so as to have access to any param values that were
3189 	 * already set from other parts of the parent's plan tree.
3190 	 */
3191 	estate->es_param_list_info = parentestate->es_param_list_info;
3192 	if (parentestate->es_plannedstmt->paramExecTypes != NIL)
3193 	{
3194 		int			i;
3195 
3196 		/*
3197 		 * Force evaluation of any InitPlan outputs that could be needed by
3198 		 * the subplan.  (With more complexity, maybe we could postpone this
3199 		 * till the subplan actually demands them, but it doesn't seem worth
3200 		 * the trouble; this is a corner case already, since usually the
3201 		 * InitPlans would have been evaluated before reaching EvalPlanQual.)
3202 		 *
3203 		 * This will not touch output params of InitPlans that occur somewhere
3204 		 * within the subplan tree, only those that are attached to the
3205 		 * ModifyTable node or above it and are referenced within the subplan.
3206 		 * That's OK though, because the planner would only attach such
3207 		 * InitPlans to a lower-level SubqueryScan node, and EPQ execution
3208 		 * will not descend into a SubqueryScan.
3209 		 *
3210 		 * The EState's per-output-tuple econtext is sufficiently short-lived
3211 		 * for this, since it should get reset before there is any chance of
3212 		 * doing EvalPlanQual again.
3213 		 */
3214 		ExecSetParamPlanMulti(planTree->extParam,
3215 							  GetPerTupleExprContext(parentestate));
3216 
3217 		/* now make the internal param workspace ... */
3218 		i = list_length(parentestate->es_plannedstmt->paramExecTypes);
3219 		estate->es_param_exec_vals = (ParamExecData *)
3220 			palloc0(i * sizeof(ParamExecData));
3221 		/* ... and copy down all values, whether really needed or not */
3222 		while (--i >= 0)
3223 		{
3224 			/* copy value if any, but not execPlan link */
3225 			estate->es_param_exec_vals[i].value =
3226 				parentestate->es_param_exec_vals[i].value;
3227 			estate->es_param_exec_vals[i].isnull =
3228 				parentestate->es_param_exec_vals[i].isnull;
3229 		}
3230 	}
3231 
3232 	/*
3233 	 * Each EState must have its own es_epqScanDone state, but if we have
3234 	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
3235 	 * sub-rechecks to inherit the values being examined by an outer recheck.
3236 	 */
3237 	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
3238 	if (parentestate->es_epqTuple != NULL)
3239 	{
3240 		estate->es_epqTuple = parentestate->es_epqTuple;
3241 		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
3242 	}
3243 	else
3244 	{
3245 		estate->es_epqTuple = (HeapTuple *)
3246 			palloc0(rtsize * sizeof(HeapTuple));
3247 		estate->es_epqTupleSet = (bool *)
3248 			palloc0(rtsize * sizeof(bool));
3249 	}
3250 
3251 	/*
3252 	 * Each estate also has its own tuple table.
3253 	 */
3254 	estate->es_tupleTable = NIL;
3255 
3256 	/*
3257 	 * Initialize private state information for each SubPlan.  We must do this
3258 	 * before running ExecInitNode on the main query tree, since
3259 	 * ExecInitSubPlan expects to be able to find these entries. Some of the
3260 	 * SubPlans might not be used in the part of the plan tree we intend to
3261 	 * run, but since it's not easy to tell which, we just initialize them
3262 	 * all.
3263 	 */
3264 	Assert(estate->es_subplanstates == NIL);
3265 	foreach(l, parentestate->es_plannedstmt->subplans)
3266 	{
3267 		Plan	   *subplan = (Plan *) lfirst(l);
3268 		PlanState  *subplanstate;
3269 
3270 		subplanstate = ExecInitNode(subplan, estate, 0);
3271 		estate->es_subplanstates = lappend(estate->es_subplanstates,
3272 										   subplanstate);
3273 	}
3274 
3275 	/*
3276 	 * Initialize the private state information for all the nodes in the part
3277 	 * of the plan tree we need to run.  This opens files, allocates storage
3278 	 * and leaves us ready to start processing tuples.
3279 	 */
3280 	epqstate->planstate = ExecInitNode(planTree, estate, 0);
3281 
3282 	MemoryContextSwitchTo(oldcontext);
3283 }
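
/*
 * A comment-only sketch of why the ResultRelInfo arrays above must be fresh
 * storage even though their contents are copied.  If the child instead did
 *
 *		estate->es_result_relations = parentestate->es_result_relations;
 *		...
 *		resultRelInfo->ri_junkFilter = <filter allocated in es_query_cxt>;
 *
 * the parent would be left holding a pointer into the child's query
 * context, which FreeExecutorState() destroys in EvalPlanQualEnd().
 * Copying into freshly palloc'd arrays keeps any such writes private to
 * the child.
 */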

/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndNode(epqstate->planstate);

	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/* close any trigger target relations attached to this EState */
	ExecCleanUpTriggerState(estate);

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
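
/*
 * Illustrative sketch of the full EPQ lifecycle as a caller sees it.  This
 * loosely follows how nodeLockRows.c and nodeModifyTable.c drive the API;
 * names and details (notably the row-mark lists) are simplified and should
 * be treated as assumptions, not code from this file:
 *
 *		At node initialization:
 *			EvalPlanQualInit(&epqstate, estate, subplan, auxrowmarks, epqParam);
 *		When a concurrent update of a target row is detected:
 *			slot = EvalPlanQual(estate, &epqstate, relation, rti,
 *								lockmode, &tid, priorXmax);
 *			if (!TupIsNull(slot))
 *				... proceed using the rechecked row ...
 *		At node shutdown:
 *			EvalPlanQualEnd(&epqstate);
 */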