1 /*-------------------------------------------------------------------------
2  *
3  * subselect.c
4  *	  Planning routines for subselects.
5  *
6  * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e.,
7  * not sub-SELECT-in-FROM cases).
8  *
9  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * IDENTIFICATION
13  *	  src/backend/optimizer/plan/subselect.c
14  *
15  *-------------------------------------------------------------------------
16  */
17 #include "postgres.h"
18 
19 #include "access/htup_details.h"
20 #include "catalog/pg_operator.h"
21 #include "catalog/pg_type.h"
22 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/cost.h"
28 #include "optimizer/optimizer.h"
29 #include "optimizer/paramassign.h"
30 #include "optimizer/pathnode.h"
31 #include "optimizer/planmain.h"
32 #include "optimizer/planner.h"
33 #include "optimizer/prep.h"
34 #include "optimizer/subselect.h"
35 #include "parser/parse_relation.h"
36 #include "rewrite/rewriteManip.h"
37 #include "utils/builtins.h"
38 #include "utils/lsyscache.h"
39 #include "utils/syscache.h"
40 
41 
/*
 * Source-code-compatibility shim for the pull_varnos() API change: any call
 * spelled pull_varnos(x, y) in this file is forwarded, arguments unchanged,
 * to pull_varnos_new(x, y).
 */
#define pull_varnos(first, second) pull_varnos_new(first, second)
44 
45 typedef struct convert_testexpr_context
46 {
47 	PlannerInfo *root;
48 	List	   *subst_nodes;	/* Nodes to substitute for Params */
49 } convert_testexpr_context;
50 
51 typedef struct process_sublinks_context
52 {
53 	PlannerInfo *root;
54 	bool		isTopQual;
55 } process_sublinks_context;
56 
57 typedef struct finalize_primnode_context
58 {
59 	PlannerInfo *root;
60 	Bitmapset  *paramids;		/* Non-local PARAM_EXEC paramids found */
61 } finalize_primnode_context;
62 
63 typedef struct inline_cte_walker_context
64 {
65 	const char *ctename;		/* name and relative level of target CTE */
66 	int			levelsup;
67 	int			refcount;		/* number of remaining references */
68 	Query	   *ctequery;		/* query to substitute */
69 } inline_cte_walker_context;
70 
71 
72 static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
73 						   List *plan_params,
74 						   SubLinkType subLinkType, int subLinkId,
75 						   Node *testexpr, List *testexpr_paramids,
76 						   bool unknownEqFalse);
77 static List *generate_subquery_params(PlannerInfo *root, List *tlist,
78 									  List **paramIds);
79 static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
80 									Index varno);
81 static Node *convert_testexpr(PlannerInfo *root,
82 							  Node *testexpr,
83 							  List *subst_nodes);
84 static Node *convert_testexpr_mutator(Node *node,
85 									  convert_testexpr_context *context);
86 static bool subplan_is_hashable(Plan *plan);
87 static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
88 static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
89 static bool hash_ok_operator(OpExpr *expr);
90 static bool contain_dml(Node *node);
91 static bool contain_dml_walker(Node *node, void *context);
92 static bool contain_outer_selfref(Node *node);
93 static bool contain_outer_selfref_walker(Node *node, Index *depth);
94 static void inline_cte(PlannerInfo *root, CommonTableExpr *cte);
95 static bool inline_cte_walker(Node *node, inline_cte_walker_context *context);
96 static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
97 static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
98 									Node **testexpr, List **paramIds);
99 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
100 static Node *process_sublinks_mutator(Node *node,
101 									  process_sublinks_context *context);
102 static Bitmapset *finalize_plan(PlannerInfo *root,
103 								Plan *plan,
104 								int gather_param,
105 								Bitmapset *valid_params,
106 								Bitmapset *scan_params);
107 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
108 static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
109 
110 
111 /*
112  * Get the datatype/typmod/collation of the first column of the plan's output.
113  *
114  * This information is stored for ARRAY_SUBLINK execution and for
115  * exprType()/exprTypmod()/exprCollation(), which have no way to get at the
116  * plan associated with a SubPlan node.  We really only need the info for
117  * EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency we save it
118  * always.
119  */
120 static void
get_first_col_type(Plan * plan,Oid * coltype,int32 * coltypmod,Oid * colcollation)121 get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
122 				   Oid *colcollation)
123 {
124 	/* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
125 	if (plan->targetlist)
126 	{
127 		TargetEntry *tent = linitial_node(TargetEntry, plan->targetlist);
128 
129 		if (!tent->resjunk)
130 		{
131 			*coltype = exprType((Node *) tent->expr);
132 			*coltypmod = exprTypmod((Node *) tent->expr);
133 			*colcollation = exprCollation((Node *) tent->expr);
134 			return;
135 		}
136 	}
137 	*coltype = VOIDOID;
138 	*coltypmod = -1;
139 	*colcollation = InvalidOid;
140 }
141 
142 /*
143  * Convert a SubLink (as created by the parser) into a SubPlan.
144  *
145  * We are given the SubLink's contained query, type, ID, and testexpr.  We are
146  * also told if this expression appears at top level of a WHERE/HAVING qual.
147  *
148  * Note: we assume that the testexpr has been AND/OR flattened (actually,
149  * it's been through eval_const_expressions), but not converted to
150  * implicit-AND form; and any SubLinks in it should already have been
151  * converted to SubPlans.  The subquery is as yet untouched, however.
152  *
153  * The result is whatever we need to substitute in place of the SubLink node
154  * in the executable expression.  If we're going to do the subplan as a
155  * regular subplan, this will be the constructed SubPlan node.  If we're going
156  * to do the subplan as an InitPlan, the SubPlan node instead goes into
157  * root->init_plans, and what we return here is an expression tree
158  * representing the InitPlan's result: usually just a Param node representing
159  * a single scalar result, but possibly a row comparison tree containing
160  * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant
161  * (since the real output Params are elsewhere in the tree, and the MULTIEXPR
162  * subquery itself is in a resjunk tlist entry whose value is uninteresting).
163  */
164 static Node *
make_subplan(PlannerInfo * root,Query * orig_subquery,SubLinkType subLinkType,int subLinkId,Node * testexpr,bool isTopQual)165 make_subplan(PlannerInfo *root, Query *orig_subquery,
166 			 SubLinkType subLinkType, int subLinkId,
167 			 Node *testexpr, bool isTopQual)
168 {
169 	Query	   *subquery;
170 	bool		simple_exists = false;
171 	double		tuple_fraction;
172 	PlannerInfo *subroot;
173 	RelOptInfo *final_rel;
174 	Path	   *best_path;
175 	Plan	   *plan;
176 	List	   *plan_params;
177 	Node	   *result;
178 
179 	/*
180 	 * Copy the source Query node.  This is a quick and dirty kluge to resolve
181 	 * the fact that the parser can generate trees with multiple links to the
182 	 * same sub-Query node, but the planner wants to scribble on the Query.
183 	 * Try to clean this up when we do querytree redesign...
184 	 */
185 	subquery = copyObject(orig_subquery);
186 
187 	/*
188 	 * If it's an EXISTS subplan, we might be able to simplify it.
189 	 */
190 	if (subLinkType == EXISTS_SUBLINK)
191 		simple_exists = simplify_EXISTS_query(root, subquery);
192 
193 	/*
194 	 * For an EXISTS subplan, tell lower-level planner to expect that only the
195 	 * first tuple will be retrieved.  For ALL and ANY subplans, we will be
196 	 * able to stop evaluating if the test condition fails or matches, so very
197 	 * often not all the tuples will be retrieved; for lack of a better idea,
198 	 * specify 50% retrieval.  For EXPR, MULTIEXPR, and ROWCOMPARE subplans,
199 	 * use default behavior (we're only expecting one row out, anyway).
200 	 *
201 	 * NOTE: if you change these numbers, also change cost_subplan() in
202 	 * path/costsize.c.
203 	 *
204 	 * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
205 	 * its output.  In that case it would've been better to specify full
206 	 * retrieval.  At present, however, we can only check hashability after
207 	 * we've made the subplan :-(.  (Determining whether it'll fit in hash_mem
208 	 * is the really hard part.)  Therefore, we don't want to be too
209 	 * optimistic about the percentage of tuples retrieved, for fear of
210 	 * selecting a plan that's bad for the materialization case.
211 	 */
212 	if (subLinkType == EXISTS_SUBLINK)
213 		tuple_fraction = 1.0;	/* just like a LIMIT 1 */
214 	else if (subLinkType == ALL_SUBLINK ||
215 			 subLinkType == ANY_SUBLINK)
216 		tuple_fraction = 0.5;	/* 50% */
217 	else
218 		tuple_fraction = 0.0;	/* default behavior */
219 
220 	/* plan_params should not be in use in current query level */
221 	Assert(root->plan_params == NIL);
222 
223 	/* Generate Paths for the subquery */
224 	subroot = subquery_planner(root->glob, subquery,
225 							   root,
226 							   false, tuple_fraction);
227 
228 	/* Isolate the params needed by this specific subplan */
229 	plan_params = root->plan_params;
230 	root->plan_params = NIL;
231 
232 	/*
233 	 * Select best Path and turn it into a Plan.  At least for now, there
234 	 * seems no reason to postpone doing that.
235 	 */
236 	final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
237 	best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
238 
239 	plan = create_plan(subroot, best_path);
240 
241 	/* And convert to SubPlan or InitPlan format. */
242 	result = build_subplan(root, plan, subroot, plan_params,
243 						   subLinkType, subLinkId,
244 						   testexpr, NIL, isTopQual);
245 
246 	/*
247 	 * If it's a correlated EXISTS with an unimportant targetlist, we might be
248 	 * able to transform it to the equivalent of an IN and then implement it
249 	 * by hashing.  We don't have enough information yet to tell which way is
250 	 * likely to be better (it depends on the expected number of executions of
251 	 * the EXISTS qual, and we are much too early in planning the outer query
252 	 * to be able to guess that).  So we generate both plans, if possible, and
253 	 * leave it to the executor to decide which to use.
254 	 */
255 	if (simple_exists && IsA(result, SubPlan))
256 	{
257 		Node	   *newtestexpr;
258 		List	   *paramIds;
259 
260 		/* Make a second copy of the original subquery */
261 		subquery = copyObject(orig_subquery);
262 		/* and re-simplify */
263 		simple_exists = simplify_EXISTS_query(root, subquery);
264 		Assert(simple_exists);
265 		/* See if it can be converted to an ANY query */
266 		subquery = convert_EXISTS_to_ANY(root, subquery,
267 										 &newtestexpr, &paramIds);
268 		if (subquery)
269 		{
270 			/* Generate Paths for the ANY subquery; we'll need all rows */
271 			subroot = subquery_planner(root->glob, subquery,
272 									   root,
273 									   false, 0.0);
274 
275 			/* Isolate the params needed by this specific subplan */
276 			plan_params = root->plan_params;
277 			root->plan_params = NIL;
278 
279 			/* Select best Path and turn it into a Plan */
280 			final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
281 			best_path = final_rel->cheapest_total_path;
282 
283 			plan = create_plan(subroot, best_path);
284 
285 			/* Now we can check if it'll fit in hash_mem */
286 			/* XXX can we check this at the Path stage? */
287 			if (subplan_is_hashable(plan))
288 			{
289 				SubPlan    *hashplan;
290 				AlternativeSubPlan *asplan;
291 
292 				/* OK, convert to SubPlan format. */
293 				hashplan = castNode(SubPlan,
294 									build_subplan(root, plan, subroot,
295 												  plan_params,
296 												  ANY_SUBLINK, 0,
297 												  newtestexpr,
298 												  paramIds,
299 												  true));
300 				/* Check we got what we expected */
301 				Assert(hashplan->parParam == NIL);
302 				Assert(hashplan->useHashTable);
303 
304 				/* Leave it to the executor to decide which plan to use */
305 				asplan = makeNode(AlternativeSubPlan);
306 				asplan->subplans = list_make2(result, hashplan);
307 				result = (Node *) asplan;
308 			}
309 		}
310 	}
311 
312 	return result;
313 }
314 
315 /*
316  * Build a SubPlan node given the raw inputs --- subroutine for make_subplan
317  *
318  * Returns either the SubPlan, or a replacement expression if we decide to
319  * make it an InitPlan, as explained in the comments for make_subplan.
320  */
321 static Node *
build_subplan(PlannerInfo * root,Plan * plan,PlannerInfo * subroot,List * plan_params,SubLinkType subLinkType,int subLinkId,Node * testexpr,List * testexpr_paramids,bool unknownEqFalse)322 build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
323 			  List *plan_params,
324 			  SubLinkType subLinkType, int subLinkId,
325 			  Node *testexpr, List *testexpr_paramids,
326 			  bool unknownEqFalse)
327 {
328 	Node	   *result;
329 	SubPlan    *splan;
330 	bool		isInitPlan;
331 	ListCell   *lc;
332 
333 	/*
334 	 * Initialize the SubPlan node.  Note plan_id, plan_name, and cost fields
335 	 * are set further down.
336 	 */
337 	splan = makeNode(SubPlan);
338 	splan->subLinkType = subLinkType;
339 	splan->testexpr = NULL;
340 	splan->paramIds = NIL;
341 	get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
342 					   &splan->firstColCollation);
343 	splan->useHashTable = false;
344 	splan->unknownEqFalse = unknownEqFalse;
345 	splan->parallel_safe = plan->parallel_safe;
346 	splan->setParam = NIL;
347 	splan->parParam = NIL;
348 	splan->args = NIL;
349 
350 	/*
351 	 * Make parParam and args lists of param IDs and expressions that current
352 	 * query level will pass to this child plan.
353 	 */
354 	foreach(lc, plan_params)
355 	{
356 		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lc);
357 		Node	   *arg = pitem->item;
358 
359 		/*
360 		 * The Var, PlaceHolderVar, or Aggref has already been adjusted to
361 		 * have the correct varlevelsup, phlevelsup, or agglevelsup.
362 		 *
363 		 * If it's a PlaceHolderVar or Aggref, its arguments might contain
364 		 * SubLinks, which have not yet been processed (see the comments for
365 		 * SS_replace_correlation_vars).  Do that now.
366 		 */
367 		if (IsA(arg, PlaceHolderVar) ||
368 			IsA(arg, Aggref))
369 			arg = SS_process_sublinks(root, arg, false);
370 
371 		splan->parParam = lappend_int(splan->parParam, pitem->paramId);
372 		splan->args = lappend(splan->args, arg);
373 	}
374 
375 	/*
376 	 * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY,
377 	 * ROWCOMPARE, or MULTIEXPR types can be used as initPlans.  For EXISTS,
378 	 * EXPR, or ARRAY, we return a Param referring to the result of evaluating
379 	 * the initPlan.  For ROWCOMPARE, we must modify the testexpr tree to
380 	 * contain PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted
381 	 * by the parser, and then return that tree.  For MULTIEXPR, we return a
382 	 * null constant: the resjunk targetlist item containing the SubLink does
383 	 * not need to return anything useful, since the referencing Params are
384 	 * elsewhere.
385 	 */
386 	if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK)
387 	{
388 		Param	   *prm;
389 
390 		Assert(testexpr == NULL);
391 		prm = generate_new_exec_param(root, BOOLOID, -1, InvalidOid);
392 		splan->setParam = list_make1_int(prm->paramid);
393 		isInitPlan = true;
394 		result = (Node *) prm;
395 	}
396 	else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK)
397 	{
398 		TargetEntry *te = linitial(plan->targetlist);
399 		Param	   *prm;
400 
401 		Assert(!te->resjunk);
402 		Assert(testexpr == NULL);
403 		prm = generate_new_exec_param(root,
404 									  exprType((Node *) te->expr),
405 									  exprTypmod((Node *) te->expr),
406 									  exprCollation((Node *) te->expr));
407 		splan->setParam = list_make1_int(prm->paramid);
408 		isInitPlan = true;
409 		result = (Node *) prm;
410 	}
411 	else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK)
412 	{
413 		TargetEntry *te = linitial(plan->targetlist);
414 		Oid			arraytype;
415 		Param	   *prm;
416 
417 		Assert(!te->resjunk);
418 		Assert(testexpr == NULL);
419 		arraytype = get_promoted_array_type(exprType((Node *) te->expr));
420 		if (!OidIsValid(arraytype))
421 			elog(ERROR, "could not find array type for datatype %s",
422 				 format_type_be(exprType((Node *) te->expr)));
423 		prm = generate_new_exec_param(root,
424 									  arraytype,
425 									  exprTypmod((Node *) te->expr),
426 									  exprCollation((Node *) te->expr));
427 		splan->setParam = list_make1_int(prm->paramid);
428 		isInitPlan = true;
429 		result = (Node *) prm;
430 	}
431 	else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK)
432 	{
433 		/* Adjust the Params */
434 		List	   *params;
435 
436 		Assert(testexpr != NULL);
437 		params = generate_subquery_params(root,
438 										  plan->targetlist,
439 										  &splan->paramIds);
440 		result = convert_testexpr(root,
441 								  testexpr,
442 								  params);
443 		splan->setParam = list_copy(splan->paramIds);
444 		isInitPlan = true;
445 
446 		/*
447 		 * The executable expression is returned to become part of the outer
448 		 * plan's expression tree; it is not kept in the initplan node.
449 		 */
450 	}
451 	else if (subLinkType == MULTIEXPR_SUBLINK)
452 	{
453 		/*
454 		 * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param
455 		 * for each output column.
456 		 */
457 		List	   *params;
458 
459 		Assert(testexpr == NULL);
460 		params = generate_subquery_params(root,
461 										  plan->targetlist,
462 										  &splan->setParam);
463 
464 		/*
465 		 * Save the list of replacement Params in the n'th cell of
466 		 * root->multiexpr_params; setrefs.c will use it to replace
467 		 * PARAM_MULTIEXPR Params.
468 		 */
469 		while (list_length(root->multiexpr_params) < subLinkId)
470 			root->multiexpr_params = lappend(root->multiexpr_params, NIL);
471 		lc = list_nth_cell(root->multiexpr_params, subLinkId - 1);
472 		Assert(lfirst(lc) == NIL);
473 		lfirst(lc) = params;
474 
475 		/* It can be an initplan if there are no parParams. */
476 		if (splan->parParam == NIL)
477 		{
478 			isInitPlan = true;
479 			result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid);
480 		}
481 		else
482 		{
483 			isInitPlan = false;
484 			result = (Node *) splan;
485 		}
486 	}
487 	else
488 	{
489 		/*
490 		 * Adjust the Params in the testexpr, unless caller already took care
491 		 * of it (as indicated by passing a list of Param IDs).
492 		 */
493 		if (testexpr && testexpr_paramids == NIL)
494 		{
495 			List	   *params;
496 
497 			params = generate_subquery_params(root,
498 											  plan->targetlist,
499 											  &splan->paramIds);
500 			splan->testexpr = convert_testexpr(root,
501 											   testexpr,
502 											   params);
503 		}
504 		else
505 		{
506 			splan->testexpr = testexpr;
507 			splan->paramIds = testexpr_paramids;
508 		}
509 
510 		/*
511 		 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
512 		 * initPlans, even when they are uncorrelated or undirect correlated,
513 		 * because we need to scan the output of the subplan for each outer
514 		 * tuple.  But if it's a not-direct-correlated IN (= ANY) test, we
515 		 * might be able to use a hashtable to avoid comparing all the tuples.
516 		 */
517 		if (subLinkType == ANY_SUBLINK &&
518 			splan->parParam == NIL &&
519 			subplan_is_hashable(plan) &&
520 			testexpr_is_hashable(splan->testexpr, splan->paramIds))
521 			splan->useHashTable = true;
522 
523 		/*
524 		 * Otherwise, we have the option to tack a Material node onto the top
525 		 * of the subplan, to reduce the cost of reading it repeatedly.  This
526 		 * is pointless for a direct-correlated subplan, since we'd have to
527 		 * recompute its results each time anyway.  For uncorrelated/undirect
528 		 * correlated subplans, we add Material unless the subplan's top plan
529 		 * node would materialize its output anyway.  Also, if enable_material
530 		 * is false, then the user does not want us to materialize anything
531 		 * unnecessarily, so we don't.
532 		 */
533 		else if (splan->parParam == NIL && enable_material &&
534 				 !ExecMaterializesOutput(nodeTag(plan)))
535 			plan = materialize_finished_plan(plan);
536 
537 		result = (Node *) splan;
538 		isInitPlan = false;
539 	}
540 
541 	/*
542 	 * Add the subplan and its PlannerInfo to the global lists.
543 	 */
544 	root->glob->subplans = lappend(root->glob->subplans, plan);
545 	root->glob->subroots = lappend(root->glob->subroots, subroot);
546 	splan->plan_id = list_length(root->glob->subplans);
547 
548 	if (isInitPlan)
549 		root->init_plans = lappend(root->init_plans, splan);
550 
551 	/*
552 	 * A parameterless subplan (not initplan) should be prepared to handle
553 	 * REWIND efficiently.  If it has direct parameters then there's no point
554 	 * since it'll be reset on each scan anyway; and if it's an initplan then
555 	 * there's no point since it won't get re-run without parameter changes
556 	 * anyway.  The input of a hashed subplan doesn't need REWIND either.
557 	 */
558 	if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable)
559 		root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
560 												   splan->plan_id);
561 
562 	/* Label the subplan for EXPLAIN purposes */
563 	splan->plan_name = palloc(32 + 12 * list_length(splan->setParam));
564 	sprintf(splan->plan_name, "%s %d",
565 			isInitPlan ? "InitPlan" : "SubPlan",
566 			splan->plan_id);
567 	if (splan->setParam)
568 	{
569 		char	   *ptr = splan->plan_name + strlen(splan->plan_name);
570 
571 		ptr += sprintf(ptr, " (returns ");
572 		foreach(lc, splan->setParam)
573 		{
574 			ptr += sprintf(ptr, "$%d%s",
575 						   lfirst_int(lc),
576 						   lnext(splan->setParam, lc) ? "," : ")");
577 		}
578 	}
579 
580 	/* Lastly, fill in the cost estimates for use later */
581 	cost_subplan(root, splan, plan);
582 
583 	return result;
584 }
585 
586 /*
587  * generate_subquery_params: build a list of Params representing the output
588  * columns of a sublink's sub-select, given the sub-select's targetlist.
589  *
590  * We also return an integer list of the paramids of the Params.
591  */
592 static List *
generate_subquery_params(PlannerInfo * root,List * tlist,List ** paramIds)593 generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds)
594 {
595 	List	   *result;
596 	List	   *ids;
597 	ListCell   *lc;
598 
599 	result = ids = NIL;
600 	foreach(lc, tlist)
601 	{
602 		TargetEntry *tent = (TargetEntry *) lfirst(lc);
603 		Param	   *param;
604 
605 		if (tent->resjunk)
606 			continue;
607 
608 		param = generate_new_exec_param(root,
609 										exprType((Node *) tent->expr),
610 										exprTypmod((Node *) tent->expr),
611 										exprCollation((Node *) tent->expr));
612 		result = lappend(result, param);
613 		ids = lappend_int(ids, param->paramid);
614 	}
615 
616 	*paramIds = ids;
617 	return result;
618 }
619 
620 /*
621  * generate_subquery_vars: build a list of Vars representing the output
622  * columns of a sublink's sub-select, given the sub-select's targetlist.
623  * The Vars have the specified varno (RTE index).
624  */
625 static List *
generate_subquery_vars(PlannerInfo * root,List * tlist,Index varno)626 generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
627 {
628 	List	   *result;
629 	ListCell   *lc;
630 
631 	result = NIL;
632 	foreach(lc, tlist)
633 	{
634 		TargetEntry *tent = (TargetEntry *) lfirst(lc);
635 		Var		   *var;
636 
637 		if (tent->resjunk)
638 			continue;
639 
640 		var = makeVarFromTargetEntry(varno, tent);
641 		result = lappend(result, var);
642 	}
643 
644 	return result;
645 }
646 
647 /*
648  * convert_testexpr: convert the testexpr given by the parser into
649  * actually executable form.  This entails replacing PARAM_SUBLINK Params
650  * with Params or Vars representing the results of the sub-select.  The
651  * nodes to be substituted are passed in as the List result from
652  * generate_subquery_params or generate_subquery_vars.
653  */
654 static Node *
convert_testexpr(PlannerInfo * root,Node * testexpr,List * subst_nodes)655 convert_testexpr(PlannerInfo *root,
656 				 Node *testexpr,
657 				 List *subst_nodes)
658 {
659 	convert_testexpr_context context;
660 
661 	context.root = root;
662 	context.subst_nodes = subst_nodes;
663 	return convert_testexpr_mutator(testexpr, &context);
664 }
665 
666 static Node *
convert_testexpr_mutator(Node * node,convert_testexpr_context * context)667 convert_testexpr_mutator(Node *node,
668 						 convert_testexpr_context *context)
669 {
670 	if (node == NULL)
671 		return NULL;
672 	if (IsA(node, Param))
673 	{
674 		Param	   *param = (Param *) node;
675 
676 		if (param->paramkind == PARAM_SUBLINK)
677 		{
678 			if (param->paramid <= 0 ||
679 				param->paramid > list_length(context->subst_nodes))
680 				elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid);
681 
682 			/*
683 			 * We copy the list item to avoid having doubly-linked
684 			 * substructure in the modified parse tree.  This is probably
685 			 * unnecessary when it's a Param, but be safe.
686 			 */
687 			return (Node *) copyObject(list_nth(context->subst_nodes,
688 												param->paramid - 1));
689 		}
690 	}
691 	if (IsA(node, SubLink))
692 	{
693 		/*
694 		 * If we come across a nested SubLink, it is neither necessary nor
695 		 * correct to recurse into it: any PARAM_SUBLINKs we might find inside
696 		 * belong to the inner SubLink not the outer. So just return it as-is.
697 		 *
698 		 * This reasoning depends on the assumption that nothing will pull
699 		 * subexpressions into or out of the testexpr field of a SubLink, at
700 		 * least not without replacing PARAM_SUBLINKs first.  If we did want
701 		 * to do that we'd need to rethink the parser-output representation
702 		 * altogether, since currently PARAM_SUBLINKs are only unique per
703 		 * SubLink not globally across the query.  The whole point of
704 		 * replacing them with Vars or PARAM_EXEC nodes is to make them
705 		 * globally unique before they escape from the SubLink's testexpr.
706 		 *
707 		 * Note: this can't happen when called during SS_process_sublinks,
708 		 * because that recursively processes inner SubLinks first.  It can
709 		 * happen when called from convert_ANY_sublink_to_join, though.
710 		 */
711 		return node;
712 	}
713 	return expression_tree_mutator(node,
714 								   convert_testexpr_mutator,
715 								   (void *) context);
716 }
717 
718 /*
719  * subplan_is_hashable: can we implement an ANY subplan by hashing?
720  */
721 static bool
subplan_is_hashable(Plan * plan)722 subplan_is_hashable(Plan *plan)
723 {
724 	double		subquery_size;
725 
726 	/*
727 	 * The estimated size of the subquery result must fit in hash_mem. (Note:
728 	 * we use heap tuple overhead here even though the tuples will actually be
729 	 * stored as MinimalTuples; this provides some fudge factor for hashtable
730 	 * overhead.)
731 	 */
732 	subquery_size = plan->plan_rows *
733 		(MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
734 	if (subquery_size > get_hash_memory_limit())
735 		return false;
736 
737 	return true;
738 }
739 
740 /*
741  * testexpr_is_hashable: is an ANY SubLink's test expression hashable?
742  *
743  * To identify LHS vs RHS of the hash expression, we must be given the
744  * list of output Param IDs of the SubLink's subquery.
745  */
746 static bool
testexpr_is_hashable(Node * testexpr,List * param_ids)747 testexpr_is_hashable(Node *testexpr, List *param_ids)
748 {
749 	/*
750 	 * The testexpr must be a single OpExpr, or an AND-clause containing only
751 	 * OpExprs, each of which satisfy test_opexpr_is_hashable().
752 	 */
753 	if (testexpr && IsA(testexpr, OpExpr))
754 	{
755 		if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids))
756 			return true;
757 	}
758 	else if (is_andclause(testexpr))
759 	{
760 		ListCell   *l;
761 
762 		foreach(l, ((BoolExpr *) testexpr)->args)
763 		{
764 			Node	   *andarg = (Node *) lfirst(l);
765 
766 			if (!IsA(andarg, OpExpr))
767 				return false;
768 			if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids))
769 				return false;
770 		}
771 		return true;
772 	}
773 
774 	return false;
775 }
776 
777 static bool
test_opexpr_is_hashable(OpExpr * testexpr,List * param_ids)778 test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids)
779 {
780 	/*
781 	 * The combining operator must be hashable and strict.  The need for
782 	 * hashability is obvious, since we want to use hashing.  Without
783 	 * strictness, behavior in the presence of nulls is too unpredictable.  We
784 	 * actually must assume even more than plain strictness: it can't yield
785 	 * NULL for non-null inputs, either (see nodeSubplan.c).  However, hash
786 	 * indexes and hash joins assume that too.
787 	 */
788 	if (!hash_ok_operator(testexpr))
789 		return false;
790 
791 	/*
792 	 * The left and right inputs must belong to the outer and inner queries
793 	 * respectively; hence Params that will be supplied by the subquery must
794 	 * not appear in the LHS, and Vars of the outer query must not appear in
795 	 * the RHS.  (Ordinarily, this must be true because of the way that the
796 	 * parser builds an ANY SubLink's testexpr ... but inlining of functions
797 	 * could have changed the expression's structure, so we have to check.
798 	 * Such cases do not occur often enough to be worth trying to optimize, so
799 	 * we don't worry about trying to commute the clause or anything like
800 	 * that; we just need to be sure not to build an invalid plan.)
801 	 */
802 	if (list_length(testexpr->args) != 2)
803 		return false;
804 	if (contain_exec_param((Node *) linitial(testexpr->args), param_ids))
805 		return false;
806 	if (contain_var_clause((Node *) lsecond(testexpr->args)))
807 		return false;
808 	return true;
809 }
810 
811 /*
812  * Check expression is hashable + strict
813  *
814  * We could use op_hashjoinable() and op_strict(), but do it like this to
815  * avoid a redundant cache lookup.
816  */
817 static bool
hash_ok_operator(OpExpr * expr)818 hash_ok_operator(OpExpr *expr)
819 {
820 	Oid			opid = expr->opno;
821 
822 	/* quick out if not a binary operator */
823 	if (list_length(expr->args) != 2)
824 		return false;
825 	if (opid == ARRAY_EQ_OP)
826 	{
827 		/* array_eq is strict, but must check input type to ensure hashable */
828 		/* XXX record_eq will need same treatment when it becomes hashable */
829 		Node	   *leftarg = linitial(expr->args);
830 
831 		return op_hashjoinable(opid, exprType(leftarg));
832 	}
833 	else
834 	{
835 		/* else must look up the operator properties */
836 		HeapTuple	tup;
837 		Form_pg_operator optup;
838 
839 		tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid));
840 		if (!HeapTupleIsValid(tup))
841 			elog(ERROR, "cache lookup failed for operator %u", opid);
842 		optup = (Form_pg_operator) GETSTRUCT(tup);
843 		if (!optup->oprcanhash || !func_strict(optup->oprcode))
844 		{
845 			ReleaseSysCache(tup);
846 			return false;
847 		}
848 		ReleaseSysCache(tup);
849 		return true;
850 	}
851 }
852 
853 
/*
 * SS_process_ctes: process a query's WITH list
 *
 * Consider each CTE in the WITH list and either ignore it (if it's an
 * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM,
 * or convert it to an initplan.
 *
 * root is the planner state for the query level that owns the WITH list
 * (root->parse->cteList).  root->cte_plan_ids must be NIL on entry.
 *
 * A side effect is to fill in root->cte_plan_ids with a list that
 * parallels root->parse->cteList and provides the subplan ID for
 * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan.
 * For each CTE that does become an initplan, we also append its Plan and
 * PlannerInfo to root->glob->subplans and root->glob->subroots, and its
 * SubPlan node to root->init_plans.
 *
 * An error is raised if planning a CTE leaves entries in root->plan_params.
 */
void
SS_process_ctes(PlannerInfo *root)
{
	ListCell   *lc;

	/* Every path through the loop below appends exactly one entry */
	Assert(root->cte_plan_ids == NIL);

	foreach(lc, root->parse->cteList)
	{
		CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
		CmdType		cmdType = ((Query *) cte->ctequery)->commandType;
		Query	   *subquery;
		PlannerInfo *subroot;
		RelOptInfo *final_rel;
		Path	   *best_path;
		Plan	   *plan;
		SubPlan    *splan;
		int			paramid;

		/*
		 * Ignore SELECT CTEs that are not actually referenced anywhere.  (A
		 * non-SELECT CTE falls through and is planned even if unreferenced.)
		 */
		if (cte->cterefcount == 0 && cmdType == CMD_SELECT)
		{
			/* Make a dummy entry in cte_plan_ids */
			root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
			continue;
		}

		/*
		 * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of
		 * implementing it as a separately-planned CTE.
		 *
		 * We cannot inline if any of these conditions hold:
		 *
		 * 1. The user said not to (the CTEMaterializeAlways option).
		 *
		 * 2. The CTE is recursive.
		 *
		 * 3. The CTE has side-effects; this includes either not being a plain
		 * SELECT, or containing volatile functions.  Inlining might change
		 * the side-effects, which would be bad.
		 *
		 * 4. The CTE is multiply-referenced and contains a self-reference to
		 * a recursive CTE outside itself.  Inlining would result in multiple
		 * recursive self-references, which we don't support.
		 *
		 * Otherwise, we have an option whether to inline or not.  That should
		 * always be a win if there's just a single reference, but if the CTE
		 * is multiply-referenced then it's unclear: inlining adds duplicate
		 * computations, but the ability to absorb restrictions from the outer
		 * query level could outweigh that.  We do not have nearly enough
		 * information at this point to tell whether that's true, so we let
		 * the user express a preference.  Our default behavior is to inline
		 * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever
		 * will be inlined even if multiply referenced.
		 *
		 * Note: we check for volatile functions last, because that's more
		 * expensive than the other tests needed.
		 */
		if ((cte->ctematerialized == CTEMaterializeNever ||
			 (cte->ctematerialized == CTEMaterializeDefault &&
			  cte->cterefcount == 1)) &&
			!cte->cterecursive &&
			cmdType == CMD_SELECT &&
			!contain_dml(cte->ctequery) &&
			(cte->cterefcount <= 1 ||
			 !contain_outer_selfref(cte->ctequery)) &&
			!contain_volatile_functions(cte->ctequery))
		{
			inline_cte(root, cte);
			/* Make a dummy entry in cte_plan_ids */
			root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
			continue;
		}

		/*
		 * From here on, the CTE is implemented as an initplan.
		 *
		 * Copy the source Query node.  Probably not necessary, but let's keep
		 * this similar to make_subplan.
		 */
		subquery = (Query *) copyObject(cte->ctequery);

		/* plan_params should not be in use in current query level */
		Assert(root->plan_params == NIL);

		/*
		 * Generate Paths for the CTE query.  Always plan for full retrieval
		 * --- we don't have enough info to predict otherwise.
		 */
		subroot = subquery_planner(root->glob, subquery,
								   root,
								   cte->cterecursive, 0.0);

		/*
		 * Since the current query level doesn't yet contain any RTEs, it
		 * should not be possible for the CTE to have requested parameters of
		 * this level.
		 */
		if (root->plan_params)
			elog(ERROR, "unexpected outer reference in CTE query");

		/*
		 * Select best Path and turn it into a Plan.  At least for now, there
		 * seems no reason to postpone doing that.
		 */
		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
		best_path = final_rel->cheapest_total_path;

		plan = create_plan(subroot, best_path);

		/*
		 * Make a SubPlan node for it.  This is just enough unlike
		 * build_subplan that we can't share code.
		 *
		 * Note plan_id, plan_name, and cost fields are set further down.
		 */
		splan = makeNode(SubPlan);
		splan->subLinkType = CTE_SUBLINK;
		splan->testexpr = NULL;
		splan->paramIds = NIL;
		get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
						   &splan->firstColCollation);
		splan->useHashTable = false;
		splan->unknownEqFalse = false;

		/*
		 * CTE scans are not considered for parallelism (cf
		 * set_rel_consider_parallel), and even if they were, initPlans aren't
		 * parallel-safe.
		 */
		splan->parallel_safe = false;
		/* setParam is only a placeholder here; it is replaced just below */
		splan->setParam = NIL;
		splan->parParam = NIL;
		splan->args = NIL;

		/*
		 * The node can't have any inputs (since it's an initplan), so the
		 * parParam and args lists remain empty.  (It could contain references
		 * to earlier CTEs' output param IDs, but CTE outputs are not
		 * propagated via the args list.)
		 */

		/*
		 * Assign a param ID to represent the CTE's output.  No ordinary
		 * "evaluation" of this param slot ever happens, but we use the param
		 * ID for setParam/chgParam signaling just as if the CTE plan were
		 * returning a simple scalar output.  (Also, the executor abuses the
		 * ParamExecData slot for this param ID for communication among
		 * multiple CteScan nodes that might be scanning this CTE.)
		 */
		paramid = assign_special_exec_param(root);
		splan->setParam = list_make1_int(paramid);

		/*
		 * Add the subplan and its PlannerInfo to the global lists.  The
		 * plan_id is the subplan's position in glob->subplans, so it must be
		 * computed after the append.
		 */
		root->glob->subplans = lappend(root->glob->subplans, plan);
		root->glob->subroots = lappend(root->glob->subroots, subroot);
		splan->plan_id = list_length(root->glob->subplans);

		root->init_plans = lappend(root->init_plans, splan);

		root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id);

		/* Label the subplan for EXPLAIN purposes */
		splan->plan_name = psprintf("CTE %s", cte->ctename);

		/* Lastly, fill in the cost estimates for use later */
		cost_subplan(root, splan, plan);
	}
}
1036 
1037 /*
1038  * contain_dml: is any subquery not a plain SELECT?
1039  *
1040  * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc.
1041  */
1042 static bool
contain_dml(Node * node)1043 contain_dml(Node *node)
1044 {
1045 	return contain_dml_walker(node, NULL);
1046 }
1047 
1048 static bool
contain_dml_walker(Node * node,void * context)1049 contain_dml_walker(Node *node, void *context)
1050 {
1051 	if (node == NULL)
1052 		return false;
1053 	if (IsA(node, Query))
1054 	{
1055 		Query	   *query = (Query *) node;
1056 
1057 		if (query->commandType != CMD_SELECT ||
1058 			query->rowMarks != NIL)
1059 			return true;
1060 
1061 		return query_tree_walker(query, contain_dml_walker, context, 0);
1062 	}
1063 	return expression_tree_walker(node, contain_dml_walker, context);
1064 }
1065 
1066 /*
1067  * contain_outer_selfref: is there an external recursive self-reference?
1068  */
1069 static bool
contain_outer_selfref(Node * node)1070 contain_outer_selfref(Node *node)
1071 {
1072 	Index		depth = 0;
1073 
1074 	/*
1075 	 * We should be starting with a Query, so that depth will be 1 while
1076 	 * examining its immediate contents.
1077 	 */
1078 	Assert(IsA(node, Query));
1079 
1080 	return contain_outer_selfref_walker(node, &depth);
1081 }
1082 
1083 static bool
contain_outer_selfref_walker(Node * node,Index * depth)1084 contain_outer_selfref_walker(Node *node, Index *depth)
1085 {
1086 	if (node == NULL)
1087 		return false;
1088 	if (IsA(node, RangeTblEntry))
1089 	{
1090 		RangeTblEntry *rte = (RangeTblEntry *) node;
1091 
1092 		/*
1093 		 * Check for a self-reference to a CTE that's above the Query that our
1094 		 * search started at.
1095 		 */
1096 		if (rte->rtekind == RTE_CTE &&
1097 			rte->self_reference &&
1098 			rte->ctelevelsup >= *depth)
1099 			return true;
1100 		return false;			/* allow range_table_walker to continue */
1101 	}
1102 	if (IsA(node, Query))
1103 	{
1104 		/* Recurse into subquery, tracking nesting depth properly */
1105 		Query	   *query = (Query *) node;
1106 		bool		result;
1107 
1108 		(*depth)++;
1109 
1110 		result = query_tree_walker(query, contain_outer_selfref_walker,
1111 								   (void *) depth, QTW_EXAMINE_RTES_BEFORE);
1112 
1113 		(*depth)--;
1114 
1115 		return result;
1116 	}
1117 	return expression_tree_walker(node, contain_outer_selfref_walker,
1118 								  (void *) depth);
1119 }
1120 
1121 /*
1122  * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs
1123  */
1124 static void
inline_cte(PlannerInfo * root,CommonTableExpr * cte)1125 inline_cte(PlannerInfo *root, CommonTableExpr *cte)
1126 {
1127 	struct inline_cte_walker_context context;
1128 
1129 	context.ctename = cte->ctename;
1130 	/* Start at levelsup = -1 because we'll immediately increment it */
1131 	context.levelsup = -1;
1132 	context.refcount = cte->cterefcount;
1133 	context.ctequery = castNode(Query, cte->ctequery);
1134 
1135 	(void) inline_cte_walker((Node *) root->parse, &context);
1136 
1137 	/* Assert we replaced all references */
1138 	Assert(context.refcount == 0);
1139 }
1140 
1141 static bool
inline_cte_walker(Node * node,inline_cte_walker_context * context)1142 inline_cte_walker(Node *node, inline_cte_walker_context *context)
1143 {
1144 	if (node == NULL)
1145 		return false;
1146 	if (IsA(node, Query))
1147 	{
1148 		Query	   *query = (Query *) node;
1149 
1150 		context->levelsup++;
1151 
1152 		/*
1153 		 * Visit the query's RTE nodes after their contents; otherwise
1154 		 * query_tree_walker would descend into the newly inlined CTE query,
1155 		 * which we don't want.
1156 		 */
1157 		(void) query_tree_walker(query, inline_cte_walker, context,
1158 								 QTW_EXAMINE_RTES_AFTER);
1159 
1160 		context->levelsup--;
1161 
1162 		return false;
1163 	}
1164 	else if (IsA(node, RangeTblEntry))
1165 	{
1166 		RangeTblEntry *rte = (RangeTblEntry *) node;
1167 
1168 		if (rte->rtekind == RTE_CTE &&
1169 			strcmp(rte->ctename, context->ctename) == 0 &&
1170 			rte->ctelevelsup == context->levelsup)
1171 		{
1172 			/*
1173 			 * Found a reference to replace.  Generate a copy of the CTE query
1174 			 * with appropriate level adjustment for outer references (e.g.,
1175 			 * to other CTEs).
1176 			 */
1177 			Query	   *newquery = copyObject(context->ctequery);
1178 
1179 			if (context->levelsup > 0)
1180 				IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1);
1181 
1182 			/*
1183 			 * Convert the RTE_CTE RTE into a RTE_SUBQUERY.
1184 			 *
1185 			 * Historically, a FOR UPDATE clause has been treated as extending
1186 			 * into views and subqueries, but not into CTEs.  We preserve this
1187 			 * distinction by not trying to push rowmarks into the new
1188 			 * subquery.
1189 			 */
1190 			rte->rtekind = RTE_SUBQUERY;
1191 			rte->subquery = newquery;
1192 			rte->security_barrier = false;
1193 
1194 			/* Zero out CTE-specific fields */
1195 			rte->ctename = NULL;
1196 			rte->ctelevelsup = 0;
1197 			rte->self_reference = false;
1198 			rte->coltypes = NIL;
1199 			rte->coltypmods = NIL;
1200 			rte->colcollations = NIL;
1201 
1202 			/* Count the number of replacements we've done */
1203 			context->refcount--;
1204 		}
1205 
1206 		return false;
1207 	}
1208 
1209 	return expression_tree_walker(node, inline_cte_walker, context);
1210 }
1211 
1212 
1213 /*
1214  * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
1215  *
1216  * The caller has found an ANY SubLink at the top level of one of the query's
1217  * qual clauses, but has not checked the properties of the SubLink further.
1218  * Decide whether it is appropriate to process this SubLink in join style.
1219  * If so, form a JoinExpr and return it.  Return NULL if the SubLink cannot
1220  * be converted to a join.
1221  *
1222  * The only non-obvious input parameter is available_rels: this is the set
1223  * of query rels that can safely be referenced in the sublink expression.
1224  * (We must restrict this to avoid changing the semantics when a sublink
1225  * is present in an outer join's ON qual.)  The conversion must fail if
1226  * the converted qual would reference any but these parent-query relids.
1227  *
1228  * On success, the returned JoinExpr has larg = NULL and rarg = the jointree
1229  * item representing the pulled-up subquery.  The caller must set larg to
1230  * represent the relation(s) on the lefthand side of the new join, and insert
1231  * the JoinExpr into the upper query's jointree at an appropriate place
1232  * (typically, where the lefthand relation(s) had been).  Note that the
1233  * passed-in SubLink must also be removed from its original position in the
1234  * query quals, since the quals of the returned JoinExpr replace it.
1235  * (Notionally, we replace the SubLink with a constant TRUE, then elide the
1236  * redundant constant from the qual.)
1237  *
1238  * On success, the caller is also responsible for recursively applying
1239  * pull_up_sublinks processing to the rarg and quals of the returned JoinExpr.
1240  * (On failure, there is no need to do anything, since pull_up_sublinks will
1241  * be applied when we recursively plan the sub-select.)
1242  *
1243  * Side effects of a successful conversion include adding the SubLink's
1244  * subselect to the query's rangetable, so that it can be referenced in
1245  * the JoinExpr's rarg.
1246  */
1247 JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo * root,SubLink * sublink,Relids available_rels)1248 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
1249 							Relids available_rels)
1250 {
1251 	JoinExpr   *result;
1252 	Query	   *parse = root->parse;
1253 	Query	   *subselect = (Query *) sublink->subselect;
1254 	Relids		upper_varnos;
1255 	int			rtindex;
1256 	ParseNamespaceItem *nsitem;
1257 	RangeTblEntry *rte;
1258 	RangeTblRef *rtr;
1259 	List	   *subquery_vars;
1260 	Node	   *quals;
1261 	ParseState *pstate;
1262 
1263 	Assert(sublink->subLinkType == ANY_SUBLINK);
1264 
1265 	/*
1266 	 * The sub-select must not refer to any Vars of the parent query. (Vars of
1267 	 * higher levels should be okay, though.)
1268 	 */
1269 	if (contain_vars_of_level((Node *) subselect, 1))
1270 		return NULL;
1271 
1272 	/*
1273 	 * The test expression must contain some Vars of the parent query, else
1274 	 * it's not gonna be a join.  (Note that it won't have Vars referring to
1275 	 * the subquery, rather Params.)
1276 	 */
1277 	upper_varnos = pull_varnos(root, sublink->testexpr);
1278 	if (bms_is_empty(upper_varnos))
1279 		return NULL;
1280 
1281 	/*
1282 	 * However, it can't refer to anything outside available_rels.
1283 	 */
1284 	if (!bms_is_subset(upper_varnos, available_rels))
1285 		return NULL;
1286 
1287 	/*
1288 	 * The combining operators and left-hand expressions mustn't be volatile.
1289 	 */
1290 	if (contain_volatile_functions(sublink->testexpr))
1291 		return NULL;
1292 
1293 	/* Create a dummy ParseState for addRangeTableEntryForSubquery */
1294 	pstate = make_parsestate(NULL);
1295 
1296 	/*
1297 	 * Okay, pull up the sub-select into upper range table.
1298 	 *
1299 	 * We rely here on the assumption that the outer query has no references
1300 	 * to the inner (necessarily true, other than the Vars that we build
1301 	 * below). Therefore this is a lot easier than what pull_up_subqueries has
1302 	 * to go through.
1303 	 */
1304 	nsitem = addRangeTableEntryForSubquery(pstate,
1305 										   subselect,
1306 										   makeAlias("ANY_subquery", NIL),
1307 										   false,
1308 										   false);
1309 	rte = nsitem->p_rte;
1310 	parse->rtable = lappend(parse->rtable, rte);
1311 	rtindex = list_length(parse->rtable);
1312 
1313 	/*
1314 	 * Form a RangeTblRef for the pulled-up sub-select.
1315 	 */
1316 	rtr = makeNode(RangeTblRef);
1317 	rtr->rtindex = rtindex;
1318 
1319 	/*
1320 	 * Build a list of Vars representing the subselect outputs.
1321 	 */
1322 	subquery_vars = generate_subquery_vars(root,
1323 										   subselect->targetList,
1324 										   rtindex);
1325 
1326 	/*
1327 	 * Build the new join's qual expression, replacing Params with these Vars.
1328 	 */
1329 	quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
1330 
1331 	/*
1332 	 * And finally, build the JoinExpr node.
1333 	 */
1334 	result = makeNode(JoinExpr);
1335 	result->jointype = JOIN_SEMI;
1336 	result->isNatural = false;
1337 	result->larg = NULL;		/* caller must fill this in */
1338 	result->rarg = (Node *) rtr;
1339 	result->usingClause = NIL;
1340 	result->quals = quals;
1341 	result->alias = NULL;
1342 	result->rtindex = 0;		/* we don't need an RTE for it */
1343 
1344 	return result;
1345 }
1346 
/*
 * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
 *
 * The API of this function is identical to convert_ANY_sublink_to_join's,
 * except that we also support the case where the caller has found NOT EXISTS,
 * so we need an additional input parameter "under_not".
 *
 * On success, the result is a JoinExpr of type JOIN_ANTI if under_not is
 * true, else JOIN_SEMI.  Its larg is NULL (to be filled in by the caller),
 * its rarg is the subquery's jointree (or that FromExpr's sole member), and
 * its quals are the subquery's former WHERE clause, renumbered to match the
 * parent query.  NULL is returned if the SubLink cannot be converted.
 *
 * We work on a copy of the SubLink's subselect, and the parent query is
 * touched (by appending the copy's rtable to parse->rtable) only after the
 * last check has passed.
 */
JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
							   bool under_not, Relids available_rels)
{
	JoinExpr   *result;
	Query	   *parse = root->parse;
	Query	   *subselect = (Query *) sublink->subselect;
	Node	   *whereClause;
	int			rtoffset;
	int			varno;
	Relids		clause_varnos;
	Relids		upper_varnos;

	Assert(sublink->subLinkType == EXISTS_SUBLINK);

	/*
	 * Can't flatten if it contains WITH.  (We could arrange to pull up the
	 * WITH into the parent query's cteList, but that risks changing the
	 * semantics, since a WITH ought to be executed once per associated query
	 * call.)  Note that convert_ANY_sublink_to_join doesn't have to reject
	 * this case, since it just produces a subquery RTE that doesn't have to
	 * get flattened into the parent query.
	 */
	if (subselect->cteList)
		return NULL;

	/*
	 * Copy the subquery so we can modify it safely (see comments in
	 * make_subplan).
	 */
	subselect = copyObject(subselect);

	/*
	 * See if the subquery can be simplified based on the knowledge that it's
	 * being used in EXISTS().  If we aren't able to get rid of its
	 * targetlist, we have to fail, because the pullup operation leaves us
	 * with noplace to evaluate the targetlist.
	 */
	if (!simplify_EXISTS_query(root, subselect))
		return NULL;

	/*
	 * Separate out the WHERE clause.  (We could theoretically also remove
	 * top-level plain JOIN/ON clauses, but it's probably not worth the
	 * trouble.)
	 */
	whereClause = subselect->jointree->quals;
	subselect->jointree->quals = NULL;

	/*
	 * The rest of the sub-select must not refer to any Vars of the parent
	 * query.  (Vars of higher levels should be okay, though.)
	 */
	if (contain_vars_of_level((Node *) subselect, 1))
		return NULL;

	/*
	 * On the other hand, the WHERE clause must contain some Vars of the
	 * parent query, else it's not gonna be a join.
	 */
	if (!contain_vars_of_level(whereClause, 1))
		return NULL;

	/*
	 * We don't risk optimizing if the WHERE clause is volatile, either.
	 */
	if (contain_volatile_functions(whereClause))
		return NULL;

	/*
	 * The subquery must have a nonempty jointree, but we can make it so.
	 */
	replace_empty_jointree(subselect);

	/*
	 * Prepare to pull up the sub-select into top range table.
	 *
	 * We rely here on the assumption that the outer query has no references
	 * to the inner (necessarily true). Therefore this is a lot easier than
	 * what pull_up_subqueries has to go through.
	 *
	 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
	 * do.  The machinations of simplify_EXISTS_query ensured that there is
	 * nothing interesting in the subquery except an rtable and jointree, and
	 * even the jointree FromExpr no longer has quals.  So we can just append
	 * the rtable to our own and use the FromExpr in our jointree. But first,
	 * adjust all level-zero varnos in the subquery to account for the rtable
	 * merger.
	 */
	rtoffset = list_length(parse->rtable);
	OffsetVarNodes((Node *) subselect, rtoffset, 0);
	OffsetVarNodes(whereClause, rtoffset, 0);

	/*
	 * Upper-level vars in subquery will now be one level closer to their
	 * parent than before; in particular, anything that had been level 1
	 * becomes level zero.
	 */
	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
	IncrementVarSublevelsUp(whereClause, -1, 1);

	/*
	 * Now that the WHERE clause is adjusted to match the parent query
	 * environment, we can easily identify all the level-zero rels it uses.
	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
	 * not.
	 */
	clause_varnos = pull_varnos(root, whereClause);
	upper_varnos = NULL;
	/* bms_first_member returns a negative value once the set is exhausted */
	while ((varno = bms_first_member(clause_varnos)) >= 0)
	{
		if (varno <= rtoffset)
			upper_varnos = bms_add_member(upper_varnos, varno);
	}
	bms_free(clause_varnos);
	/* We verified above that the WHERE clause uses some parent-query Vars */
	Assert(!bms_is_empty(upper_varnos));

	/*
	 * Now that we've got the set of upper-level varnos, we can make the last
	 * check: only available_rels can be referenced.
	 */
	if (!bms_is_subset(upper_varnos, available_rels))
		return NULL;

	/* Now we can attach the modified subquery rtable to the parent */
	parse->rtable = list_concat(parse->rtable, subselect->rtable);

	/*
	 * And finally, build the JoinExpr node.
	 */
	result = makeNode(JoinExpr);
	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
	result->isNatural = false;
	result->larg = NULL;		/* caller must fill this in */
	/* flatten out the FromExpr node if it's useless */
	if (list_length(subselect->jointree->fromlist) == 1)
		result->rarg = (Node *) linitial(subselect->jointree->fromlist);
	else
		result->rarg = (Node *) subselect->jointree;
	result->usingClause = NIL;
	result->quals = whereClause;
	result->alias = NULL;
	result->rtindex = 0;		/* we don't need an RTE for it */

	return result;
}
1500 
1501 /*
1502  * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
1503  *
1504  * The only thing that matters about an EXISTS query is whether it returns
1505  * zero or more than zero rows.  Therefore, we can remove certain SQL features
1506  * that won't affect that.  The only part that is really likely to matter in
1507  * typical usage is simplifying the targetlist: it's a common habit to write
1508  * "SELECT * FROM" even though there is no need to evaluate any columns.
1509  *
1510  * Note: by suppressing the targetlist we could cause an observable behavioral
1511  * change, namely that any errors that might occur in evaluating the tlist
1512  * won't occur, nor will other side-effects of volatile functions.  This seems
1513  * unlikely to bother anyone in practice.
1514  *
1515  * Returns true if was able to discard the targetlist, else false.
1516  */
1517 static bool
simplify_EXISTS_query(PlannerInfo * root,Query * query)1518 simplify_EXISTS_query(PlannerInfo *root, Query *query)
1519 {
1520 	/*
1521 	 * We don't try to simplify at all if the query uses set operations,
1522 	 * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR
1523 	 * UPDATE/SHARE; none of these seem likely in normal usage and their
1524 	 * possible effects are complex.  (Note: we could ignore an "OFFSET 0"
1525 	 * clause, but that traditionally is used as an optimization fence, so we
1526 	 * don't.)
1527 	 */
1528 	if (query->commandType != CMD_SELECT ||
1529 		query->setOperations ||
1530 		query->hasAggs ||
1531 		query->groupingSets ||
1532 		query->hasWindowFuncs ||
1533 		query->hasTargetSRFs ||
1534 		query->hasModifyingCTE ||
1535 		query->havingQual ||
1536 		query->limitOffset ||
1537 		query->rowMarks)
1538 		return false;
1539 
1540 	/*
1541 	 * LIMIT with a constant positive (or NULL) value doesn't affect the
1542 	 * semantics of EXISTS, so let's ignore such clauses.  This is worth doing
1543 	 * because people accustomed to certain other DBMSes may be in the habit
1544 	 * of writing EXISTS(SELECT ... LIMIT 1) as an optimization.  If there's a
1545 	 * LIMIT with anything else as argument, though, we can't simplify.
1546 	 */
1547 	if (query->limitCount)
1548 	{
1549 		/*
1550 		 * The LIMIT clause has not yet been through eval_const_expressions,
1551 		 * so we have to apply that here.  It might seem like this is a waste
1552 		 * of cycles, since the only case plausibly worth worrying about is
1553 		 * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
1554 		 * so we have to fold constants or we're not going to recognize it.
1555 		 */
1556 		Node	   *node = eval_const_expressions(root, query->limitCount);
1557 		Const	   *limit;
1558 
1559 		/* Might as well update the query if we simplified the clause. */
1560 		query->limitCount = node;
1561 
1562 		if (!IsA(node, Const))
1563 			return false;
1564 
1565 		limit = (Const *) node;
1566 		Assert(limit->consttype == INT8OID);
1567 		if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
1568 			return false;
1569 
1570 		/* Whether or not the targetlist is safe, we can drop the LIMIT. */
1571 		query->limitCount = NULL;
1572 	}
1573 
1574 	/*
1575 	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
1576 	 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
1577 	 * change a nonzero-rows result to zero rows or vice versa.  (Furthermore,
1578 	 * since our parsetree representation of these clauses depends on the
1579 	 * targetlist, we'd better throw them away if we drop the targetlist.)
1580 	 */
1581 	query->targetList = NIL;
1582 	query->groupClause = NIL;
1583 	query->windowClause = NIL;
1584 	query->distinctClause = NIL;
1585 	query->sortClause = NIL;
1586 	query->hasDistinctOn = false;
1587 
1588 	return true;
1589 }
1590 
1591 /*
1592  * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink
1593  *
1594  * The subselect is expected to be a fresh copy that we can munge up,
1595  * and to have been successfully passed through simplify_EXISTS_query.
1596  *
1597  * On success, the modified subselect is returned, and we store a suitable
1598  * upper-level test expression at *testexpr, plus a list of the subselect's
1599  * output Params at *paramIds.  (The test expression is already Param-ified
1600  * and hence need not go through convert_testexpr, which is why we have to
1601  * deal with the Param IDs specially.)
1602  *
1603  * On failure, returns NULL.
1604  */
1605 static Query *
convert_EXISTS_to_ANY(PlannerInfo * root,Query * subselect,Node ** testexpr,List ** paramIds)1606 convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
1607 					  Node **testexpr, List **paramIds)
1608 {
1609 	Node	   *whereClause;
1610 	List	   *leftargs,
1611 			   *rightargs,
1612 			   *opids,
1613 			   *opcollations,
1614 			   *newWhere,
1615 			   *tlist,
1616 			   *testlist,
1617 			   *paramids;
1618 	ListCell   *lc,
1619 			   *rc,
1620 			   *oc,
1621 			   *cc;
1622 	AttrNumber	resno;
1623 
1624 	/*
1625 	 * Query must not require a targetlist, since we have to insert a new one.
1626 	 * Caller should have dealt with the case already.
1627 	 */
1628 	Assert(subselect->targetList == NIL);
1629 
1630 	/*
1631 	 * Separate out the WHERE clause.  (We could theoretically also remove
1632 	 * top-level plain JOIN/ON clauses, but it's probably not worth the
1633 	 * trouble.)
1634 	 */
1635 	whereClause = subselect->jointree->quals;
1636 	subselect->jointree->quals = NULL;
1637 
1638 	/*
1639 	 * The rest of the sub-select must not refer to any Vars of the parent
1640 	 * query.  (Vars of higher levels should be okay, though.)
1641 	 *
1642 	 * Note: we need not check for Aggrefs separately because we know the
1643 	 * sub-select is as yet unoptimized; any uplevel Aggref must therefore
1644 	 * contain an uplevel Var reference.  This is not the case below ...
1645 	 */
1646 	if (contain_vars_of_level((Node *) subselect, 1))
1647 		return NULL;
1648 
1649 	/*
1650 	 * We don't risk optimizing if the WHERE clause is volatile, either.
1651 	 */
1652 	if (contain_volatile_functions(whereClause))
1653 		return NULL;
1654 
1655 	/*
1656 	 * Clean up the WHERE clause by doing const-simplification etc on it.
1657 	 * Aside from simplifying the processing we're about to do, this is
1658 	 * important for being able to pull chunks of the WHERE clause up into the
1659 	 * parent query.  Since we are invoked partway through the parent's
1660 	 * preprocess_expression() work, earlier steps of preprocess_expression()
1661 	 * wouldn't get applied to the pulled-up stuff unless we do them here. For
1662 	 * the parts of the WHERE clause that get put back into the child query,
1663 	 * this work is partially duplicative, but it shouldn't hurt.
1664 	 *
1665 	 * Note: we do not run flatten_join_alias_vars.  This is OK because any
1666 	 * parent aliases were flattened already, and we're not going to pull any
1667 	 * child Vars (of any description) into the parent.
1668 	 *
1669 	 * Note: passing the parent's root to eval_const_expressions is
1670 	 * technically wrong, but we can get away with it since only the
1671 	 * boundParams (if any) are used, and those would be the same in a
1672 	 * subroot.
1673 	 */
1674 	whereClause = eval_const_expressions(root, whereClause);
1675 	whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
1676 	whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
1677 
1678 	/*
1679 	 * We now have a flattened implicit-AND list of clauses, which we try to
1680 	 * break apart into "outervar = innervar" hash clauses. Anything that
1681 	 * can't be broken apart just goes back into the newWhere list.  Note that
1682 	 * we aren't trying hard yet to ensure that we have only outer or only
1683 	 * inner on each side; we'll check that if we get to the end.
1684 	 */
1685 	leftargs = rightargs = opids = opcollations = newWhere = NIL;
1686 	foreach(lc, (List *) whereClause)
1687 	{
1688 		OpExpr	   *expr = (OpExpr *) lfirst(lc);
1689 
1690 		if (IsA(expr, OpExpr) &&
1691 			hash_ok_operator(expr))
1692 		{
1693 			Node	   *leftarg = (Node *) linitial(expr->args);
1694 			Node	   *rightarg = (Node *) lsecond(expr->args);
1695 
1696 			if (contain_vars_of_level(leftarg, 1))
1697 			{
1698 				leftargs = lappend(leftargs, leftarg);
1699 				rightargs = lappend(rightargs, rightarg);
1700 				opids = lappend_oid(opids, expr->opno);
1701 				opcollations = lappend_oid(opcollations, expr->inputcollid);
1702 				continue;
1703 			}
1704 			if (contain_vars_of_level(rightarg, 1))
1705 			{
1706 				/*
1707 				 * We must commute the clause to put the outer var on the
1708 				 * left, because the hashing code in nodeSubplan.c expects
1709 				 * that.  This probably shouldn't ever fail, since hashable
1710 				 * operators ought to have commutators, but be paranoid.
1711 				 */
1712 				expr->opno = get_commutator(expr->opno);
1713 				if (OidIsValid(expr->opno) && hash_ok_operator(expr))
1714 				{
1715 					leftargs = lappend(leftargs, rightarg);
1716 					rightargs = lappend(rightargs, leftarg);
1717 					opids = lappend_oid(opids, expr->opno);
1718 					opcollations = lappend_oid(opcollations, expr->inputcollid);
1719 					continue;
1720 				}
1721 				/* If no commutator, no chance to optimize the WHERE clause */
1722 				return NULL;
1723 			}
1724 		}
1725 		/* Couldn't handle it as a hash clause */
1726 		newWhere = lappend(newWhere, expr);
1727 	}
1728 
1729 	/*
1730 	 * If we didn't find anything we could convert, fail.
1731 	 */
1732 	if (leftargs == NIL)
1733 		return NULL;
1734 
1735 	/*
1736 	 * There mustn't be any parent Vars or Aggs in the stuff that we intend to
1737 	 * put back into the child query.  Note: you might think we don't need to
1738 	 * check for Aggs separately, because an uplevel Agg must contain an
1739 	 * uplevel Var in its argument.  But it is possible that the uplevel Var
1740 	 * got optimized away by eval_const_expressions.  Consider
1741 	 *
1742 	 * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END)
1743 	 */
1744 	if (contain_vars_of_level((Node *) newWhere, 1) ||
1745 		contain_vars_of_level((Node *) rightargs, 1))
1746 		return NULL;
1747 	if (root->parse->hasAggs &&
1748 		(contain_aggs_of_level((Node *) newWhere, 1) ||
1749 		 contain_aggs_of_level((Node *) rightargs, 1)))
1750 		return NULL;
1751 
1752 	/*
1753 	 * And there can't be any child Vars in the stuff we intend to pull up.
1754 	 * (Note: we'd need to check for child Aggs too, except we know the child
1755 	 * has no aggs at all because of simplify_EXISTS_query's check. The same
1756 	 * goes for window functions.)
1757 	 */
1758 	if (contain_vars_of_level((Node *) leftargs, 0))
1759 		return NULL;
1760 
1761 	/*
1762 	 * Also reject sublinks in the stuff we intend to pull up.  (It might be
1763 	 * possible to support this, but doesn't seem worth the complication.)
1764 	 */
1765 	if (contain_subplans((Node *) leftargs))
1766 		return NULL;
1767 
1768 	/*
1769 	 * Okay, adjust the sublevelsup in the stuff we're pulling up.
1770 	 */
1771 	IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
1772 
1773 	/*
1774 	 * Put back any child-level-only WHERE clauses.
1775 	 */
1776 	if (newWhere)
1777 		subselect->jointree->quals = (Node *) make_ands_explicit(newWhere);
1778 
1779 	/*
1780 	 * Build a new targetlist for the child that emits the expressions we
1781 	 * need.  Concurrently, build a testexpr for the parent using Params to
1782 	 * reference the child outputs.  (Since we generate Params directly here,
1783 	 * there will be no need to convert the testexpr in build_subplan.)
1784 	 */
1785 	tlist = testlist = paramids = NIL;
1786 	resno = 1;
1787 	forfour(lc, leftargs, rc, rightargs, oc, opids, cc, opcollations)
1788 	{
1789 		Node	   *leftarg = (Node *) lfirst(lc);
1790 		Node	   *rightarg = (Node *) lfirst(rc);
1791 		Oid			opid = lfirst_oid(oc);
1792 		Oid			opcollation = lfirst_oid(cc);
1793 		Param	   *param;
1794 
1795 		param = generate_new_exec_param(root,
1796 										exprType(rightarg),
1797 										exprTypmod(rightarg),
1798 										exprCollation(rightarg));
1799 		tlist = lappend(tlist,
1800 						makeTargetEntry((Expr *) rightarg,
1801 										resno++,
1802 										NULL,
1803 										false));
1804 		testlist = lappend(testlist,
1805 						   make_opclause(opid, BOOLOID, false,
1806 										 (Expr *) leftarg, (Expr *) param,
1807 										 InvalidOid, opcollation));
1808 		paramids = lappend_int(paramids, param->paramid);
1809 	}
1810 
1811 	/* Put everything where it should go, and we're done */
1812 	subselect->targetList = tlist;
1813 	*testexpr = (Node *) make_ands_explicit(testlist);
1814 	*paramIds = paramids;
1815 
1816 	return subselect;
1817 }
1818 
1819 
1820 /*
1821  * Replace correlation vars (uplevel vars) with Params.
1822  *
1823  * Uplevel PlaceHolderVars and aggregates are replaced, too.
1824  *
1825  * Note: it is critical that this runs immediately after SS_process_sublinks.
1826  * Since we do not recurse into the arguments of uplevel PHVs and aggregates,
1827  * they will get copied to the appropriate subplan args list in the parent
1828  * query with uplevel vars not replaced by Params, but only adjusted in level
1829  * (see replace_outer_placeholdervar and replace_outer_agg).  That's exactly
1830  * what we want for the vars of the parent level --- but if a PHV's or
1831  * aggregate's argument contains any further-up variables, they have to be
1832  * replaced with Params in their turn. That will happen when the parent level
1833  * runs SS_replace_correlation_vars.  Therefore it must do so after expanding
1834  * its sublinks to subplans.  And we don't want any steps in between, else
1835  * those steps would never get applied to the argument expressions, either in
1836  * the parent or the child level.
1837  *
1838  * Another fairly tricky thing going on here is the handling of SubLinks in
1839  * the arguments of uplevel PHVs/aggregates.  Those are not touched inside the
1840  * intermediate query level, either.  Instead, SS_process_sublinks recurses on
1841  * them after copying the PHV or Aggref expression into the parent plan level
1842  * (this is actually taken care of in build_subplan).
1843  */
1844 Node *
SS_replace_correlation_vars(PlannerInfo * root,Node * expr)1845 SS_replace_correlation_vars(PlannerInfo *root, Node *expr)
1846 {
1847 	/* No setup needed for tree walk, so away we go */
1848 	return replace_correlation_vars_mutator(expr, root);
1849 }
1850 
1851 static Node *
replace_correlation_vars_mutator(Node * node,PlannerInfo * root)1852 replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
1853 {
1854 	if (node == NULL)
1855 		return NULL;
1856 	if (IsA(node, Var))
1857 	{
1858 		if (((Var *) node)->varlevelsup > 0)
1859 			return (Node *) replace_outer_var(root, (Var *) node);
1860 	}
1861 	if (IsA(node, PlaceHolderVar))
1862 	{
1863 		if (((PlaceHolderVar *) node)->phlevelsup > 0)
1864 			return (Node *) replace_outer_placeholdervar(root,
1865 														 (PlaceHolderVar *) node);
1866 	}
1867 	if (IsA(node, Aggref))
1868 	{
1869 		if (((Aggref *) node)->agglevelsup > 0)
1870 			return (Node *) replace_outer_agg(root, (Aggref *) node);
1871 	}
1872 	if (IsA(node, GroupingFunc))
1873 	{
1874 		if (((GroupingFunc *) node)->agglevelsup > 0)
1875 			return (Node *) replace_outer_grouping(root, (GroupingFunc *) node);
1876 	}
1877 	return expression_tree_mutator(node,
1878 								   replace_correlation_vars_mutator,
1879 								   (void *) root);
1880 }
1881 
1882 /*
1883  * Expand SubLinks to SubPlans in the given expression.
1884  *
1885  * The isQual argument tells whether or not this expression is a WHERE/HAVING
1886  * qualifier expression.  If it is, any sublinks appearing at top level need
1887  * not distinguish FALSE from UNKNOWN return values.
1888  */
1889 Node *
SS_process_sublinks(PlannerInfo * root,Node * expr,bool isQual)1890 SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
1891 {
1892 	process_sublinks_context context;
1893 
1894 	context.root = root;
1895 	context.isTopQual = isQual;
1896 	return process_sublinks_mutator(expr, &context);
1897 }
1898 
1899 static Node *
process_sublinks_mutator(Node * node,process_sublinks_context * context)1900 process_sublinks_mutator(Node *node, process_sublinks_context *context)
1901 {
1902 	process_sublinks_context locContext;
1903 
1904 	locContext.root = context->root;
1905 
1906 	if (node == NULL)
1907 		return NULL;
1908 	if (IsA(node, SubLink))
1909 	{
1910 		SubLink    *sublink = (SubLink *) node;
1911 		Node	   *testexpr;
1912 
1913 		/*
1914 		 * First, recursively process the lefthand-side expressions, if any.
1915 		 * They're not top-level anymore.
1916 		 */
1917 		locContext.isTopQual = false;
1918 		testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
1919 
1920 		/*
1921 		 * Now build the SubPlan node and make the expr to return.
1922 		 */
1923 		return make_subplan(context->root,
1924 							(Query *) sublink->subselect,
1925 							sublink->subLinkType,
1926 							sublink->subLinkId,
1927 							testexpr,
1928 							context->isTopQual);
1929 	}
1930 
1931 	/*
1932 	 * Don't recurse into the arguments of an outer PHV or aggregate here. Any
1933 	 * SubLinks in the arguments have to be dealt with at the outer query
1934 	 * level; they'll be handled when build_subplan collects the PHV or Aggref
1935 	 * into the arguments to be passed down to the current subplan.
1936 	 */
1937 	if (IsA(node, PlaceHolderVar))
1938 	{
1939 		if (((PlaceHolderVar *) node)->phlevelsup > 0)
1940 			return node;
1941 	}
1942 	else if (IsA(node, Aggref))
1943 	{
1944 		if (((Aggref *) node)->agglevelsup > 0)
1945 			return node;
1946 	}
1947 
1948 	/*
1949 	 * We should never see a SubPlan expression in the input (since this is
1950 	 * the very routine that creates 'em to begin with).  We shouldn't find
1951 	 * ourselves invoked directly on a Query, either.
1952 	 */
1953 	Assert(!IsA(node, SubPlan));
1954 	Assert(!IsA(node, AlternativeSubPlan));
1955 	Assert(!IsA(node, Query));
1956 
1957 	/*
1958 	 * Because make_subplan() could return an AND or OR clause, we have to
1959 	 * take steps to preserve AND/OR flatness of a qual.  We assume the input
1960 	 * has been AND/OR flattened and so we need no recursion here.
1961 	 *
1962 	 * (Due to the coding here, we will not get called on the List subnodes of
1963 	 * an AND; and the input is *not* yet in implicit-AND format.  So no check
1964 	 * is needed for a bare List.)
1965 	 *
1966 	 * Anywhere within the top-level AND/OR clause structure, we can tell
1967 	 * make_subplan() that NULL and FALSE are interchangeable.  So isTopQual
1968 	 * propagates down in both cases.  (Note that this is unlike the meaning
1969 	 * of "top level qual" used in most other places in Postgres.)
1970 	 */
1971 	if (is_andclause(node))
1972 	{
1973 		List	   *newargs = NIL;
1974 		ListCell   *l;
1975 
1976 		/* Still at qual top-level */
1977 		locContext.isTopQual = context->isTopQual;
1978 
1979 		foreach(l, ((BoolExpr *) node)->args)
1980 		{
1981 			Node	   *newarg;
1982 
1983 			newarg = process_sublinks_mutator(lfirst(l), &locContext);
1984 			if (is_andclause(newarg))
1985 				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1986 			else
1987 				newargs = lappend(newargs, newarg);
1988 		}
1989 		return (Node *) make_andclause(newargs);
1990 	}
1991 
1992 	if (is_orclause(node))
1993 	{
1994 		List	   *newargs = NIL;
1995 		ListCell   *l;
1996 
1997 		/* Still at qual top-level */
1998 		locContext.isTopQual = context->isTopQual;
1999 
2000 		foreach(l, ((BoolExpr *) node)->args)
2001 		{
2002 			Node	   *newarg;
2003 
2004 			newarg = process_sublinks_mutator(lfirst(l), &locContext);
2005 			if (is_orclause(newarg))
2006 				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
2007 			else
2008 				newargs = lappend(newargs, newarg);
2009 		}
2010 		return (Node *) make_orclause(newargs);
2011 	}
2012 
2013 	/*
2014 	 * If we recurse down through anything other than an AND or OR node, we
2015 	 * are definitely not at top qual level anymore.
2016 	 */
2017 	locContext.isTopQual = false;
2018 
2019 	return expression_tree_mutator(node,
2020 								   process_sublinks_mutator,
2021 								   (void *) &locContext);
2022 }
2023 
2024 /*
2025  * SS_identify_outer_params - identify the Params available from outer levels
2026  *
2027  * This must be run after SS_replace_correlation_vars and SS_process_sublinks
2028  * processing is complete in a given query level as well as all of its
2029  * descendant levels (which means it's most practical to do it at the end of
2030  * processing the query level).  We compute the set of paramIds that outer
2031  * levels will make available to this level+descendants, and record it in
2032  * root->outer_params for use while computing extParam/allParam sets in final
2033  * plan cleanup.  (We can't just compute it then, because the upper levels'
2034  * plan_params lists are transient and will be gone by then.)
2035  */
2036 void
SS_identify_outer_params(PlannerInfo * root)2037 SS_identify_outer_params(PlannerInfo *root)
2038 {
2039 	Bitmapset  *outer_params;
2040 	PlannerInfo *proot;
2041 	ListCell   *l;
2042 
2043 	/*
2044 	 * If no parameters have been assigned anywhere in the tree, we certainly
2045 	 * don't need to do anything here.
2046 	 */
2047 	if (root->glob->paramExecTypes == NIL)
2048 		return;
2049 
2050 	/*
2051 	 * Scan all query levels above this one to see which parameters are due to
2052 	 * be available from them, either because lower query levels have
2053 	 * requested them (via plan_params) or because they will be available from
2054 	 * initPlans of those levels.
2055 	 */
2056 	outer_params = NULL;
2057 	for (proot = root->parent_root; proot != NULL; proot = proot->parent_root)
2058 	{
2059 		/* Include ordinary Var/PHV/Aggref params */
2060 		foreach(l, proot->plan_params)
2061 		{
2062 			PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
2063 
2064 			outer_params = bms_add_member(outer_params, pitem->paramId);
2065 		}
2066 		/* Include any outputs of outer-level initPlans */
2067 		foreach(l, proot->init_plans)
2068 		{
2069 			SubPlan    *initsubplan = (SubPlan *) lfirst(l);
2070 			ListCell   *l2;
2071 
2072 			foreach(l2, initsubplan->setParam)
2073 			{
2074 				outer_params = bms_add_member(outer_params, lfirst_int(l2));
2075 			}
2076 		}
2077 		/* Include worktable ID, if a recursive query is being planned */
2078 		if (proot->wt_param_id >= 0)
2079 			outer_params = bms_add_member(outer_params, proot->wt_param_id);
2080 	}
2081 	root->outer_params = outer_params;
2082 }
2083 
2084 /*
2085  * SS_charge_for_initplans - account for initplans in Path costs & parallelism
2086  *
2087  * If any initPlans have been created in the current query level, they will
2088  * get attached to the Plan tree created from whichever Path we select from
2089  * the given rel.  Increment all that rel's Paths' costs to account for them,
2090  * and make sure the paths get marked as parallel-unsafe, since we can't
2091  * currently transmit initPlans to parallel workers.
2092  *
2093  * This is separate from SS_attach_initplans because we might conditionally
2094  * create more initPlans during create_plan(), depending on which Path we
2095  * select.  However, Paths that would generate such initPlans are expected
2096  * to have included their cost already.
2097  */
2098 void
SS_charge_for_initplans(PlannerInfo * root,RelOptInfo * final_rel)2099 SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel)
2100 {
2101 	Cost		initplan_cost;
2102 	ListCell   *lc;
2103 
2104 	/* Nothing to do if no initPlans */
2105 	if (root->init_plans == NIL)
2106 		return;
2107 
2108 	/*
2109 	 * Compute the cost increment just once, since it will be the same for all
2110 	 * Paths.  We assume each initPlan gets run once during top plan startup.
2111 	 * This is a conservative overestimate, since in fact an initPlan might be
2112 	 * executed later than plan startup, or even not at all.
2113 	 */
2114 	initplan_cost = 0;
2115 	foreach(lc, root->init_plans)
2116 	{
2117 		SubPlan    *initsubplan = (SubPlan *) lfirst(lc);
2118 
2119 		initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost;
2120 	}
2121 
2122 	/*
2123 	 * Now adjust the costs and parallel_safe flags.
2124 	 */
2125 	foreach(lc, final_rel->pathlist)
2126 	{
2127 		Path	   *path = (Path *) lfirst(lc);
2128 
2129 		path->startup_cost += initplan_cost;
2130 		path->total_cost += initplan_cost;
2131 		path->parallel_safe = false;
2132 	}
2133 
2134 	/*
2135 	 * Forget about any partial paths and clear consider_parallel, too;
2136 	 * they're not usable if we attached an initPlan.
2137 	 */
2138 	final_rel->partial_pathlist = NIL;
2139 	final_rel->consider_parallel = false;
2140 
2141 	/* We needn't do set_cheapest() here, caller will do it */
2142 }
2143 
2144 /*
2145  * SS_attach_initplans - attach initplans to topmost plan node
2146  *
2147  * Attach any initplans created in the current query level to the specified
2148  * plan node, which should normally be the topmost node for the query level.
2149  * (In principle the initPlans could go in any node at or above where they're
2150  * referenced; but there seems no reason to put them any lower than the
2151  * topmost node, so we don't bother to track exactly where they came from.)
2152  * We do not touch the plan node's cost; the initplans should have been
2153  * accounted for in path costing.
2154  */
2155 void
SS_attach_initplans(PlannerInfo * root,Plan * plan)2156 SS_attach_initplans(PlannerInfo *root, Plan *plan)
2157 {
2158 	plan->initPlan = root->init_plans;
2159 }
2160 
2161 /*
2162  * SS_finalize_plan - do final parameter processing for a completed Plan.
2163  *
2164  * This recursively computes the extParam and allParam sets for every Plan
2165  * node in the given plan tree.  (Oh, and RangeTblFunction.funcparams too.)
2166  *
2167  * We assume that SS_finalize_plan has already been run on any initplans or
2168  * subplans the plan tree could reference.
2169  */
2170 void
SS_finalize_plan(PlannerInfo * root,Plan * plan)2171 SS_finalize_plan(PlannerInfo *root, Plan *plan)
2172 {
2173 	/* No setup needed, just recurse through plan tree. */
2174 	(void) finalize_plan(root, plan, -1, root->outer_params, NULL);
2175 }
2176 
2177 /*
2178  * Recursive processing of all nodes in the plan tree
2179  *
2180  * gather_param is the rescan_param of an ancestral Gather/GatherMerge,
2181  * or -1 if there is none.
2182  *
2183  * valid_params is the set of param IDs supplied by outer plan levels
2184  * that are valid to reference in this plan node or its children.
2185  *
2186  * scan_params is a set of param IDs to force scan plan nodes to reference.
2187  * This is for EvalPlanQual support, and is always NULL at the top of the
2188  * recursion.
2189  *
2190  * The return value is the computed allParam set for the given Plan node.
2191  * This is just an internal notational convenience: we can add a child
2192  * plan's allParams to the set of param IDs of interest to this level
2193  * in the same statement that recurses to that child.
2194  *
2195  * Do not scribble on caller's values of valid_params or scan_params!
2196  *
2197  * Note: although we attempt to deal with initPlans anywhere in the tree, the
2198  * logic is not really right.  The problem is that a plan node might return an
2199  * output Param of its initPlan as a targetlist item, in which case it's valid
2200  * for the parent plan level to reference that same Param; the parent's usage
2201  * will be converted into a Var referencing the child plan node by setrefs.c.
2202  * But this function would see the parent's reference as out of scope and
2203  * complain about it.  For now, this does not matter because the planner only
2204  * attaches initPlans to the topmost plan node in a query level, so the case
2205  * doesn't arise.  If we ever merge this processing into setrefs.c, maybe it
2206  * can be handled more cleanly.
2207  */
2208 static Bitmapset *
finalize_plan(PlannerInfo * root,Plan * plan,int gather_param,Bitmapset * valid_params,Bitmapset * scan_params)2209 finalize_plan(PlannerInfo *root, Plan *plan,
2210 			  int gather_param,
2211 			  Bitmapset *valid_params,
2212 			  Bitmapset *scan_params)
2213 {
2214 	finalize_primnode_context context;
2215 	int			locally_added_param;
2216 	Bitmapset  *nestloop_params;
2217 	Bitmapset  *initExtParam;
2218 	Bitmapset  *initSetParam;
2219 	Bitmapset  *child_params;
2220 	ListCell   *l;
2221 
2222 	if (plan == NULL)
2223 		return NULL;
2224 
2225 	context.root = root;
2226 	context.paramids = NULL;	/* initialize set to empty */
2227 	locally_added_param = -1;	/* there isn't one */
2228 	nestloop_params = NULL;		/* there aren't any */
2229 
2230 	/*
2231 	 * Examine any initPlans to determine the set of external params they
2232 	 * reference and the set of output params they supply.  (We assume
2233 	 * SS_finalize_plan was run on them already.)
2234 	 */
2235 	initExtParam = initSetParam = NULL;
2236 	foreach(l, plan->initPlan)
2237 	{
2238 		SubPlan    *initsubplan = (SubPlan *) lfirst(l);
2239 		Plan	   *initplan = planner_subplan_get_plan(root, initsubplan);
2240 		ListCell   *l2;
2241 
2242 		initExtParam = bms_add_members(initExtParam, initplan->extParam);
2243 		foreach(l2, initsubplan->setParam)
2244 		{
2245 			initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
2246 		}
2247 	}
2248 
2249 	/* Any setParams are validly referenceable in this node and children */
2250 	if (initSetParam)
2251 		valid_params = bms_union(valid_params, initSetParam);
2252 
2253 	/*
2254 	 * When we call finalize_primnode, context.paramids sets are automatically
2255 	 * merged together.  But when recursing to self, we have to do it the hard
2256 	 * way.  We want the paramids set to include params in subplans as well as
2257 	 * at this level.
2258 	 */
2259 
2260 	/* Find params in targetlist and qual */
2261 	finalize_primnode((Node *) plan->targetlist, &context);
2262 	finalize_primnode((Node *) plan->qual, &context);
2263 
2264 	/*
2265 	 * If it's a parallel-aware scan node, mark it as dependent on the parent
2266 	 * Gather/GatherMerge's rescan Param.
2267 	 */
2268 	if (plan->parallel_aware)
2269 	{
2270 		if (gather_param < 0)
2271 			elog(ERROR, "parallel-aware plan node is not below a Gather");
2272 		context.paramids =
2273 			bms_add_member(context.paramids, gather_param);
2274 	}
2275 
2276 	/* Check additional node-type-specific fields */
2277 	switch (nodeTag(plan))
2278 	{
2279 		case T_Result:
2280 			finalize_primnode(((Result *) plan)->resconstantqual,
2281 							  &context);
2282 			break;
2283 
2284 		case T_SeqScan:
2285 			context.paramids = bms_add_members(context.paramids, scan_params);
2286 			break;
2287 
2288 		case T_SampleScan:
2289 			finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
2290 							  &context);
2291 			context.paramids = bms_add_members(context.paramids, scan_params);
2292 			break;
2293 
2294 		case T_IndexScan:
2295 			finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
2296 							  &context);
2297 			finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby,
2298 							  &context);
2299 
2300 			/*
2301 			 * we need not look at indexqualorig, since it will have the same
2302 			 * param references as indexqual.  Likewise, we can ignore
2303 			 * indexorderbyorig.
2304 			 */
2305 			context.paramids = bms_add_members(context.paramids, scan_params);
2306 			break;
2307 
2308 		case T_IndexOnlyScan:
2309 			finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual,
2310 							  &context);
2311 			finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby,
2312 							  &context);
2313 
2314 			/*
2315 			 * we need not look at indextlist, since it cannot contain Params.
2316 			 */
2317 			context.paramids = bms_add_members(context.paramids, scan_params);
2318 			break;
2319 
2320 		case T_BitmapIndexScan:
2321 			finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual,
2322 							  &context);
2323 
2324 			/*
2325 			 * we need not look at indexqualorig, since it will have the same
2326 			 * param references as indexqual.
2327 			 */
2328 			break;
2329 
2330 		case T_BitmapHeapScan:
2331 			finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
2332 							  &context);
2333 			context.paramids = bms_add_members(context.paramids, scan_params);
2334 			break;
2335 
2336 		case T_TidScan:
2337 			finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
2338 							  &context);
2339 			context.paramids = bms_add_members(context.paramids, scan_params);
2340 			break;
2341 
2342 		case T_SubqueryScan:
2343 			{
2344 				SubqueryScan *sscan = (SubqueryScan *) plan;
2345 				RelOptInfo *rel;
2346 				Bitmapset  *subquery_params;
2347 
2348 				/* We must run finalize_plan on the subquery */
2349 				rel = find_base_rel(root, sscan->scan.scanrelid);
2350 				subquery_params = rel->subroot->outer_params;
2351 				if (gather_param >= 0)
2352 					subquery_params = bms_add_member(bms_copy(subquery_params),
2353 													 gather_param);
2354 				finalize_plan(rel->subroot, sscan->subplan, gather_param,
2355 							  subquery_params, NULL);
2356 
2357 				/* Now we can add its extParams to the parent's params */
2358 				context.paramids = bms_add_members(context.paramids,
2359 												   sscan->subplan->extParam);
2360 				/* We need scan_params too, though */
2361 				context.paramids = bms_add_members(context.paramids,
2362 												   scan_params);
2363 			}
2364 			break;
2365 
2366 		case T_FunctionScan:
2367 			{
2368 				FunctionScan *fscan = (FunctionScan *) plan;
2369 				ListCell   *lc;
2370 
2371 				/*
2372 				 * Call finalize_primnode independently on each function
2373 				 * expression, so that we can record which params are
2374 				 * referenced in each, in order to decide which need
2375 				 * re-evaluating during rescan.
2376 				 */
2377 				foreach(lc, fscan->functions)
2378 				{
2379 					RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
2380 					finalize_primnode_context funccontext;
2381 
2382 					funccontext = context;
2383 					funccontext.paramids = NULL;
2384 
2385 					finalize_primnode(rtfunc->funcexpr, &funccontext);
2386 
2387 					/* remember results for execution */
2388 					rtfunc->funcparams = funccontext.paramids;
2389 
2390 					/* add the function's params to the overall set */
2391 					context.paramids = bms_add_members(context.paramids,
2392 													   funccontext.paramids);
2393 				}
2394 
2395 				context.paramids = bms_add_members(context.paramids,
2396 												   scan_params);
2397 			}
2398 			break;
2399 
2400 		case T_TableFuncScan:
2401 			finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc,
2402 							  &context);
2403 			context.paramids = bms_add_members(context.paramids, scan_params);
2404 			break;
2405 
2406 		case T_ValuesScan:
2407 			finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
2408 							  &context);
2409 			context.paramids = bms_add_members(context.paramids, scan_params);
2410 			break;
2411 
2412 		case T_CteScan:
2413 			{
2414 				/*
2415 				 * You might think we should add the node's cteParam to
2416 				 * paramids, but we shouldn't because that param is just a
2417 				 * linkage mechanism for multiple CteScan nodes for the same
2418 				 * CTE; it is never used for changed-param signaling.  What we
2419 				 * have to do instead is to find the referenced CTE plan and
2420 				 * incorporate its external paramids, so that the correct
2421 				 * things will happen if the CTE references outer-level
2422 				 * variables.  See test cases for bug #4902.  (We assume
2423 				 * SS_finalize_plan was run on the CTE plan already.)
2424 				 */
2425 				int			plan_id = ((CteScan *) plan)->ctePlanId;
2426 				Plan	   *cteplan;
2427 
2428 				/* so, do this ... */
2429 				if (plan_id < 1 || plan_id > list_length(root->glob->subplans))
2430 					elog(ERROR, "could not find plan for CteScan referencing plan ID %d",
2431 						 plan_id);
2432 				cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2433 				context.paramids =
2434 					bms_add_members(context.paramids, cteplan->extParam);
2435 
2436 #ifdef NOT_USED
2437 				/* ... but not this */
2438 				context.paramids =
2439 					bms_add_member(context.paramids,
2440 								   ((CteScan *) plan)->cteParam);
2441 #endif
2442 
2443 				context.paramids = bms_add_members(context.paramids,
2444 												   scan_params);
2445 			}
2446 			break;
2447 
2448 		case T_WorkTableScan:
2449 			context.paramids =
2450 				bms_add_member(context.paramids,
2451 							   ((WorkTableScan *) plan)->wtParam);
2452 			context.paramids = bms_add_members(context.paramids, scan_params);
2453 			break;
2454 
2455 		case T_NamedTuplestoreScan:
2456 			context.paramids = bms_add_members(context.paramids, scan_params);
2457 			break;
2458 
2459 		case T_ForeignScan:
2460 			{
2461 				ForeignScan *fscan = (ForeignScan *) plan;
2462 
2463 				finalize_primnode((Node *) fscan->fdw_exprs,
2464 								  &context);
2465 				finalize_primnode((Node *) fscan->fdw_recheck_quals,
2466 								  &context);
2467 
2468 				/* We assume fdw_scan_tlist cannot contain Params */
2469 				context.paramids = bms_add_members(context.paramids,
2470 												   scan_params);
2471 			}
2472 			break;
2473 
2474 		case T_CustomScan:
2475 			{
2476 				CustomScan *cscan = (CustomScan *) plan;
2477 				ListCell   *lc;
2478 
2479 				finalize_primnode((Node *) cscan->custom_exprs,
2480 								  &context);
2481 				/* We assume custom_scan_tlist cannot contain Params */
2482 				context.paramids =
2483 					bms_add_members(context.paramids, scan_params);
2484 
2485 				/* child nodes if any */
2486 				foreach(lc, cscan->custom_plans)
2487 				{
2488 					context.paramids =
2489 						bms_add_members(context.paramids,
2490 										finalize_plan(root,
2491 													  (Plan *) lfirst(lc),
2492 													  gather_param,
2493 													  valid_params,
2494 													  scan_params));
2495 				}
2496 			}
2497 			break;
2498 
2499 		case T_ModifyTable:
2500 			{
2501 				ModifyTable *mtplan = (ModifyTable *) plan;
2502 				ListCell   *l;
2503 
2504 				/* Force descendant scan nodes to reference epqParam */
2505 				locally_added_param = mtplan->epqParam;
2506 				valid_params = bms_add_member(bms_copy(valid_params),
2507 											  locally_added_param);
2508 				scan_params = bms_add_member(bms_copy(scan_params),
2509 											 locally_added_param);
2510 				finalize_primnode((Node *) mtplan->returningLists,
2511 								  &context);
2512 				finalize_primnode((Node *) mtplan->onConflictSet,
2513 								  &context);
2514 				finalize_primnode((Node *) mtplan->onConflictWhere,
2515 								  &context);
2516 				/* exclRelTlist contains only Vars, doesn't need examination */
2517 				foreach(l, mtplan->plans)
2518 				{
2519 					context.paramids =
2520 						bms_add_members(context.paramids,
2521 										finalize_plan(root,
2522 													  (Plan *) lfirst(l),
2523 													  gather_param,
2524 													  valid_params,
2525 													  scan_params));
2526 				}
2527 			}
2528 			break;
2529 
2530 		case T_Append:
2531 			{
2532 				ListCell   *l;
2533 
2534 				foreach(l, ((Append *) plan)->appendplans)
2535 				{
2536 					context.paramids =
2537 						bms_add_members(context.paramids,
2538 										finalize_plan(root,
2539 													  (Plan *) lfirst(l),
2540 													  gather_param,
2541 													  valid_params,
2542 													  scan_params));
2543 				}
2544 			}
2545 			break;
2546 
2547 		case T_MergeAppend:
2548 			{
2549 				ListCell   *l;
2550 
2551 				foreach(l, ((MergeAppend *) plan)->mergeplans)
2552 				{
2553 					context.paramids =
2554 						bms_add_members(context.paramids,
2555 										finalize_plan(root,
2556 													  (Plan *) lfirst(l),
2557 													  gather_param,
2558 													  valid_params,
2559 													  scan_params));
2560 				}
2561 			}
2562 			break;
2563 
2564 		case T_BitmapAnd:
2565 			{
2566 				ListCell   *l;
2567 
2568 				foreach(l, ((BitmapAnd *) plan)->bitmapplans)
2569 				{
2570 					context.paramids =
2571 						bms_add_members(context.paramids,
2572 										finalize_plan(root,
2573 													  (Plan *) lfirst(l),
2574 													  gather_param,
2575 													  valid_params,
2576 													  scan_params));
2577 				}
2578 			}
2579 			break;
2580 
2581 		case T_BitmapOr:
2582 			{
2583 				ListCell   *l;
2584 
2585 				foreach(l, ((BitmapOr *) plan)->bitmapplans)
2586 				{
2587 					context.paramids =
2588 						bms_add_members(context.paramids,
2589 										finalize_plan(root,
2590 													  (Plan *) lfirst(l),
2591 													  gather_param,
2592 													  valid_params,
2593 													  scan_params));
2594 				}
2595 			}
2596 			break;
2597 
2598 		case T_NestLoop:
2599 			{
2600 				ListCell   *l;
2601 
2602 				finalize_primnode((Node *) ((Join *) plan)->joinqual,
2603 								  &context);
2604 				/* collect set of params that will be passed to right child */
2605 				foreach(l, ((NestLoop *) plan)->nestParams)
2606 				{
2607 					NestLoopParam *nlp = (NestLoopParam *) lfirst(l);
2608 
2609 					nestloop_params = bms_add_member(nestloop_params,
2610 													 nlp->paramno);
2611 				}
2612 			}
2613 			break;
2614 
2615 		case T_MergeJoin:
2616 			finalize_primnode((Node *) ((Join *) plan)->joinqual,
2617 							  &context);
2618 			finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
2619 							  &context);
2620 			break;
2621 
2622 		case T_HashJoin:
2623 			finalize_primnode((Node *) ((Join *) plan)->joinqual,
2624 							  &context);
2625 			finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
2626 							  &context);
2627 			break;
2628 
2629 		case T_Limit:
2630 			finalize_primnode(((Limit *) plan)->limitOffset,
2631 							  &context);
2632 			finalize_primnode(((Limit *) plan)->limitCount,
2633 							  &context);
2634 			break;
2635 
2636 		case T_RecursiveUnion:
2637 			/* child nodes are allowed to reference wtParam */
2638 			locally_added_param = ((RecursiveUnion *) plan)->wtParam;
2639 			valid_params = bms_add_member(bms_copy(valid_params),
2640 										  locally_added_param);
2641 			/* wtParam does *not* get added to scan_params */
2642 			break;
2643 
2644 		case T_LockRows:
2645 			/* Force descendant scan nodes to reference epqParam */
2646 			locally_added_param = ((LockRows *) plan)->epqParam;
2647 			valid_params = bms_add_member(bms_copy(valid_params),
2648 										  locally_added_param);
2649 			scan_params = bms_add_member(bms_copy(scan_params),
2650 										 locally_added_param);
2651 			break;
2652 
2653 		case T_Agg:
2654 			{
2655 				Agg		   *agg = (Agg *) plan;
2656 
2657 				/*
2658 				 * AGG_HASHED plans need to know which Params are referenced
2659 				 * in aggregate calls.  Do a separate scan to identify them.
2660 				 */
2661 				if (agg->aggstrategy == AGG_HASHED)
2662 				{
2663 					finalize_primnode_context aggcontext;
2664 
2665 					aggcontext.root = root;
2666 					aggcontext.paramids = NULL;
2667 					finalize_agg_primnode((Node *) agg->plan.targetlist,
2668 										  &aggcontext);
2669 					finalize_agg_primnode((Node *) agg->plan.qual,
2670 										  &aggcontext);
2671 					agg->aggParams = aggcontext.paramids;
2672 				}
2673 			}
2674 			break;
2675 
2676 		case T_WindowAgg:
2677 			finalize_primnode(((WindowAgg *) plan)->startOffset,
2678 							  &context);
2679 			finalize_primnode(((WindowAgg *) plan)->endOffset,
2680 							  &context);
2681 			break;
2682 
2683 		case T_Gather:
2684 			/* child nodes are allowed to reference rescan_param, if any */
2685 			locally_added_param = ((Gather *) plan)->rescan_param;
2686 			if (locally_added_param >= 0)
2687 			{
2688 				valid_params = bms_add_member(bms_copy(valid_params),
2689 											  locally_added_param);
2690 
2691 				/*
2692 				 * We currently don't support nested Gathers.  The issue so
2693 				 * far as this function is concerned would be how to identify
2694 				 * which child nodes depend on which Gather.
2695 				 */
2696 				Assert(gather_param < 0);
2697 				/* Pass down rescan_param to child parallel-aware nodes */
2698 				gather_param = locally_added_param;
2699 			}
2700 			/* rescan_param does *not* get added to scan_params */
2701 			break;
2702 
2703 		case T_GatherMerge:
2704 			/* child nodes are allowed to reference rescan_param, if any */
2705 			locally_added_param = ((GatherMerge *) plan)->rescan_param;
2706 			if (locally_added_param >= 0)
2707 			{
2708 				valid_params = bms_add_member(bms_copy(valid_params),
2709 											  locally_added_param);
2710 
2711 				/*
2712 				 * We currently don't support nested Gathers.  The issue so
2713 				 * far as this function is concerned would be how to identify
2714 				 * which child nodes depend on which Gather.
2715 				 */
2716 				Assert(gather_param < 0);
2717 				/* Pass down rescan_param to child parallel-aware nodes */
2718 				gather_param = locally_added_param;
2719 			}
2720 			/* rescan_param does *not* get added to scan_params */
2721 			break;
2722 
2723 		case T_ProjectSet:
2724 		case T_Hash:
2725 		case T_Material:
2726 		case T_Sort:
2727 		case T_IncrementalSort:
2728 		case T_Unique:
2729 		case T_SetOp:
2730 		case T_Group:
2731 			/* no node-type-specific fields need fixing */
2732 			break;
2733 
2734 		default:
2735 			elog(ERROR, "unrecognized node type: %d",
2736 				 (int) nodeTag(plan));
2737 	}
2738 
2739 	/* Process left and right child plans, if any */
2740 	child_params = finalize_plan(root,
2741 								 plan->lefttree,
2742 								 gather_param,
2743 								 valid_params,
2744 								 scan_params);
2745 	context.paramids = bms_add_members(context.paramids, child_params);
2746 
2747 	if (nestloop_params)
2748 	{
2749 		/* right child can reference nestloop_params as well as valid_params */
2750 		child_params = finalize_plan(root,
2751 									 plan->righttree,
2752 									 gather_param,
2753 									 bms_union(nestloop_params, valid_params),
2754 									 scan_params);
2755 		/* ... and they don't count as parameters used at my level */
2756 		child_params = bms_difference(child_params, nestloop_params);
2757 		bms_free(nestloop_params);
2758 	}
2759 	else
2760 	{
2761 		/* easy case */
2762 		child_params = finalize_plan(root,
2763 									 plan->righttree,
2764 									 gather_param,
2765 									 valid_params,
2766 									 scan_params);
2767 	}
2768 	context.paramids = bms_add_members(context.paramids, child_params);
2769 
2770 	/*
2771 	 * Any locally generated parameter doesn't count towards its generating
2772 	 * plan node's external dependencies.  (Note: if we changed valid_params
2773 	 * and/or scan_params, we leak those bitmapsets; not worth the notational
2774 	 * trouble to clean them up.)
2775 	 */
2776 	if (locally_added_param >= 0)
2777 	{
2778 		context.paramids = bms_del_member(context.paramids,
2779 										  locally_added_param);
2780 	}
2781 
2782 	/* Now we have all the paramids referenced in this node and children */
2783 
2784 	if (!bms_is_subset(context.paramids, valid_params))
2785 		elog(ERROR, "plan should not reference subplan's variable");
2786 
2787 	/*
2788 	 * The plan node's allParam and extParam fields should include all its
2789 	 * referenced paramids, plus contributions from any child initPlans.
2790 	 * However, any setParams of the initPlans should not be present in the
2791 	 * parent node's extParams, only in its allParams.  (It's possible that
2792 	 * some initPlans have extParams that are setParams of other initPlans.)
2793 	 */
2794 
2795 	/* allParam must include initplans' extParams and setParams */
2796 	plan->allParam = bms_union(context.paramids, initExtParam);
2797 	plan->allParam = bms_add_members(plan->allParam, initSetParam);
2798 	/* extParam must include any initplan extParams */
2799 	plan->extParam = bms_union(context.paramids, initExtParam);
2800 	/* but not any initplan setParams */
2801 	plan->extParam = bms_del_members(plan->extParam, initSetParam);
2802 
2803 	/*
2804 	 * For speed at execution time, make sure extParam/allParam are actually
2805 	 * NULL if they are empty sets.
2806 	 */
2807 	if (bms_is_empty(plan->extParam))
2808 		plan->extParam = NULL;
2809 	if (bms_is_empty(plan->allParam))
2810 		plan->allParam = NULL;
2811 
2812 	return plan->allParam;
2813 }
2814 
2815 /*
2816  * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
2817  * expression tree to the result set.
2818  */
2819 static bool
finalize_primnode(Node * node,finalize_primnode_context * context)2820 finalize_primnode(Node *node, finalize_primnode_context *context)
2821 {
2822 	if (node == NULL)
2823 		return false;
2824 	if (IsA(node, Param))
2825 	{
2826 		if (((Param *) node)->paramkind == PARAM_EXEC)
2827 		{
2828 			int			paramid = ((Param *) node)->paramid;
2829 
2830 			context->paramids = bms_add_member(context->paramids, paramid);
2831 		}
2832 		return false;			/* no more to do here */
2833 	}
2834 	if (IsA(node, SubPlan))
2835 	{
2836 		SubPlan    *subplan = (SubPlan *) node;
2837 		Plan	   *plan = planner_subplan_get_plan(context->root, subplan);
2838 		ListCell   *lc;
2839 		Bitmapset  *subparamids;
2840 
2841 		/* Recurse into the testexpr, but not into the Plan */
2842 		finalize_primnode(subplan->testexpr, context);
2843 
2844 		/*
2845 		 * Remove any param IDs of output parameters of the subplan that were
2846 		 * referenced in the testexpr.  These are not interesting for
2847 		 * parameter change signaling since we always re-evaluate the subplan.
2848 		 * Note that this wouldn't work too well if there might be uses of the
2849 		 * same param IDs elsewhere in the plan, but that can't happen because
2850 		 * generate_new_exec_param never tries to merge params.
2851 		 */
2852 		foreach(lc, subplan->paramIds)
2853 		{
2854 			context->paramids = bms_del_member(context->paramids,
2855 											   lfirst_int(lc));
2856 		}
2857 
2858 		/* Also examine args list */
2859 		finalize_primnode((Node *) subplan->args, context);
2860 
2861 		/*
2862 		 * Add params needed by the subplan to paramids, but excluding those
2863 		 * we will pass down to it.  (We assume SS_finalize_plan was run on
2864 		 * the subplan already.)
2865 		 */
2866 		subparamids = bms_copy(plan->extParam);
2867 		foreach(lc, subplan->parParam)
2868 		{
2869 			subparamids = bms_del_member(subparamids, lfirst_int(lc));
2870 		}
2871 		context->paramids = bms_join(context->paramids, subparamids);
2872 
2873 		return false;			/* no more to do here */
2874 	}
2875 	return expression_tree_walker(node, finalize_primnode,
2876 								  (void *) context);
2877 }
2878 
2879 /*
2880  * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2881  * and add IDs of all PARAM_EXEC params appearing within their aggregated
2882  * arguments to the result set.
2883  */
2884 static bool
finalize_agg_primnode(Node * node,finalize_primnode_context * context)2885 finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2886 {
2887 	if (node == NULL)
2888 		return false;
2889 	if (IsA(node, Aggref))
2890 	{
2891 		Aggref	   *agg = (Aggref *) node;
2892 
2893 		/* we should not consider the direct arguments, if any */
2894 		finalize_primnode((Node *) agg->args, context);
2895 		finalize_primnode((Node *) agg->aggfilter, context);
2896 		return false;			/* there can't be any Aggrefs below here */
2897 	}
2898 	return expression_tree_walker(node, finalize_agg_primnode,
2899 								  (void *) context);
2900 }
2901 
2902 /*
2903  * SS_make_initplan_output_param - make a Param for an initPlan's output
2904  *
2905  * The plan is expected to return a scalar value of the given type/collation.
2906  *
2907  * Note that in some cases the initplan may not ever appear in the finished
2908  * plan tree.  If that happens, we'll have wasted a PARAM_EXEC slot, which
2909  * is no big deal.
2910  */
2911 Param *
SS_make_initplan_output_param(PlannerInfo * root,Oid resulttype,int32 resulttypmod,Oid resultcollation)2912 SS_make_initplan_output_param(PlannerInfo *root,
2913 							  Oid resulttype, int32 resulttypmod,
2914 							  Oid resultcollation)
2915 {
2916 	return generate_new_exec_param(root, resulttype,
2917 								   resulttypmod, resultcollation);
2918 }
2919 
2920 /*
2921  * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
2922  *
2923  * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
2924  * list for the outer query level.  A Param that represents the initplan's
2925  * output has already been assigned using SS_make_initplan_output_param.
2926  */
2927 void
SS_make_initplan_from_plan(PlannerInfo * root,PlannerInfo * subroot,Plan * plan,Param * prm)2928 SS_make_initplan_from_plan(PlannerInfo *root,
2929 						   PlannerInfo *subroot, Plan *plan,
2930 						   Param *prm)
2931 {
2932 	SubPlan    *node;
2933 
2934 	/*
2935 	 * Add the subplan and its PlannerInfo to the global lists.
2936 	 */
2937 	root->glob->subplans = lappend(root->glob->subplans, plan);
2938 	root->glob->subroots = lappend(root->glob->subroots, subroot);
2939 
2940 	/*
2941 	 * Create a SubPlan node and add it to the outer list of InitPlans. Note
2942 	 * it has to appear after any other InitPlans it might depend on (see
2943 	 * comments in ExecReScan).
2944 	 */
2945 	node = makeNode(SubPlan);
2946 	node->subLinkType = EXPR_SUBLINK;
2947 	node->plan_id = list_length(root->glob->subplans);
2948 	node->plan_name = psprintf("InitPlan %d (returns $%d)",
2949 							   node->plan_id, prm->paramid);
2950 	get_first_col_type(plan, &node->firstColType, &node->firstColTypmod,
2951 					   &node->firstColCollation);
2952 	node->setParam = list_make1_int(prm->paramid);
2953 
2954 	root->init_plans = lappend(root->init_plans, node);
2955 
2956 	/*
2957 	 * The node can't have any inputs (since it's an initplan), so the
2958 	 * parParam and args lists remain empty.
2959 	 */
2960 
2961 	/* Set costs of SubPlan using info from the plan tree */
2962 	cost_subplan(subroot, node, plan);
2963 }
2964