1 /*-------------------------------------------------------------------------
2  *
3  * subselect.c
4  *	  Planning routines for subselects.
5  *
6  * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e.,
7  * not sub-SELECT-in-FROM cases).
8  *
9  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * IDENTIFICATION
13  *	  src/backend/optimizer/plan/subselect.c
14  *
15  *-------------------------------------------------------------------------
16  */
17 #include "postgres.h"
18 
19 #include "access/htup_details.h"
20 #include "catalog/pg_operator.h"
21 #include "catalog/pg_type.h"
22 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/cost.h"
28 #include "optimizer/paramassign.h"
29 #include "optimizer/pathnode.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/planner.h"
32 #include "optimizer/prep.h"
33 #include "optimizer/subselect.h"
34 #include "optimizer/var.h"
35 #include "parser/parse_relation.h"
36 #include "rewrite/rewriteManip.h"
37 #include "utils/builtins.h"
38 #include "utils/lsyscache.h"
39 #include "utils/syscache.h"
40 
41 
/*
 * Working state handed to convert_testexpr_mutator by convert_testexpr.
 *
 * subst_nodes is looked up by PARAM_SUBLINK paramid, which is 1-based: the
 * Param with paramid N is replaced by a copy of the N'th list member.
 */
typedef struct convert_testexpr_context
{
	PlannerInfo *root;			/* planner state passed in by the caller */
	List	   *subst_nodes;	/* Nodes to substitute for Params */
} convert_testexpr_context;
47 
/*
 * Context struct for process_sublinks_mutator.
 *
 * NOTE(review): the mutator is not visible in this part of the file, so the
 * exact use of isTopQual is not confirmed here; presumably it records whether
 * the expression being processed sits at the top level of a WHERE/HAVING
 * qual, matching the isTopQual argument of make_subplan.
 */
typedef struct process_sublinks_context
{
	PlannerInfo *root;
	bool		isTopQual;
} process_sublinks_context;
53 
/*
 * Context struct for the finalize_primnode and finalize_agg_primnode
 * expression walkers, which collect into paramids the PARAM_EXEC IDs they
 * find.
 */
typedef struct finalize_primnode_context
{
	PlannerInfo *root;
	Bitmapset  *paramids;		/* Non-local PARAM_EXEC paramids found */
} finalize_primnode_context;
59 
60 
/*
 * Forward declarations for the file-local (static) routines of this module.
 */
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
			  List *plan_params,
			  SubLinkType subLinkType, int subLinkId,
			  Node *testexpr, List *testexpr_paramids,
			  bool unknownEqFalse);
static List *generate_subquery_params(PlannerInfo *root, List *tlist,
						 List **paramIds);
static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
					   Index varno);
static Node *convert_testexpr(PlannerInfo *root,
				 Node *testexpr,
				 List *subst_nodes);
static Node *convert_testexpr_mutator(Node *node,
						 convert_testexpr_context *context);
static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
static bool hash_ok_operator(OpExpr *expr);
static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
					  Node **testexpr, List **paramIds);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
static Node *process_sublinks_mutator(Node *node,
						 process_sublinks_context *context);
static Bitmapset *finalize_plan(PlannerInfo *root,
			  Plan *plan,
			  Bitmapset *valid_params,
			  Bitmapset *scan_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
91 
92 
93 /*
94  * Assign a (nonnegative) PARAM_EXEC ID for a special parameter (one that
95  * is not actually used to carry a value at runtime).  Such parameters are
96  * used for special runtime signaling purposes, such as connecting a
97  * recursive union node to its worktable scan node or forcing plan
98  * re-evaluation within the EvalPlanQual mechanism.  No actual Param node
99  * exists with this ID, however.
100  *
101  * XXX deprecated: use assign_special_exec_param directly, instead.  We are
102  * keeping this in v11 and below only to avoid API breaks.
103  */
104 int
SS_assign_special_param(PlannerInfo * root)105 SS_assign_special_param(PlannerInfo *root)
106 {
107 	return assign_special_exec_param(root);
108 }
109 
110 /*
111  * Get the datatype/typmod/collation of the first column of the plan's output.
112  *
113  * This information is stored for ARRAY_SUBLINK execution and for
114  * exprType()/exprTypmod()/exprCollation(), which have no way to get at the
115  * plan associated with a SubPlan node.  We really only need the info for
116  * EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency we save it
117  * always.
118  */
119 static void
get_first_col_type(Plan * plan,Oid * coltype,int32 * coltypmod,Oid * colcollation)120 get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
121 				   Oid *colcollation)
122 {
123 	/* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
124 	if (plan->targetlist)
125 	{
126 		TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
127 
128 		Assert(IsA(tent, TargetEntry));
129 		if (!tent->resjunk)
130 		{
131 			*coltype = exprType((Node *) tent->expr);
132 			*coltypmod = exprTypmod((Node *) tent->expr);
133 			*colcollation = exprCollation((Node *) tent->expr);
134 			return;
135 		}
136 	}
137 	*coltype = VOIDOID;
138 	*coltypmod = -1;
139 	*colcollation = InvalidOid;
140 }
141 
142 /*
143  * Convert a SubLink (as created by the parser) into a SubPlan.
144  *
145  * We are given the SubLink's contained query, type, ID, and testexpr.  We are
146  * also told if this expression appears at top level of a WHERE/HAVING qual.
147  *
148  * Note: we assume that the testexpr has been AND/OR flattened (actually,
149  * it's been through eval_const_expressions), but not converted to
150  * implicit-AND form; and any SubLinks in it should already have been
151  * converted to SubPlans.  The subquery is as yet untouched, however.
152  *
153  * The result is whatever we need to substitute in place of the SubLink node
154  * in the executable expression.  If we're going to do the subplan as a
155  * regular subplan, this will be the constructed SubPlan node.  If we're going
156  * to do the subplan as an InitPlan, the SubPlan node instead goes into
157  * root->init_plans, and what we return here is an expression tree
158  * representing the InitPlan's result: usually just a Param node representing
159  * a single scalar result, but possibly a row comparison tree containing
160  * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant
161  * (since the real output Params are elsewhere in the tree, and the MULTIEXPR
162  * subquery itself is in a resjunk tlist entry whose value is uninteresting).
163  */
164 static Node *
make_subplan(PlannerInfo * root,Query * orig_subquery,SubLinkType subLinkType,int subLinkId,Node * testexpr,bool isTopQual)165 make_subplan(PlannerInfo *root, Query *orig_subquery,
166 			 SubLinkType subLinkType, int subLinkId,
167 			 Node *testexpr, bool isTopQual)
168 {
169 	Query	   *subquery;
170 	bool		simple_exists = false;
171 	double		tuple_fraction;
172 	PlannerInfo *subroot;
173 	RelOptInfo *final_rel;
174 	Path	   *best_path;
175 	Plan	   *plan;
176 	List	   *plan_params;
177 	Node	   *result;
178 
179 	/*
180 	 * Copy the source Query node.  This is a quick and dirty kluge to resolve
181 	 * the fact that the parser can generate trees with multiple links to the
182 	 * same sub-Query node, but the planner wants to scribble on the Query.
183 	 * Try to clean this up when we do querytree redesign...
184 	 */
185 	subquery = (Query *) copyObject(orig_subquery);
186 
187 	/*
188 	 * If it's an EXISTS subplan, we might be able to simplify it.
189 	 */
190 	if (subLinkType == EXISTS_SUBLINK)
191 		simple_exists = simplify_EXISTS_query(root, subquery);
192 
193 	/*
194 	 * For an EXISTS subplan, tell lower-level planner to expect that only the
195 	 * first tuple will be retrieved.  For ALL and ANY subplans, we will be
196 	 * able to stop evaluating if the test condition fails or matches, so very
197 	 * often not all the tuples will be retrieved; for lack of a better idea,
198 	 * specify 50% retrieval.  For EXPR, MULTIEXPR, and ROWCOMPARE subplans,
199 	 * use default behavior (we're only expecting one row out, anyway).
200 	 *
201 	 * NOTE: if you change these numbers, also change cost_subplan() in
202 	 * path/costsize.c.
203 	 *
204 	 * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
205 	 * its output.  In that case it would've been better to specify full
206 	 * retrieval.  At present, however, we can only check hashability after
207 	 * we've made the subplan :-(.  (Determining whether it'll fit in work_mem
208 	 * is the really hard part.)  Therefore, we don't want to be too
209 	 * optimistic about the percentage of tuples retrieved, for fear of
210 	 * selecting a plan that's bad for the materialization case.
211 	 */
212 	if (subLinkType == EXISTS_SUBLINK)
213 		tuple_fraction = 1.0;	/* just like a LIMIT 1 */
214 	else if (subLinkType == ALL_SUBLINK ||
215 			 subLinkType == ANY_SUBLINK)
216 		tuple_fraction = 0.5;	/* 50% */
217 	else
218 		tuple_fraction = 0.0;	/* default behavior */
219 
220 	/* plan_params should not be in use in current query level */
221 	Assert(root->plan_params == NIL);
222 
223 	/* Generate Paths for the subquery */
224 	subroot = subquery_planner(root->glob, subquery,
225 							   root,
226 							   false, tuple_fraction);
227 
228 	/* Isolate the params needed by this specific subplan */
229 	plan_params = root->plan_params;
230 	root->plan_params = NIL;
231 
232 	/*
233 	 * Select best Path and turn it into a Plan.  At least for now, there
234 	 * seems no reason to postpone doing that.
235 	 */
236 	final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
237 	best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
238 
239 	plan = create_plan(subroot, best_path);
240 
241 	/* And convert to SubPlan or InitPlan format. */
242 	result = build_subplan(root, plan, subroot, plan_params,
243 						   subLinkType, subLinkId,
244 						   testexpr, NIL, isTopQual);
245 
246 	/*
247 	 * If it's a correlated EXISTS with an unimportant targetlist, we might be
248 	 * able to transform it to the equivalent of an IN and then implement it
249 	 * by hashing.  We don't have enough information yet to tell which way is
250 	 * likely to be better (it depends on the expected number of executions of
251 	 * the EXISTS qual, and we are much too early in planning the outer query
252 	 * to be able to guess that).  So we generate both plans, if possible, and
253 	 * leave it to the executor to decide which to use.
254 	 */
255 	if (simple_exists && IsA(result, SubPlan))
256 	{
257 		Node	   *newtestexpr;
258 		List	   *paramIds;
259 
260 		/* Make a second copy of the original subquery */
261 		subquery = (Query *) copyObject(orig_subquery);
262 		/* and re-simplify */
263 		simple_exists = simplify_EXISTS_query(root, subquery);
264 		Assert(simple_exists);
265 		/* See if it can be converted to an ANY query */
266 		subquery = convert_EXISTS_to_ANY(root, subquery,
267 										 &newtestexpr, &paramIds);
268 		if (subquery)
269 		{
270 			/* Generate Paths for the ANY subquery; we'll need all rows */
271 			subroot = subquery_planner(root->glob, subquery,
272 									   root,
273 									   false, 0.0);
274 
275 			/* Isolate the params needed by this specific subplan */
276 			plan_params = root->plan_params;
277 			root->plan_params = NIL;
278 
279 			/* Select best Path and turn it into a Plan */
280 			final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
281 			best_path = final_rel->cheapest_total_path;
282 
283 			plan = create_plan(subroot, best_path);
284 
285 			/* Now we can check if it'll fit in work_mem */
286 			/* XXX can we check this at the Path stage? */
287 			if (subplan_is_hashable(plan))
288 			{
289 				SubPlan    *hashplan;
290 				AlternativeSubPlan *asplan;
291 
292 				/* OK, convert to SubPlan format. */
293 				hashplan = (SubPlan *) build_subplan(root, plan, subroot,
294 													 plan_params,
295 													 ANY_SUBLINK, 0,
296 													 newtestexpr,
297 													 paramIds,
298 													 true);
299 				/* Check we got what we expected */
300 				Assert(IsA(hashplan, SubPlan));
301 				Assert(hashplan->parParam == NIL);
302 				Assert(hashplan->useHashTable);
303 
304 				/* Leave it to the executor to decide which plan to use */
305 				asplan = makeNode(AlternativeSubPlan);
306 				asplan->subplans = list_make2(result, hashplan);
307 				result = (Node *) asplan;
308 			}
309 		}
310 	}
311 
312 	return result;
313 }
314 
/*
 * build_subplan: build a SubPlan node given the raw inputs --- subroutine
 * for make_subplan.
 *
 * root is the planner state of the query level that contains the SubLink.
 * plan is the finished Plan for the sub-select and subroot the PlannerInfo it
 * was planned with.  plan_params is the list of PlannerParamItems (param ID
 * plus expression) that this query level has to pass down to the subplan.
 * subLinkType is the kind of SubLink; subLinkId is consulted only for
 * MULTIEXPR sublinks.  testexpr is the SubLink's test expression, if any.
 * If testexpr_paramids is not NIL, the caller has already replaced the
 * PARAM_SUBLINK Params in testexpr and passes the list of param IDs it used;
 * otherwise the replacement is done here.  unknownEqFalse is copied into the
 * SubPlan node as-is.
 *
 * Returns either the SubPlan, or a replacement expression if we decide to
 * make it an InitPlan, as explained in the comments for make_subplan.
 *
 * Side effects on the planner state: the plan and subroot are appended to
 * root->glob->subplans and root->glob->subroots, an initplan's SubPlan node
 * is appended to root->init_plans, a MULTIEXPR sublink's replacement Params
 * are stored in root->multiexpr_params, and a parameterless, unhashed
 * regular subplan is added to root->glob->rewindPlanIDs.
 */
static Node *
build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
			  List *plan_params,
			  SubLinkType subLinkType, int subLinkId,
			  Node *testexpr, List *testexpr_paramids,
			  bool unknownEqFalse)
{
	Node	   *result;
	SubPlan    *splan;
	bool		isInitPlan;
	ListCell   *lc;

	/*
	 * Initialize the SubPlan node.  Note plan_id, plan_name, and cost fields
	 * are set further down.
	 */
	splan = makeNode(SubPlan);
	splan->subLinkType = subLinkType;
	splan->testexpr = NULL;
	splan->paramIds = NIL;
	get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
					   &splan->firstColCollation);
	splan->useHashTable = false;
	splan->unknownEqFalse = unknownEqFalse;
	splan->setParam = NIL;
	splan->parParam = NIL;
	splan->args = NIL;

	/*
	 * Make parParam and args lists of param IDs and expressions that current
	 * query level will pass to this child plan.  The two lists are built in
	 * step, so the N'th parParam ID goes with the N'th args expression.
	 */
	foreach(lc, plan_params)
	{
		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lc);
		Node	   *arg = pitem->item;

		/*
		 * The Var, PlaceHolderVar, or Aggref has already been adjusted to
		 * have the correct varlevelsup, phlevelsup, or agglevelsup.
		 *
		 * If it's a PlaceHolderVar or Aggref, its arguments might contain
		 * SubLinks, which have not yet been processed (see the comments for
		 * SS_replace_correlation_vars).  Do that now.
		 */
		if (IsA(arg, PlaceHolderVar) ||
			IsA(arg, Aggref))
			arg = SS_process_sublinks(root, arg, false);

		splan->parParam = lappend_int(splan->parParam, pitem->paramId);
		splan->args = lappend(splan->args, arg);
	}

	/*
	 * Uncorrelated or only indirectly correlated plans (that is, ones with
	 * an empty parParam list) of EXISTS, EXPR, ARRAY, ROWCOMPARE, or
	 * MULTIEXPR types can be used as initPlans.  For EXISTS, EXPR, or ARRAY,
	 * we return a Param referring to the result of evaluating the initPlan.
	 * For ROWCOMPARE, we must modify the testexpr tree to contain PARAM_EXEC
	 * Params instead of the PARAM_SUBLINK Params emitted by the parser, and
	 * then return that tree.  For MULTIEXPR, we return a null constant: the
	 * resjunk targetlist item containing the SubLink does not need to return
	 * anything useful, since the referencing Params are elsewhere.
	 */
	if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK)
	{
		Param	   *prm;

		/* EXISTS yields a single boolean result */
		Assert(testexpr == NULL);
		prm = generate_new_exec_param(root, BOOLOID, -1, InvalidOid);
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK)
	{
		TargetEntry *te = linitial(plan->targetlist);
		Param	   *prm;

		/* The result Param takes on the type of the first output column */
		Assert(!te->resjunk);
		Assert(testexpr == NULL);
		prm = generate_new_exec_param(root,
									  exprType((Node *) te->expr),
									  exprTypmod((Node *) te->expr),
									  exprCollation((Node *) te->expr));
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK)
	{
		TargetEntry *te = linitial(plan->targetlist);
		Oid			arraytype;
		Param	   *prm;

		/*
		 * The result Param has the array type that corresponds to the first
		 * output column's type; fail if there is none.
		 */
		Assert(!te->resjunk);
		Assert(testexpr == NULL);
		arraytype = get_promoted_array_type(exprType((Node *) te->expr));
		if (!OidIsValid(arraytype))
			elog(ERROR, "could not find array type for datatype %s",
				 format_type_be(exprType((Node *) te->expr)));
		prm = generate_new_exec_param(root,
									  arraytype,
									  exprTypmod((Node *) te->expr),
									  exprCollation((Node *) te->expr));
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK)
	{
		/* Adjust the Params */
		List	   *params;

		Assert(testexpr != NULL);
		params = generate_subquery_params(root,
										  plan->targetlist,
										  &splan->paramIds);
		result = convert_testexpr(root,
								  testexpr,
								  params);
		/* copy, so that setParam and paramIds do not share one List */
		splan->setParam = list_copy(splan->paramIds);
		isInitPlan = true;

		/*
		 * The executable expression is returned to become part of the outer
		 * plan's expression tree; it is not kept in the initplan node.
		 */
	}
	else if (subLinkType == MULTIEXPR_SUBLINK)
	{
		/*
		 * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param
		 * for each output column.
		 */
		List	   *params;

		Assert(testexpr == NULL);
		params = generate_subquery_params(root,
										  plan->targetlist,
										  &splan->setParam);

		/*
		 * Save the list of replacement Params in the n'th cell of
		 * root->multiexpr_params; setrefs.c will use it to replace
		 * PARAM_MULTIEXPR Params.  The list is first padded with NIL entries
		 * until it has at least subLinkId cells, and the cell for this
		 * sublink (index subLinkId - 1) must still be empty.
		 */
		while (list_length(root->multiexpr_params) < subLinkId)
			root->multiexpr_params = lappend(root->multiexpr_params, NIL);
		lc = list_nth_cell(root->multiexpr_params, subLinkId - 1);
		Assert(lfirst(lc) == NIL);
		lfirst(lc) = params;

		/* It can be an initplan if there are no parParams. */
		if (splan->parParam == NIL)
		{
			isInitPlan = true;
			result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid);
		}
		else
		{
			isInitPlan = false;
			result = (Node *) splan;
		}
	}
	else
	{
		/*
		 * Adjust the Params in the testexpr, unless caller already took care
		 * of it (as indicated by passing a list of Param IDs).
		 */
		if (testexpr && testexpr_paramids == NIL)
		{
			List	   *params;

			params = generate_subquery_params(root,
											  plan->targetlist,
											  &splan->paramIds);
			splan->testexpr = convert_testexpr(root,
											   testexpr,
											   params);
		}
		else
		{
			splan->testexpr = testexpr;
			splan->paramIds = testexpr_paramids;
		}

		/*
		 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
		 * initPlans, even when they are uncorrelated or only indirectly
		 * correlated, because we need to scan the output of the subplan for
		 * each outer tuple.  But if it's a not-direct-correlated IN (= ANY)
		 * test, we might be able to use a hashtable to avoid comparing all
		 * the tuples.
		 */
		if (subLinkType == ANY_SUBLINK &&
			splan->parParam == NIL &&
			subplan_is_hashable(plan) &&
			testexpr_is_hashable(splan->testexpr, splan->paramIds))
			splan->useHashTable = true;

		/*
		 * Otherwise, we have the option to tack a Material node onto the top
		 * of the subplan, to reduce the cost of reading it repeatedly.  This
		 * is pointless for a direct-correlated subplan, since we'd have to
		 * recompute its results each time anyway.  For uncorrelated or only
		 * indirectly correlated subplans, we add Material unless the
		 * subplan's top plan node would materialize its output anyway.
		 * Also, if enable_material is false, then the user does not want us
		 * to materialize anything unnecessarily, so we don't.
		 */
		else if (splan->parParam == NIL && enable_material &&
				 !ExecMaterializesOutput(nodeTag(plan)))
			plan = materialize_finished_plan(plan);

		result = (Node *) splan;
		isInitPlan = false;
	}

	/*
	 * Add the subplan and its PlannerInfo to the global lists.  The plan_id
	 * is the 1-based position of the plan in root->glob->subplans.
	 */
	root->glob->subplans = lappend(root->glob->subplans, plan);
	root->glob->subroots = lappend(root->glob->subroots, subroot);
	splan->plan_id = list_length(root->glob->subplans);

	if (isInitPlan)
		root->init_plans = lappend(root->init_plans, splan);

	/*
	 * A parameterless subplan (not initplan) should be prepared to handle
	 * REWIND efficiently.  If it has direct parameters then there's no point
	 * since it'll be reset on each scan anyway; and if it's an initplan then
	 * there's no point since it won't get re-run without parameter changes
	 * anyway.  The input of a hashed subplan doesn't need REWIND either.
	 */
	if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable)
		root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
												   splan->plan_id);

	/*
	 * Label the subplan for EXPLAIN purposes.  The name is "InitPlan N" or
	 * "SubPlan N", followed by " (returns $a,$b,...)" when there are
	 * setParams.  The buffer is sized as a fixed 32-byte allowance for the
	 * constant text and plan ID, plus 12 bytes for each setParam entry
	 * printed by the loop below.
	 */
	splan->plan_name = palloc(32 + 12 * list_length(splan->setParam));
	sprintf(splan->plan_name, "%s %d",
			isInitPlan ? "InitPlan" : "SubPlan",
			splan->plan_id);
	if (splan->setParam)
	{
		char	   *ptr = splan->plan_name + strlen(splan->plan_name);

		ptr += sprintf(ptr, " (returns ");
		foreach(lc, splan->setParam)
		{
			/* separate entries with commas; the last one closes the paren */
			ptr += sprintf(ptr, "$%d%s",
						   lfirst_int(lc),
						   lnext(lc) ? "," : ")");
		}
	}

	/* Lastly, fill in the cost estimates for use later */
	cost_subplan(root, splan, plan);

	return result;
}
584 
585 /*
586  * generate_subquery_params: build a list of Params representing the output
587  * columns of a sublink's sub-select, given the sub-select's targetlist.
588  *
589  * We also return an integer list of the paramids of the Params.
590  */
591 static List *
generate_subquery_params(PlannerInfo * root,List * tlist,List ** paramIds)592 generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds)
593 {
594 	List	   *result;
595 	List	   *ids;
596 	ListCell   *lc;
597 
598 	result = ids = NIL;
599 	foreach(lc, tlist)
600 	{
601 		TargetEntry *tent = (TargetEntry *) lfirst(lc);
602 		Param	   *param;
603 
604 		if (tent->resjunk)
605 			continue;
606 
607 		param = generate_new_exec_param(root,
608 										exprType((Node *) tent->expr),
609 										exprTypmod((Node *) tent->expr),
610 										exprCollation((Node *) tent->expr));
611 		result = lappend(result, param);
612 		ids = lappend_int(ids, param->paramid);
613 	}
614 
615 	*paramIds = ids;
616 	return result;
617 }
618 
619 /*
620  * generate_subquery_vars: build a list of Vars representing the output
621  * columns of a sublink's sub-select, given the sub-select's targetlist.
622  * The Vars have the specified varno (RTE index).
623  */
624 static List *
generate_subquery_vars(PlannerInfo * root,List * tlist,Index varno)625 generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
626 {
627 	List	   *result;
628 	ListCell   *lc;
629 
630 	result = NIL;
631 	foreach(lc, tlist)
632 	{
633 		TargetEntry *tent = (TargetEntry *) lfirst(lc);
634 		Var		   *var;
635 
636 		if (tent->resjunk)
637 			continue;
638 
639 		var = makeVarFromTargetEntry(varno, tent);
640 		result = lappend(result, var);
641 	}
642 
643 	return result;
644 }
645 
646 /*
647  * convert_testexpr: convert the testexpr given by the parser into
648  * actually executable form.  This entails replacing PARAM_SUBLINK Params
649  * with Params or Vars representing the results of the sub-select.  The
650  * nodes to be substituted are passed in as the List result from
651  * generate_subquery_params or generate_subquery_vars.
652  */
653 static Node *
convert_testexpr(PlannerInfo * root,Node * testexpr,List * subst_nodes)654 convert_testexpr(PlannerInfo *root,
655 				 Node *testexpr,
656 				 List *subst_nodes)
657 {
658 	convert_testexpr_context context;
659 
660 	context.root = root;
661 	context.subst_nodes = subst_nodes;
662 	return convert_testexpr_mutator(testexpr, &context);
663 }
664 
665 static Node *
convert_testexpr_mutator(Node * node,convert_testexpr_context * context)666 convert_testexpr_mutator(Node *node,
667 						 convert_testexpr_context *context)
668 {
669 	if (node == NULL)
670 		return NULL;
671 	if (IsA(node, Param))
672 	{
673 		Param	   *param = (Param *) node;
674 
675 		if (param->paramkind == PARAM_SUBLINK)
676 		{
677 			if (param->paramid <= 0 ||
678 				param->paramid > list_length(context->subst_nodes))
679 				elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid);
680 
681 			/*
682 			 * We copy the list item to avoid having doubly-linked
683 			 * substructure in the modified parse tree.  This is probably
684 			 * unnecessary when it's a Param, but be safe.
685 			 */
686 			return (Node *) copyObject(list_nth(context->subst_nodes,
687 												param->paramid - 1));
688 		}
689 	}
690 	if (IsA(node, SubLink))
691 	{
692 		/*
693 		 * If we come across a nested SubLink, it is neither necessary nor
694 		 * correct to recurse into it: any PARAM_SUBLINKs we might find inside
695 		 * belong to the inner SubLink not the outer. So just return it as-is.
696 		 *
697 		 * This reasoning depends on the assumption that nothing will pull
698 		 * subexpressions into or out of the testexpr field of a SubLink, at
699 		 * least not without replacing PARAM_SUBLINKs first.  If we did want
700 		 * to do that we'd need to rethink the parser-output representation
701 		 * altogether, since currently PARAM_SUBLINKs are only unique per
702 		 * SubLink not globally across the query.  The whole point of
703 		 * replacing them with Vars or PARAM_EXEC nodes is to make them
704 		 * globally unique before they escape from the SubLink's testexpr.
705 		 *
706 		 * Note: this can't happen when called during SS_process_sublinks,
707 		 * because that recursively processes inner SubLinks first.  It can
708 		 * happen when called from convert_ANY_sublink_to_join, though.
709 		 */
710 		return node;
711 	}
712 	return expression_tree_mutator(node,
713 								   convert_testexpr_mutator,
714 								   (void *) context);
715 }
716 
717 /*
718  * subplan_is_hashable: can we implement an ANY subplan by hashing?
719  */
720 static bool
subplan_is_hashable(Plan * plan)721 subplan_is_hashable(Plan *plan)
722 {
723 	double		subquery_size;
724 
725 	/*
726 	 * The estimated size of the subquery result must fit in work_mem. (Note:
727 	 * we use heap tuple overhead here even though the tuples will actually be
728 	 * stored as MinimalTuples; this provides some fudge factor for hashtable
729 	 * overhead.)
730 	 */
731 	subquery_size = plan->plan_rows *
732 		(MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
733 	if (subquery_size > work_mem * 1024L)
734 		return false;
735 
736 	return true;
737 }
738 
739 /*
740  * testexpr_is_hashable: is an ANY SubLink's test expression hashable?
741  *
742  * To identify LHS vs RHS of the hash expression, we must be given the
743  * list of output Param IDs of the SubLink's subquery.
744  */
745 static bool
testexpr_is_hashable(Node * testexpr,List * param_ids)746 testexpr_is_hashable(Node *testexpr, List *param_ids)
747 {
748 	/*
749 	 * The testexpr must be a single OpExpr, or an AND-clause containing only
750 	 * OpExprs, each of which satisfy test_opexpr_is_hashable().
751 	 */
752 	if (testexpr && IsA(testexpr, OpExpr))
753 	{
754 		if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids))
755 			return true;
756 	}
757 	else if (and_clause(testexpr))
758 	{
759 		ListCell   *l;
760 
761 		foreach(l, ((BoolExpr *) testexpr)->args)
762 		{
763 			Node	   *andarg = (Node *) lfirst(l);
764 
765 			if (!IsA(andarg, OpExpr))
766 				return false;
767 			if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids))
768 				return false;
769 		}
770 		return true;
771 	}
772 
773 	return false;
774 }
775 
776 static bool
test_opexpr_is_hashable(OpExpr * testexpr,List * param_ids)777 test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids)
778 {
779 	/*
780 	 * The combining operator must be hashable and strict.  The need for
781 	 * hashability is obvious, since we want to use hashing.  Without
782 	 * strictness, behavior in the presence of nulls is too unpredictable.  We
783 	 * actually must assume even more than plain strictness: it can't yield
784 	 * NULL for non-null inputs, either (see nodeSubplan.c).  However, hash
785 	 * indexes and hash joins assume that too.
786 	 */
787 	if (!hash_ok_operator(testexpr))
788 		return false;
789 
790 	/*
791 	 * The left and right inputs must belong to the outer and inner queries
792 	 * respectively; hence Params that will be supplied by the subquery must
793 	 * not appear in the LHS, and Vars of the outer query must not appear in
794 	 * the RHS.  (Ordinarily, this must be true because of the way that the
795 	 * parser builds an ANY SubLink's testexpr ... but inlining of functions
796 	 * could have changed the expression's structure, so we have to check.
797 	 * Such cases do not occur often enough to be worth trying to optimize, so
798 	 * we don't worry about trying to commute the clause or anything like
799 	 * that; we just need to be sure not to build an invalid plan.)
800 	 */
801 	if (list_length(testexpr->args) != 2)
802 		return false;
803 	if (contain_exec_param((Node *) linitial(testexpr->args), param_ids))
804 		return false;
805 	if (contain_var_clause((Node *) lsecond(testexpr->args)))
806 		return false;
807 	return true;
808 }
809 
810 /*
811  * Check expression is hashable + strict
812  *
813  * We could use op_hashjoinable() and op_strict(), but do it like this to
814  * avoid a redundant cache lookup.
815  */
816 static bool
hash_ok_operator(OpExpr * expr)817 hash_ok_operator(OpExpr *expr)
818 {
819 	Oid			opid = expr->opno;
820 
821 	/* quick out if not a binary operator */
822 	if (list_length(expr->args) != 2)
823 		return false;
824 	if (opid == ARRAY_EQ_OP)
825 	{
826 		/* array_eq is strict, but must check input type to ensure hashable */
827 		/* XXX record_eq will need same treatment when it becomes hashable */
828 		Node	   *leftarg = linitial(expr->args);
829 
830 		return op_hashjoinable(opid, exprType(leftarg));
831 	}
832 	else
833 	{
834 		/* else must look up the operator properties */
835 		HeapTuple	tup;
836 		Form_pg_operator optup;
837 
838 		tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid));
839 		if (!HeapTupleIsValid(tup))
840 			elog(ERROR, "cache lookup failed for operator %u", opid);
841 		optup = (Form_pg_operator) GETSTRUCT(tup);
842 		if (!optup->oprcanhash || !func_strict(optup->oprcode))
843 		{
844 			ReleaseSysCache(tup);
845 			return false;
846 		}
847 		ReleaseSysCache(tup);
848 		return true;
849 	}
850 }
851 
852 
853 /*
854  * SS_process_ctes: process a query's WITH list
855  *
856  * We plan each interesting WITH item and convert it to an initplan.
857  * A side effect is to fill in root->cte_plan_ids with a list that
858  * parallels root->parse->cteList and provides the subplan ID for
859  * each CTE's initplan.
860  */
861 void
SS_process_ctes(PlannerInfo * root)862 SS_process_ctes(PlannerInfo *root)
863 {
864 	ListCell   *lc;
865 
866 	Assert(root->cte_plan_ids == NIL);
867 
868 	foreach(lc, root->parse->cteList)
869 	{
870 		CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
871 		CmdType		cmdType = ((Query *) cte->ctequery)->commandType;
872 		Query	   *subquery;
873 		PlannerInfo *subroot;
874 		RelOptInfo *final_rel;
875 		Path	   *best_path;
876 		Plan	   *plan;
877 		SubPlan    *splan;
878 		int			paramid;
879 
880 		/*
881 		 * Ignore SELECT CTEs that are not actually referenced anywhere.
882 		 */
883 		if (cte->cterefcount == 0 && cmdType == CMD_SELECT)
884 		{
885 			/* Make a dummy entry in cte_plan_ids */
886 			root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
887 			continue;
888 		}
889 
890 		/*
891 		 * Copy the source Query node.  Probably not necessary, but let's keep
892 		 * this similar to make_subplan.
893 		 */
894 		subquery = (Query *) copyObject(cte->ctequery);
895 
896 		/* plan_params should not be in use in current query level */
897 		Assert(root->plan_params == NIL);
898 
899 		/*
900 		 * Generate Paths for the CTE query.  Always plan for full retrieval
901 		 * --- we don't have enough info to predict otherwise.
902 		 */
903 		subroot = subquery_planner(root->glob, subquery,
904 								   root,
905 								   cte->cterecursive, 0.0);
906 
907 		/*
908 		 * Since the current query level doesn't yet contain any RTEs, it
909 		 * should not be possible for the CTE to have requested parameters of
910 		 * this level.
911 		 */
912 		if (root->plan_params)
913 			elog(ERROR, "unexpected outer reference in CTE query");
914 
915 		/*
916 		 * Select best Path and turn it into a Plan.  At least for now, there
917 		 * seems no reason to postpone doing that.
918 		 */
919 		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
920 		best_path = final_rel->cheapest_total_path;
921 
922 		plan = create_plan(subroot, best_path);
923 
924 		/*
925 		 * Make a SubPlan node for it.  This is just enough unlike
926 		 * build_subplan that we can't share code.
927 		 *
928 		 * Note plan_id, plan_name, and cost fields are set further down.
929 		 */
930 		splan = makeNode(SubPlan);
931 		splan->subLinkType = CTE_SUBLINK;
932 		splan->testexpr = NULL;
933 		splan->paramIds = NIL;
934 		get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
935 						   &splan->firstColCollation);
936 		splan->useHashTable = false;
937 		splan->unknownEqFalse = false;
938 		splan->setParam = NIL;
939 		splan->parParam = NIL;
940 		splan->args = NIL;
941 
942 		/*
943 		 * The node can't have any inputs (since it's an initplan), so the
944 		 * parParam and args lists remain empty.  (It could contain references
945 		 * to earlier CTEs' output param IDs, but CTE outputs are not
946 		 * propagated via the args list.)
947 		 */
948 
949 		/*
950 		 * Assign a param ID to represent the CTE's output.  No ordinary
951 		 * "evaluation" of this param slot ever happens, but we use the param
952 		 * ID for setParam/chgParam signaling just as if the CTE plan were
953 		 * returning a simple scalar output.  (Also, the executor abuses the
954 		 * ParamExecData slot for this param ID for communication among
955 		 * multiple CteScan nodes that might be scanning this CTE.)
956 		 */
957 		paramid = assign_special_exec_param(root);
958 		splan->setParam = list_make1_int(paramid);
959 
960 		/*
961 		 * Add the subplan and its PlannerInfo to the global lists.
962 		 */
963 		root->glob->subplans = lappend(root->glob->subplans, plan);
964 		root->glob->subroots = lappend(root->glob->subroots, subroot);
965 		splan->plan_id = list_length(root->glob->subplans);
966 
967 		root->init_plans = lappend(root->init_plans, splan);
968 
969 		root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id);
970 
971 		/* Label the subplan for EXPLAIN purposes */
972 		splan->plan_name = psprintf("CTE %s", cte->ctename);
973 
974 		/* Lastly, fill in the cost estimates for use later */
975 		cost_subplan(root, splan, plan);
976 	}
977 }
978 
979 /*
980  * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
981  *
982  * The caller has found an ANY SubLink at the top level of one of the query's
983  * qual clauses, but has not checked the properties of the SubLink further.
984  * Decide whether it is appropriate to process this SubLink in join style.
985  * If so, form a JoinExpr and return it.  Return NULL if the SubLink cannot
986  * be converted to a join.
987  *
988  * The only non-obvious input parameter is available_rels: this is the set
989  * of query rels that can safely be referenced in the sublink expression.
990  * (We must restrict this to avoid changing the semantics when a sublink
991  * is present in an outer join's ON qual.)  The conversion must fail if
992  * the converted qual would reference any but these parent-query relids.
993  *
994  * On success, the returned JoinExpr has larg = NULL and rarg = the jointree
995  * item representing the pulled-up subquery.  The caller must set larg to
996  * represent the relation(s) on the lefthand side of the new join, and insert
997  * the JoinExpr into the upper query's jointree at an appropriate place
998  * (typically, where the lefthand relation(s) had been).  Note that the
999  * passed-in SubLink must also be removed from its original position in the
1000  * query quals, since the quals of the returned JoinExpr replace it.
1001  * (Notionally, we replace the SubLink with a constant TRUE, then elide the
1002  * redundant constant from the qual.)
1003  *
1004  * On success, the caller is also responsible for recursively applying
1005  * pull_up_sublinks processing to the rarg and quals of the returned JoinExpr.
1006  * (On failure, there is no need to do anything, since pull_up_sublinks will
1007  * be applied when we recursively plan the sub-select.)
1008  *
1009  * Side effects of a successful conversion include adding the SubLink's
1010  * subselect to the query's rangetable, so that it can be referenced in
1011  * the JoinExpr's rarg.
1012  */
1013 JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo * root,SubLink * sublink,Relids available_rels)1014 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
1015 							Relids available_rels)
1016 {
1017 	JoinExpr   *result;
1018 	Query	   *parse = root->parse;
1019 	Query	   *subselect = (Query *) sublink->subselect;
1020 	Relids		upper_varnos;
1021 	int			rtindex;
1022 	RangeTblEntry *rte;
1023 	RangeTblRef *rtr;
1024 	List	   *subquery_vars;
1025 	Node	   *quals;
1026 	ParseState *pstate;
1027 
1028 	Assert(sublink->subLinkType == ANY_SUBLINK);
1029 
1030 	/*
1031 	 * The sub-select must not refer to any Vars of the parent query. (Vars of
1032 	 * higher levels should be okay, though.)
1033 	 */
1034 	if (contain_vars_of_level((Node *) subselect, 1))
1035 		return NULL;
1036 
1037 	/*
1038 	 * The test expression must contain some Vars of the parent query, else
1039 	 * it's not gonna be a join.  (Note that it won't have Vars referring to
1040 	 * the subquery, rather Params.)
1041 	 */
1042 	upper_varnos = pull_varnos(sublink->testexpr);
1043 	if (bms_is_empty(upper_varnos))
1044 		return NULL;
1045 
1046 	/*
1047 	 * However, it can't refer to anything outside available_rels.
1048 	 */
1049 	if (!bms_is_subset(upper_varnos, available_rels))
1050 		return NULL;
1051 
1052 	/*
1053 	 * The combining operators and left-hand expressions mustn't be volatile.
1054 	 */
1055 	if (contain_volatile_functions(sublink->testexpr))
1056 		return NULL;
1057 
1058 	/* Create a dummy ParseState for addRangeTableEntryForSubquery */
1059 	pstate = make_parsestate(NULL);
1060 
1061 	/*
1062 	 * Okay, pull up the sub-select into upper range table.
1063 	 *
1064 	 * We rely here on the assumption that the outer query has no references
1065 	 * to the inner (necessarily true, other than the Vars that we build
1066 	 * below). Therefore this is a lot easier than what pull_up_subqueries has
1067 	 * to go through.
1068 	 */
1069 	rte = addRangeTableEntryForSubquery(pstate,
1070 										subselect,
1071 										makeAlias("ANY_subquery", NIL),
1072 										false,
1073 										false);
1074 	parse->rtable = lappend(parse->rtable, rte);
1075 	rtindex = list_length(parse->rtable);
1076 
1077 	/*
1078 	 * Form a RangeTblRef for the pulled-up sub-select.
1079 	 */
1080 	rtr = makeNode(RangeTblRef);
1081 	rtr->rtindex = rtindex;
1082 
1083 	/*
1084 	 * Build a list of Vars representing the subselect outputs.
1085 	 */
1086 	subquery_vars = generate_subquery_vars(root,
1087 										   subselect->targetList,
1088 										   rtindex);
1089 
1090 	/*
1091 	 * Build the new join's qual expression, replacing Params with these Vars.
1092 	 */
1093 	quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
1094 
1095 	/*
1096 	 * And finally, build the JoinExpr node.
1097 	 */
1098 	result = makeNode(JoinExpr);
1099 	result->jointype = JOIN_SEMI;
1100 	result->isNatural = false;
1101 	result->larg = NULL;		/* caller must fill this in */
1102 	result->rarg = (Node *) rtr;
1103 	result->usingClause = NIL;
1104 	result->quals = quals;
1105 	result->alias = NULL;
1106 	result->rtindex = 0;		/* we don't need an RTE for it */
1107 
1108 	return result;
1109 }
1110 
1111 /*
1112  * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
1113  *
1114  * The API of this function is identical to convert_ANY_sublink_to_join's,
1115  * except that we also support the case where the caller has found NOT EXISTS,
1116  * so we need an additional input parameter "under_not".
1117  */
1118 JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo * root,SubLink * sublink,bool under_not,Relids available_rels)1119 convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
1120 							   bool under_not, Relids available_rels)
1121 {
1122 	JoinExpr   *result;
1123 	Query	   *parse = root->parse;
1124 	Query	   *subselect = (Query *) sublink->subselect;
1125 	Node	   *whereClause;
1126 	int			rtoffset;
1127 	int			varno;
1128 	Relids		clause_varnos;
1129 	Relids		upper_varnos;
1130 
1131 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
1132 
1133 	/*
1134 	 * Can't flatten if it contains WITH.  (We could arrange to pull up the
1135 	 * WITH into the parent query's cteList, but that risks changing the
1136 	 * semantics, since a WITH ought to be executed once per associated query
1137 	 * call.)  Note that convert_ANY_sublink_to_join doesn't have to reject
1138 	 * this case, since it just produces a subquery RTE that doesn't have to
1139 	 * get flattened into the parent query.
1140 	 */
1141 	if (subselect->cteList)
1142 		return NULL;
1143 
1144 	/*
1145 	 * Copy the subquery so we can modify it safely (see comments in
1146 	 * make_subplan).
1147 	 */
1148 	subselect = (Query *) copyObject(subselect);
1149 
1150 	/*
1151 	 * See if the subquery can be simplified based on the knowledge that it's
1152 	 * being used in EXISTS().  If we aren't able to get rid of its
1153 	 * targetlist, we have to fail, because the pullup operation leaves us
1154 	 * with noplace to evaluate the targetlist.
1155 	 */
1156 	if (!simplify_EXISTS_query(root, subselect))
1157 		return NULL;
1158 
1159 	/*
1160 	 * The subquery must have a nonempty jointree, else we won't have a join.
1161 	 */
1162 	if (subselect->jointree->fromlist == NIL)
1163 		return NULL;
1164 
1165 	/*
1166 	 * Separate out the WHERE clause.  (We could theoretically also remove
1167 	 * top-level plain JOIN/ON clauses, but it's probably not worth the
1168 	 * trouble.)
1169 	 */
1170 	whereClause = subselect->jointree->quals;
1171 	subselect->jointree->quals = NULL;
1172 
1173 	/*
1174 	 * The rest of the sub-select must not refer to any Vars of the parent
1175 	 * query.  (Vars of higher levels should be okay, though.)
1176 	 */
1177 	if (contain_vars_of_level((Node *) subselect, 1))
1178 		return NULL;
1179 
1180 	/*
1181 	 * On the other hand, the WHERE clause must contain some Vars of the
1182 	 * parent query, else it's not gonna be a join.
1183 	 */
1184 	if (!contain_vars_of_level(whereClause, 1))
1185 		return NULL;
1186 
1187 	/*
1188 	 * We don't risk optimizing if the WHERE clause is volatile, either.
1189 	 */
1190 	if (contain_volatile_functions(whereClause))
1191 		return NULL;
1192 
1193 	/*
1194 	 * Prepare to pull up the sub-select into top range table.
1195 	 *
1196 	 * We rely here on the assumption that the outer query has no references
1197 	 * to the inner (necessarily true). Therefore this is a lot easier than
1198 	 * what pull_up_subqueries has to go through.
1199 	 *
1200 	 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
1201 	 * do.  The machinations of simplify_EXISTS_query ensured that there is
1202 	 * nothing interesting in the subquery except an rtable and jointree, and
1203 	 * even the jointree FromExpr no longer has quals.  So we can just append
1204 	 * the rtable to our own and use the FromExpr in our jointree. But first,
1205 	 * adjust all level-zero varnos in the subquery to account for the rtable
1206 	 * merger.
1207 	 */
1208 	rtoffset = list_length(parse->rtable);
1209 	OffsetVarNodes((Node *) subselect, rtoffset, 0);
1210 	OffsetVarNodes(whereClause, rtoffset, 0);
1211 
1212 	/*
1213 	 * Upper-level vars in subquery will now be one level closer to their
1214 	 * parent than before; in particular, anything that had been level 1
1215 	 * becomes level zero.
1216 	 */
1217 	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
1218 	IncrementVarSublevelsUp(whereClause, -1, 1);
1219 
1220 	/*
1221 	 * Now that the WHERE clause is adjusted to match the parent query
1222 	 * environment, we can easily identify all the level-zero rels it uses.
1223 	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
1224 	 * not.
1225 	 */
1226 	clause_varnos = pull_varnos(whereClause);
1227 	upper_varnos = NULL;
1228 	while ((varno = bms_first_member(clause_varnos)) >= 0)
1229 	{
1230 		if (varno <= rtoffset)
1231 			upper_varnos = bms_add_member(upper_varnos, varno);
1232 	}
1233 	bms_free(clause_varnos);
1234 	Assert(!bms_is_empty(upper_varnos));
1235 
1236 	/*
1237 	 * Now that we've got the set of upper-level varnos, we can make the last
1238 	 * check: only available_rels can be referenced.
1239 	 */
1240 	if (!bms_is_subset(upper_varnos, available_rels))
1241 		return NULL;
1242 
1243 	/* Now we can attach the modified subquery rtable to the parent */
1244 	parse->rtable = list_concat(parse->rtable, subselect->rtable);
1245 
1246 	/*
1247 	 * And finally, build the JoinExpr node.
1248 	 */
1249 	result = makeNode(JoinExpr);
1250 	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
1251 	result->isNatural = false;
1252 	result->larg = NULL;		/* caller must fill this in */
1253 	/* flatten out the FromExpr node if it's useless */
1254 	if (list_length(subselect->jointree->fromlist) == 1)
1255 		result->rarg = (Node *) linitial(subselect->jointree->fromlist);
1256 	else
1257 		result->rarg = (Node *) subselect->jointree;
1258 	result->usingClause = NIL;
1259 	result->quals = whereClause;
1260 	result->alias = NULL;
1261 	result->rtindex = 0;		/* we don't need an RTE for it */
1262 
1263 	return result;
1264 }
1265 
1266 /*
1267  * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
1268  *
1269  * The only thing that matters about an EXISTS query is whether it returns
1270  * zero or more than zero rows.  Therefore, we can remove certain SQL features
1271  * that won't affect that.  The only part that is really likely to matter in
1272  * typical usage is simplifying the targetlist: it's a common habit to write
1273  * "SELECT * FROM" even though there is no need to evaluate any columns.
1274  *
1275  * Note: by suppressing the targetlist we could cause an observable behavioral
1276  * change, namely that any errors that might occur in evaluating the tlist
1277  * won't occur, nor will other side-effects of volatile functions.  This seems
1278  * unlikely to bother anyone in practice.
1279  *
1280  * Returns TRUE if was able to discard the targetlist, else FALSE.
1281  */
1282 static bool
simplify_EXISTS_query(PlannerInfo * root,Query * query)1283 simplify_EXISTS_query(PlannerInfo *root, Query *query)
1284 {
1285 	/*
1286 	 * We don't try to simplify at all if the query uses set operations,
1287 	 * aggregates, grouping sets, modifying CTEs, HAVING, OFFSET, or FOR
1288 	 * UPDATE/SHARE; none of these seem likely in normal usage and their
1289 	 * possible effects are complex.  (Note: we could ignore an "OFFSET 0"
1290 	 * clause, but that traditionally is used as an optimization fence, so we
1291 	 * don't.)
1292 	 */
1293 	if (query->commandType != CMD_SELECT ||
1294 		query->setOperations ||
1295 		query->hasAggs ||
1296 		query->groupingSets ||
1297 		query->hasWindowFuncs ||
1298 		query->hasModifyingCTE ||
1299 		query->havingQual ||
1300 		query->limitOffset ||
1301 		query->rowMarks)
1302 		return false;
1303 
1304 	/*
1305 	 * LIMIT with a constant positive (or NULL) value doesn't affect the
1306 	 * semantics of EXISTS, so let's ignore such clauses.  This is worth doing
1307 	 * because people accustomed to certain other DBMSes may be in the habit
1308 	 * of writing EXISTS(SELECT ... LIMIT 1) as an optimization.  If there's a
1309 	 * LIMIT with anything else as argument, though, we can't simplify.
1310 	 */
1311 	if (query->limitCount)
1312 	{
1313 		/*
1314 		 * The LIMIT clause has not yet been through eval_const_expressions,
1315 		 * so we have to apply that here.  It might seem like this is a waste
1316 		 * of cycles, since the only case plausibly worth worrying about is
1317 		 * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
1318 		 * so we have to fold constants or we're not going to recognize it.
1319 		 */
1320 		Node	   *node = eval_const_expressions(root, query->limitCount);
1321 		Const	   *limit;
1322 
1323 		/* Might as well update the query if we simplified the clause. */
1324 		query->limitCount = node;
1325 
1326 		if (!IsA(node, Const))
1327 			return false;
1328 
1329 		limit = (Const *) node;
1330 		Assert(limit->consttype == INT8OID);
1331 		if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
1332 			return false;
1333 
1334 		/* Whether or not the targetlist is safe, we can drop the LIMIT. */
1335 		query->limitCount = NULL;
1336 	}
1337 
1338 	/*
1339 	 * Mustn't throw away the targetlist if it contains set-returning
1340 	 * functions; those could affect whether zero rows are returned!
1341 	 */
1342 	if (expression_returns_set((Node *) query->targetList))
1343 		return false;
1344 
1345 	/*
1346 	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
1347 	 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
1348 	 * change a nonzero-rows result to zero rows or vice versa.  (Furthermore,
1349 	 * since our parsetree representation of these clauses depends on the
1350 	 * targetlist, we'd better throw them away if we drop the targetlist.)
1351 	 */
1352 	query->targetList = NIL;
1353 	query->groupClause = NIL;
1354 	query->windowClause = NIL;
1355 	query->distinctClause = NIL;
1356 	query->sortClause = NIL;
1357 	query->hasDistinctOn = false;
1358 
1359 	return true;
1360 }
1361 
1362 /*
1363  * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink
1364  *
1365  * The subselect is expected to be a fresh copy that we can munge up,
1366  * and to have been successfully passed through simplify_EXISTS_query.
1367  *
1368  * On success, the modified subselect is returned, and we store a suitable
1369  * upper-level test expression at *testexpr, plus a list of the subselect's
1370  * output Params at *paramIds.  (The test expression is already Param-ified
1371  * and hence need not go through convert_testexpr, which is why we have to
1372  * deal with the Param IDs specially.)
1373  *
1374  * On failure, returns NULL.
1375  */
1376 static Query *
convert_EXISTS_to_ANY(PlannerInfo * root,Query * subselect,Node ** testexpr,List ** paramIds)1377 convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
1378 					  Node **testexpr, List **paramIds)
1379 {
1380 	Node	   *whereClause;
1381 	List	   *leftargs,
1382 			   *rightargs,
1383 			   *opids,
1384 			   *opcollations,
1385 			   *newWhere,
1386 			   *tlist,
1387 			   *testlist,
1388 			   *paramids;
1389 	ListCell   *lc,
1390 			   *rc,
1391 			   *oc,
1392 			   *cc;
1393 	AttrNumber	resno;
1394 
1395 	/*
1396 	 * Query must not require a targetlist, since we have to insert a new one.
1397 	 * Caller should have dealt with the case already.
1398 	 */
1399 	Assert(subselect->targetList == NIL);
1400 
1401 	/*
1402 	 * Separate out the WHERE clause.  (We could theoretically also remove
1403 	 * top-level plain JOIN/ON clauses, but it's probably not worth the
1404 	 * trouble.)
1405 	 */
1406 	whereClause = subselect->jointree->quals;
1407 	subselect->jointree->quals = NULL;
1408 
1409 	/*
1410 	 * The rest of the sub-select must not refer to any Vars of the parent
1411 	 * query.  (Vars of higher levels should be okay, though.)
1412 	 *
1413 	 * Note: we need not check for Aggrefs separately because we know the
1414 	 * sub-select is as yet unoptimized; any uplevel Aggref must therefore
1415 	 * contain an uplevel Var reference.  This is not the case below ...
1416 	 */
1417 	if (contain_vars_of_level((Node *) subselect, 1))
1418 		return NULL;
1419 
1420 	/*
1421 	 * We don't risk optimizing if the WHERE clause is volatile, either.
1422 	 */
1423 	if (contain_volatile_functions(whereClause))
1424 		return NULL;
1425 
1426 	/*
1427 	 * Clean up the WHERE clause by doing const-simplification etc on it.
1428 	 * Aside from simplifying the processing we're about to do, this is
1429 	 * important for being able to pull chunks of the WHERE clause up into the
1430 	 * parent query.  Since we are invoked partway through the parent's
1431 	 * preprocess_expression() work, earlier steps of preprocess_expression()
1432 	 * wouldn't get applied to the pulled-up stuff unless we do them here. For
1433 	 * the parts of the WHERE clause that get put back into the child query,
1434 	 * this work is partially duplicative, but it shouldn't hurt.
1435 	 *
1436 	 * Note: we do not run flatten_join_alias_vars.  This is OK because any
1437 	 * parent aliases were flattened already, and we're not going to pull any
1438 	 * child Vars (of any description) into the parent.
1439 	 *
1440 	 * Note: passing the parent's root to eval_const_expressions is
1441 	 * technically wrong, but we can get away with it since only the
1442 	 * boundParams (if any) are used, and those would be the same in a
1443 	 * subroot.
1444 	 */
1445 	whereClause = eval_const_expressions(root, whereClause);
1446 	whereClause = (Node *) canonicalize_qual_ext((Expr *) whereClause, false);
1447 	whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
1448 
1449 	/*
1450 	 * We now have a flattened implicit-AND list of clauses, which we try to
1451 	 * break apart into "outervar = innervar" hash clauses. Anything that
1452 	 * can't be broken apart just goes back into the newWhere list.  Note that
1453 	 * we aren't trying hard yet to ensure that we have only outer or only
1454 	 * inner on each side; we'll check that if we get to the end.
1455 	 */
1456 	leftargs = rightargs = opids = opcollations = newWhere = NIL;
1457 	foreach(lc, (List *) whereClause)
1458 	{
1459 		OpExpr	   *expr = (OpExpr *) lfirst(lc);
1460 
1461 		if (IsA(expr, OpExpr) &&
1462 			hash_ok_operator(expr))
1463 		{
1464 			Node	   *leftarg = (Node *) linitial(expr->args);
1465 			Node	   *rightarg = (Node *) lsecond(expr->args);
1466 
1467 			if (contain_vars_of_level(leftarg, 1))
1468 			{
1469 				leftargs = lappend(leftargs, leftarg);
1470 				rightargs = lappend(rightargs, rightarg);
1471 				opids = lappend_oid(opids, expr->opno);
1472 				opcollations = lappend_oid(opcollations, expr->inputcollid);
1473 				continue;
1474 			}
1475 			if (contain_vars_of_level(rightarg, 1))
1476 			{
1477 				/*
1478 				 * We must commute the clause to put the outer var on the
1479 				 * left, because the hashing code in nodeSubplan.c expects
1480 				 * that.  This probably shouldn't ever fail, since hashable
1481 				 * operators ought to have commutators, but be paranoid.
1482 				 */
1483 				expr->opno = get_commutator(expr->opno);
1484 				if (OidIsValid(expr->opno) && hash_ok_operator(expr))
1485 				{
1486 					leftargs = lappend(leftargs, rightarg);
1487 					rightargs = lappend(rightargs, leftarg);
1488 					opids = lappend_oid(opids, expr->opno);
1489 					opcollations = lappend_oid(opcollations, expr->inputcollid);
1490 					continue;
1491 				}
1492 				/* If no commutator, no chance to optimize the WHERE clause */
1493 				return NULL;
1494 			}
1495 		}
1496 		/* Couldn't handle it as a hash clause */
1497 		newWhere = lappend(newWhere, expr);
1498 	}
1499 
1500 	/*
1501 	 * If we didn't find anything we could convert, fail.
1502 	 */
1503 	if (leftargs == NIL)
1504 		return NULL;
1505 
1506 	/*
1507 	 * There mustn't be any parent Vars or Aggs in the stuff that we intend to
1508 	 * put back into the child query.  Note: you might think we don't need to
1509 	 * check for Aggs separately, because an uplevel Agg must contain an
1510 	 * uplevel Var in its argument.  But it is possible that the uplevel Var
1511 	 * got optimized away by eval_const_expressions.  Consider
1512 	 *
1513 	 * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END)
1514 	 */
1515 	if (contain_vars_of_level((Node *) newWhere, 1) ||
1516 		contain_vars_of_level((Node *) rightargs, 1))
1517 		return NULL;
1518 	if (root->parse->hasAggs &&
1519 		(contain_aggs_of_level((Node *) newWhere, 1) ||
1520 		 contain_aggs_of_level((Node *) rightargs, 1)))
1521 		return NULL;
1522 
1523 	/*
1524 	 * And there can't be any child Vars in the stuff we intend to pull up.
1525 	 * (Note: we'd need to check for child Aggs too, except we know the child
1526 	 * has no aggs at all because of simplify_EXISTS_query's check. The same
1527 	 * goes for window functions.)
1528 	 */
1529 	if (contain_vars_of_level((Node *) leftargs, 0))
1530 		return NULL;
1531 
1532 	/*
1533 	 * Also reject sublinks in the stuff we intend to pull up.  (It might be
1534 	 * possible to support this, but doesn't seem worth the complication.)
1535 	 */
1536 	if (contain_subplans((Node *) leftargs))
1537 		return NULL;
1538 
1539 	/*
1540 	 * Okay, adjust the sublevelsup in the stuff we're pulling up.
1541 	 */
1542 	IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
1543 
1544 	/*
1545 	 * Put back any child-level-only WHERE clauses.
1546 	 */
1547 	if (newWhere)
1548 		subselect->jointree->quals = (Node *) make_ands_explicit(newWhere);
1549 
1550 	/*
1551 	 * Build a new targetlist for the child that emits the expressions we
1552 	 * need.  Concurrently, build a testexpr for the parent using Params to
1553 	 * reference the child outputs.  (Since we generate Params directly here,
1554 	 * there will be no need to convert the testexpr in build_subplan.)
1555 	 */
1556 	tlist = testlist = paramids = NIL;
1557 	resno = 1;
1558 	/* there's no "forfour" so we have to chase one of the lists manually */
1559 	cc = list_head(opcollations);
1560 	forthree(lc, leftargs, rc, rightargs, oc, opids)
1561 	{
1562 		Node	   *leftarg = (Node *) lfirst(lc);
1563 		Node	   *rightarg = (Node *) lfirst(rc);
1564 		Oid			opid = lfirst_oid(oc);
1565 		Oid			opcollation = lfirst_oid(cc);
1566 		Param	   *param;
1567 
1568 		cc = lnext(cc);
1569 		param = generate_new_exec_param(root,
1570 										exprType(rightarg),
1571 										exprTypmod(rightarg),
1572 										exprCollation(rightarg));
1573 		tlist = lappend(tlist,
1574 						makeTargetEntry((Expr *) rightarg,
1575 										resno++,
1576 										NULL,
1577 										false));
1578 		testlist = lappend(testlist,
1579 						   make_opclause(opid, BOOLOID, false,
1580 										 (Expr *) leftarg, (Expr *) param,
1581 										 InvalidOid, opcollation));
1582 		paramids = lappend_int(paramids, param->paramid);
1583 	}
1584 
1585 	/* Put everything where it should go, and we're done */
1586 	subselect->targetList = tlist;
1587 	*testexpr = (Node *) make_ands_explicit(testlist);
1588 	*paramIds = paramids;
1589 
1590 	return subselect;
1591 }
1592 
1593 
1594 /*
1595  * Replace correlation vars (uplevel vars) with Params.
1596  *
1597  * Uplevel PlaceHolderVars and aggregates are replaced, too.
1598  *
1599  * Note: it is critical that this runs immediately after SS_process_sublinks.
1600  * Since we do not recurse into the arguments of uplevel PHVs and aggregates,
1601  * they will get copied to the appropriate subplan args list in the parent
1602  * query with uplevel vars not replaced by Params, but only adjusted in level
1603  * (see replace_outer_placeholdervar and replace_outer_agg).  That's exactly
1604  * what we want for the vars of the parent level --- but if a PHV's or
1605  * aggregate's argument contains any further-up variables, they have to be
1606  * replaced with Params in their turn. That will happen when the parent level
1607  * runs SS_replace_correlation_vars.  Therefore it must do so after expanding
1608  * its sublinks to subplans.  And we don't want any steps in between, else
1609  * those steps would never get applied to the argument expressions, either in
1610  * the parent or the child level.
1611  *
1612  * Another fairly tricky thing going on here is the handling of SubLinks in
1613  * the arguments of uplevel PHVs/aggregates.  Those are not touched inside the
1614  * intermediate query level, either.  Instead, SS_process_sublinks recurses on
1615  * them after copying the PHV or Aggref expression into the parent plan level
1616  * (this is actually taken care of in build_subplan).
1617  */
1618 Node *
SS_replace_correlation_vars(PlannerInfo * root,Node * expr)1619 SS_replace_correlation_vars(PlannerInfo *root, Node *expr)
1620 {
1621 	/* No setup needed for tree walk, so away we go */
1622 	return replace_correlation_vars_mutator(expr, root);
1623 }
1624 
1625 static Node *
replace_correlation_vars_mutator(Node * node,PlannerInfo * root)1626 replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
1627 {
1628 	if (node == NULL)
1629 		return NULL;
1630 	if (IsA(node, Var))
1631 	{
1632 		if (((Var *) node)->varlevelsup > 0)
1633 			return (Node *) replace_outer_var(root, (Var *) node);
1634 	}
1635 	if (IsA(node, PlaceHolderVar))
1636 	{
1637 		if (((PlaceHolderVar *) node)->phlevelsup > 0)
1638 			return (Node *) replace_outer_placeholdervar(root,
1639 													(PlaceHolderVar *) node);
1640 	}
1641 	if (IsA(node, Aggref))
1642 	{
1643 		if (((Aggref *) node)->agglevelsup > 0)
1644 			return (Node *) replace_outer_agg(root, (Aggref *) node);
1645 	}
1646 	if (IsA(node, GroupingFunc))
1647 	{
1648 		if (((GroupingFunc *) node)->agglevelsup > 0)
1649 			return (Node *) replace_outer_grouping(root, (GroupingFunc *) node);
1650 	}
1651 	return expression_tree_mutator(node,
1652 								   replace_correlation_vars_mutator,
1653 								   (void *) root);
1654 }
1655 
1656 /*
1657  * Expand SubLinks to SubPlans in the given expression.
1658  *
1659  * The isQual argument tells whether or not this expression is a WHERE/HAVING
1660  * qualifier expression.  If it is, any sublinks appearing at top level need
1661  * not distinguish FALSE from UNKNOWN return values.
1662  */
1663 Node *
SS_process_sublinks(PlannerInfo * root,Node * expr,bool isQual)1664 SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
1665 {
1666 	process_sublinks_context context;
1667 
1668 	context.root = root;
1669 	context.isTopQual = isQual;
1670 	return process_sublinks_mutator(expr, &context);
1671 }
1672 
1673 static Node *
process_sublinks_mutator(Node * node,process_sublinks_context * context)1674 process_sublinks_mutator(Node *node, process_sublinks_context *context)
1675 {
1676 	process_sublinks_context locContext;
1677 
1678 	locContext.root = context->root;
1679 
1680 	if (node == NULL)
1681 		return NULL;
1682 	if (IsA(node, SubLink))
1683 	{
1684 		SubLink    *sublink = (SubLink *) node;
1685 		Node	   *testexpr;
1686 
1687 		/*
1688 		 * First, recursively process the lefthand-side expressions, if any.
1689 		 * They're not top-level anymore.
1690 		 */
1691 		locContext.isTopQual = false;
1692 		testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
1693 
1694 		/*
1695 		 * Now build the SubPlan node and make the expr to return.
1696 		 */
1697 		return make_subplan(context->root,
1698 							(Query *) sublink->subselect,
1699 							sublink->subLinkType,
1700 							sublink->subLinkId,
1701 							testexpr,
1702 							context->isTopQual);
1703 	}
1704 
1705 	/*
1706 	 * Don't recurse into the arguments of an outer PHV or aggregate here. Any
1707 	 * SubLinks in the arguments have to be dealt with at the outer query
1708 	 * level; they'll be handled when build_subplan collects the PHV or Aggref
1709 	 * into the arguments to be passed down to the current subplan.
1710 	 */
1711 	if (IsA(node, PlaceHolderVar))
1712 	{
1713 		if (((PlaceHolderVar *) node)->phlevelsup > 0)
1714 			return node;
1715 	}
1716 	else if (IsA(node, Aggref))
1717 	{
1718 		if (((Aggref *) node)->agglevelsup > 0)
1719 			return node;
1720 	}
1721 
1722 	/*
1723 	 * We should never see a SubPlan expression in the input (since this is
1724 	 * the very routine that creates 'em to begin with).  We shouldn't find
1725 	 * ourselves invoked directly on a Query, either.
1726 	 */
1727 	Assert(!IsA(node, SubPlan));
1728 	Assert(!IsA(node, AlternativeSubPlan));
1729 	Assert(!IsA(node, Query));
1730 
1731 	/*
1732 	 * Because make_subplan() could return an AND or OR clause, we have to
1733 	 * take steps to preserve AND/OR flatness of a qual.  We assume the input
1734 	 * has been AND/OR flattened and so we need no recursion here.
1735 	 *
1736 	 * (Due to the coding here, we will not get called on the List subnodes of
1737 	 * an AND; and the input is *not* yet in implicit-AND format.  So no check
1738 	 * is needed for a bare List.)
1739 	 *
1740 	 * Anywhere within the top-level AND/OR clause structure, we can tell
1741 	 * make_subplan() that NULL and FALSE are interchangeable.  So isTopQual
1742 	 * propagates down in both cases.  (Note that this is unlike the meaning
1743 	 * of "top level qual" used in most other places in Postgres.)
1744 	 */
1745 	if (and_clause(node))
1746 	{
1747 		List	   *newargs = NIL;
1748 		ListCell   *l;
1749 
1750 		/* Still at qual top-level */
1751 		locContext.isTopQual = context->isTopQual;
1752 
1753 		foreach(l, ((BoolExpr *) node)->args)
1754 		{
1755 			Node	   *newarg;
1756 
1757 			newarg = process_sublinks_mutator(lfirst(l), &locContext);
1758 			if (and_clause(newarg))
1759 				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1760 			else
1761 				newargs = lappend(newargs, newarg);
1762 		}
1763 		return (Node *) make_andclause(newargs);
1764 	}
1765 
1766 	if (or_clause(node))
1767 	{
1768 		List	   *newargs = NIL;
1769 		ListCell   *l;
1770 
1771 		/* Still at qual top-level */
1772 		locContext.isTopQual = context->isTopQual;
1773 
1774 		foreach(l, ((BoolExpr *) node)->args)
1775 		{
1776 			Node	   *newarg;
1777 
1778 			newarg = process_sublinks_mutator(lfirst(l), &locContext);
1779 			if (or_clause(newarg))
1780 				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1781 			else
1782 				newargs = lappend(newargs, newarg);
1783 		}
1784 		return (Node *) make_orclause(newargs);
1785 	}
1786 
1787 	/*
1788 	 * If we recurse down through anything other than an AND or OR node, we
1789 	 * are definitely not at top qual level anymore.
1790 	 */
1791 	locContext.isTopQual = false;
1792 
1793 	return expression_tree_mutator(node,
1794 								   process_sublinks_mutator,
1795 								   (void *) &locContext);
1796 }
1797 
1798 /*
1799  * SS_identify_outer_params - identify the Params available from outer levels
1800  *
1801  * This must be run after SS_replace_correlation_vars and SS_process_sublinks
1802  * processing is complete in a given query level as well as all of its
1803  * descendant levels (which means it's most practical to do it at the end of
1804  * processing the query level).  We compute the set of paramIds that outer
1805  * levels will make available to this level+descendants, and record it in
1806  * root->outer_params for use while computing extParam/allParam sets in final
1807  * plan cleanup.  (We can't just compute it then, because the upper levels'
1808  * plan_params lists are transient and will be gone by then.)
1809  */
1810 void
SS_identify_outer_params(PlannerInfo * root)1811 SS_identify_outer_params(PlannerInfo *root)
1812 {
1813 	Bitmapset  *outer_params;
1814 	PlannerInfo *proot;
1815 	ListCell   *l;
1816 
1817 	/*
1818 	 * If no parameters have been assigned anywhere in the tree, we certainly
1819 	 * don't need to do anything here.
1820 	 */
1821 	if (root->glob->nParamExec == 0)
1822 		return;
1823 
1824 	/*
1825 	 * Scan all query levels above this one to see which parameters are due to
1826 	 * be available from them, either because lower query levels have
1827 	 * requested them (via plan_params) or because they will be available from
1828 	 * initPlans of those levels.
1829 	 */
1830 	outer_params = NULL;
1831 	for (proot = root->parent_root; proot != NULL; proot = proot->parent_root)
1832 	{
1833 		/* Include ordinary Var/PHV/Aggref params */
1834 		foreach(l, proot->plan_params)
1835 		{
1836 			PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
1837 
1838 			outer_params = bms_add_member(outer_params, pitem->paramId);
1839 		}
1840 		/* Include any outputs of outer-level initPlans */
1841 		foreach(l, proot->init_plans)
1842 		{
1843 			SubPlan    *initsubplan = (SubPlan *) lfirst(l);
1844 			ListCell   *l2;
1845 
1846 			foreach(l2, initsubplan->setParam)
1847 			{
1848 				outer_params = bms_add_member(outer_params, lfirst_int(l2));
1849 			}
1850 		}
1851 		/* Include worktable ID, if a recursive query is being planned */
1852 		if (proot->wt_param_id >= 0)
1853 			outer_params = bms_add_member(outer_params, proot->wt_param_id);
1854 	}
1855 	root->outer_params = outer_params;
1856 }
1857 
1858 /*
1859  * SS_charge_for_initplans - account for initplans in Path costs & parallelism
1860  *
1861  * If any initPlans have been created in the current query level, they will
1862  * get attached to the Plan tree created from whichever Path we select from
1863  * the given rel.  Increment all that rel's Paths' costs to account for them,
1864  * and make sure the paths get marked as parallel-unsafe, since we can't
1865  * currently transmit initPlans to parallel workers.
1866  *
1867  * This is separate from SS_attach_initplans because we might conditionally
1868  * create more initPlans during create_plan(), depending on which Path we
1869  * select.  However, Paths that would generate such initPlans are expected
1870  * to have included their cost already.
1871  */
1872 void
SS_charge_for_initplans(PlannerInfo * root,RelOptInfo * final_rel)1873 SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel)
1874 {
1875 	Cost		initplan_cost;
1876 	ListCell   *lc;
1877 
1878 	/* Nothing to do if no initPlans */
1879 	if (root->init_plans == NIL)
1880 		return;
1881 
1882 	/*
1883 	 * Compute the cost increment just once, since it will be the same for all
1884 	 * Paths.  We assume each initPlan gets run once during top plan startup.
1885 	 * This is a conservative overestimate, since in fact an initPlan might be
1886 	 * executed later than plan startup, or even not at all.
1887 	 */
1888 	initplan_cost = 0;
1889 	foreach(lc, root->init_plans)
1890 	{
1891 		SubPlan    *initsubplan = (SubPlan *) lfirst(lc);
1892 
1893 		initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost;
1894 	}
1895 
1896 	/*
1897 	 * Now adjust the costs and parallel_safe flags.
1898 	 */
1899 	foreach(lc, final_rel->pathlist)
1900 	{
1901 		Path	   *path = (Path *) lfirst(lc);
1902 
1903 		path->startup_cost += initplan_cost;
1904 		path->total_cost += initplan_cost;
1905 		path->parallel_safe = false;
1906 	}
1907 
1908 	/* We needn't do set_cheapest() here, caller will do it */
1909 }
1910 
1911 /*
1912  * SS_attach_initplans - attach initplans to topmost plan node
1913  *
1914  * Attach any initplans created in the current query level to the specified
1915  * plan node, which should normally be the topmost node for the query level.
1916  * (In principle the initPlans could go in any node at or above where they're
1917  * referenced; but there seems no reason to put them any lower than the
1918  * topmost node, so we don't bother to track exactly where they came from.)
1919  * We do not touch the plan node's cost; the initplans should have been
1920  * accounted for in path costing.
1921  */
1922 void
SS_attach_initplans(PlannerInfo * root,Plan * plan)1923 SS_attach_initplans(PlannerInfo *root, Plan *plan)
1924 {
1925 	plan->initPlan = root->init_plans;
1926 }
1927 
1928 /*
1929  * SS_finalize_plan - do final parameter processing for a completed Plan.
1930  *
1931  * This recursively computes the extParam and allParam sets for every Plan
1932  * node in the given plan tree.  (Oh, and RangeTblFunction.funcparams too.)
1933  *
1934  * We assume that SS_finalize_plan has already been run on any initplans or
1935  * subplans the plan tree could reference.
1936  */
1937 void
SS_finalize_plan(PlannerInfo * root,Plan * plan)1938 SS_finalize_plan(PlannerInfo *root, Plan *plan)
1939 {
1940 	/* No setup needed, just recurse through plan tree. */
1941 	(void) finalize_plan(root, plan, root->outer_params, NULL);
1942 }
1943 
1944 /*
1945  * Recursive processing of all nodes in the plan tree
1946  *
1947  * valid_params is the set of param IDs supplied by outer plan levels
1948  * that are valid to reference in this plan node or its children.
1949  *
1950  * scan_params is a set of param IDs to force scan plan nodes to reference.
1951  * This is for EvalPlanQual support, and is always NULL at the top of the
1952  * recursion.
1953  *
1954  * The return value is the computed allParam set for the given Plan node.
1955  * This is just an internal notational convenience: we can add a child
1956  * plan's allParams to the set of param IDs of interest to this level
1957  * in the same statement that recurses to that child.
1958  *
1959  * Do not scribble on caller's values of valid_params or scan_params!
1960  *
1961  * Note: although we attempt to deal with initPlans anywhere in the tree, the
1962  * logic is not really right.  The problem is that a plan node might return an
1963  * output Param of its initPlan as a targetlist item, in which case it's valid
1964  * for the parent plan level to reference that same Param; the parent's usage
1965  * will be converted into a Var referencing the child plan node by setrefs.c.
1966  * But this function would see the parent's reference as out of scope and
1967  * complain about it.  For now, this does not matter because the planner only
1968  * attaches initPlans to the topmost plan node in a query level, so the case
1969  * doesn't arise.  If we ever merge this processing into setrefs.c, maybe it
1970  * can be handled more cleanly.
1971  */
1972 static Bitmapset *
finalize_plan(PlannerInfo * root,Plan * plan,Bitmapset * valid_params,Bitmapset * scan_params)1973 finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
1974 			  Bitmapset *scan_params)
1975 {
1976 	finalize_primnode_context context;
1977 	int			locally_added_param;
1978 	Bitmapset  *nestloop_params;
1979 	Bitmapset  *initExtParam;
1980 	Bitmapset  *initSetParam;
1981 	Bitmapset  *child_params;
1982 	ListCell   *l;
1983 
1984 	if (plan == NULL)
1985 		return NULL;
1986 
1987 	context.root = root;
1988 	context.paramids = NULL;	/* initialize set to empty */
1989 	locally_added_param = -1;	/* there isn't one */
1990 	nestloop_params = NULL;		/* there aren't any */
1991 
1992 	/*
1993 	 * Examine any initPlans to determine the set of external params they
1994 	 * reference and the set of output params they supply.  (We assume
1995 	 * SS_finalize_plan was run on them already.)
1996 	 */
1997 	initExtParam = initSetParam = NULL;
1998 	foreach(l, plan->initPlan)
1999 	{
2000 		SubPlan    *initsubplan = (SubPlan *) lfirst(l);
2001 		Plan	   *initplan = planner_subplan_get_plan(root, initsubplan);
2002 		ListCell   *l2;
2003 
2004 		initExtParam = bms_add_members(initExtParam, initplan->extParam);
2005 		foreach(l2, initsubplan->setParam)
2006 		{
2007 			initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
2008 		}
2009 	}
2010 
2011 	/* Any setParams are validly referenceable in this node and children */
2012 	if (initSetParam)
2013 		valid_params = bms_union(valid_params, initSetParam);
2014 
2015 	/*
2016 	 * When we call finalize_primnode, context.paramids sets are automatically
2017 	 * merged together.  But when recursing to self, we have to do it the hard
2018 	 * way.  We want the paramids set to include params in subplans as well as
2019 	 * at this level.
2020 	 */
2021 
2022 	/* Find params in targetlist and qual */
2023 	finalize_primnode((Node *) plan->targetlist, &context);
2024 	finalize_primnode((Node *) plan->qual, &context);
2025 
2026 	/* Check additional node-type-specific fields */
2027 	switch (nodeTag(plan))
2028 	{
2029 		case T_Result:
2030 			finalize_primnode(((Result *) plan)->resconstantqual,
2031 							  &context);
2032 			break;
2033 
2034 		case T_SeqScan:
2035 			context.paramids = bms_add_members(context.paramids, scan_params);
2036 			break;
2037 
2038 		case T_SampleScan:
2039 			finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
2040 							  &context);
2041 			context.paramids = bms_add_members(context.paramids, scan_params);
2042 			break;
2043 
2044 		case T_IndexScan:
2045 			finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
2046 							  &context);
2047 			finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby,
2048 							  &context);
2049 
2050 			/*
2051 			 * we need not look at indexqualorig, since it will have the same
2052 			 * param references as indexqual.  Likewise, we can ignore
2053 			 * indexorderbyorig.
2054 			 */
2055 			context.paramids = bms_add_members(context.paramids, scan_params);
2056 			break;
2057 
2058 		case T_IndexOnlyScan:
2059 			finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual,
2060 							  &context);
2061 			finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby,
2062 							  &context);
2063 
2064 			/*
2065 			 * we need not look at indextlist, since it cannot contain Params.
2066 			 */
2067 			context.paramids = bms_add_members(context.paramids, scan_params);
2068 			break;
2069 
2070 		case T_BitmapIndexScan:
2071 			finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual,
2072 							  &context);
2073 
2074 			/*
2075 			 * we need not look at indexqualorig, since it will have the same
2076 			 * param references as indexqual.
2077 			 */
2078 			break;
2079 
2080 		case T_BitmapHeapScan:
2081 			finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
2082 							  &context);
2083 			context.paramids = bms_add_members(context.paramids, scan_params);
2084 			break;
2085 
2086 		case T_TidScan:
2087 			finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
2088 							  &context);
2089 			context.paramids = bms_add_members(context.paramids, scan_params);
2090 			break;
2091 
2092 		case T_SubqueryScan:
2093 			{
2094 				SubqueryScan *sscan = (SubqueryScan *) plan;
2095 				RelOptInfo *rel;
2096 
2097 				/* We must run SS_finalize_plan on the subquery */
2098 				rel = find_base_rel(root, sscan->scan.scanrelid);
2099 				SS_finalize_plan(rel->subroot, sscan->subplan);
2100 
2101 				/* Now we can add its extParams to the parent's params */
2102 				context.paramids = bms_add_members(context.paramids,
2103 												   sscan->subplan->extParam);
2104 				/* We need scan_params too, though */
2105 				context.paramids = bms_add_members(context.paramids,
2106 												   scan_params);
2107 			}
2108 			break;
2109 
2110 		case T_FunctionScan:
2111 			{
2112 				FunctionScan *fscan = (FunctionScan *) plan;
2113 				ListCell   *lc;
2114 
2115 				/*
2116 				 * Call finalize_primnode independently on each function
2117 				 * expression, so that we can record which params are
2118 				 * referenced in each, in order to decide which need
2119 				 * re-evaluating during rescan.
2120 				 */
2121 				foreach(lc, fscan->functions)
2122 				{
2123 					RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
2124 					finalize_primnode_context funccontext;
2125 
2126 					funccontext = context;
2127 					funccontext.paramids = NULL;
2128 
2129 					finalize_primnode(rtfunc->funcexpr, &funccontext);
2130 
2131 					/* remember results for execution */
2132 					rtfunc->funcparams = funccontext.paramids;
2133 
2134 					/* add the function's params to the overall set */
2135 					context.paramids = bms_add_members(context.paramids,
2136 													   funccontext.paramids);
2137 				}
2138 
2139 				context.paramids = bms_add_members(context.paramids,
2140 												   scan_params);
2141 			}
2142 			break;
2143 
2144 		case T_ValuesScan:
2145 			finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
2146 							  &context);
2147 			context.paramids = bms_add_members(context.paramids, scan_params);
2148 			break;
2149 
2150 		case T_CteScan:
2151 			{
2152 				/*
2153 				 * You might think we should add the node's cteParam to
2154 				 * paramids, but we shouldn't because that param is just a
2155 				 * linkage mechanism for multiple CteScan nodes for the same
2156 				 * CTE; it is never used for changed-param signaling.  What we
2157 				 * have to do instead is to find the referenced CTE plan and
2158 				 * incorporate its external paramids, so that the correct
2159 				 * things will happen if the CTE references outer-level
2160 				 * variables.  See test cases for bug #4902.  (We assume
2161 				 * SS_finalize_plan was run on the CTE plan already.)
2162 				 */
2163 				int			plan_id = ((CteScan *) plan)->ctePlanId;
2164 				Plan	   *cteplan;
2165 
2166 				/* so, do this ... */
2167 				if (plan_id < 1 || plan_id > list_length(root->glob->subplans))
2168 					elog(ERROR, "could not find plan for CteScan referencing plan ID %d",
2169 						 plan_id);
2170 				cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2171 				context.paramids =
2172 					bms_add_members(context.paramids, cteplan->extParam);
2173 
2174 #ifdef NOT_USED
2175 				/* ... but not this */
2176 				context.paramids =
2177 					bms_add_member(context.paramids,
2178 								   ((CteScan *) plan)->cteParam);
2179 #endif
2180 
2181 				context.paramids = bms_add_members(context.paramids,
2182 												   scan_params);
2183 			}
2184 			break;
2185 
2186 		case T_WorkTableScan:
2187 			context.paramids =
2188 				bms_add_member(context.paramids,
2189 							   ((WorkTableScan *) plan)->wtParam);
2190 			context.paramids = bms_add_members(context.paramids, scan_params);
2191 			break;
2192 
2193 		case T_ForeignScan:
2194 			{
2195 				ForeignScan *fscan = (ForeignScan *) plan;
2196 
2197 				finalize_primnode((Node *) fscan->fdw_exprs,
2198 								  &context);
2199 				finalize_primnode((Node *) fscan->fdw_recheck_quals,
2200 								  &context);
2201 
2202 				/* We assume fdw_scan_tlist cannot contain Params */
2203 				context.paramids = bms_add_members(context.paramids,
2204 												   scan_params);
2205 			}
2206 			break;
2207 
2208 		case T_CustomScan:
2209 			{
2210 				CustomScan *cscan = (CustomScan *) plan;
2211 				ListCell   *lc;
2212 
2213 				finalize_primnode((Node *) cscan->custom_exprs,
2214 								  &context);
2215 				/* We assume custom_scan_tlist cannot contain Params */
2216 				context.paramids =
2217 					bms_add_members(context.paramids, scan_params);
2218 
2219 				/* child nodes if any */
2220 				foreach(lc, cscan->custom_plans)
2221 				{
2222 					context.paramids =
2223 						bms_add_members(context.paramids,
2224 										finalize_plan(root,
2225 													  (Plan *) lfirst(lc),
2226 													  valid_params,
2227 													  scan_params));
2228 				}
2229 			}
2230 			break;
2231 
2232 		case T_ModifyTable:
2233 			{
2234 				ModifyTable *mtplan = (ModifyTable *) plan;
2235 				ListCell   *l;
2236 
2237 				/* Force descendant scan nodes to reference epqParam */
2238 				locally_added_param = mtplan->epqParam;
2239 				valid_params = bms_add_member(bms_copy(valid_params),
2240 											  locally_added_param);
2241 				scan_params = bms_add_member(bms_copy(scan_params),
2242 											 locally_added_param);
2243 				finalize_primnode((Node *) mtplan->returningLists,
2244 								  &context);
2245 				finalize_primnode((Node *) mtplan->onConflictSet,
2246 								  &context);
2247 				finalize_primnode((Node *) mtplan->onConflictWhere,
2248 								  &context);
2249 				/* exclRelTlist contains only Vars, doesn't need examination */
2250 				foreach(l, mtplan->plans)
2251 				{
2252 					context.paramids =
2253 						bms_add_members(context.paramids,
2254 										finalize_plan(root,
2255 													  (Plan *) lfirst(l),
2256 													  valid_params,
2257 													  scan_params));
2258 				}
2259 			}
2260 			break;
2261 
2262 		case T_Append:
2263 			{
2264 				ListCell   *l;
2265 
2266 				foreach(l, ((Append *) plan)->appendplans)
2267 				{
2268 					context.paramids =
2269 						bms_add_members(context.paramids,
2270 										finalize_plan(root,
2271 													  (Plan *) lfirst(l),
2272 													  valid_params,
2273 													  scan_params));
2274 				}
2275 			}
2276 			break;
2277 
2278 		case T_MergeAppend:
2279 			{
2280 				ListCell   *l;
2281 
2282 				foreach(l, ((MergeAppend *) plan)->mergeplans)
2283 				{
2284 					context.paramids =
2285 						bms_add_members(context.paramids,
2286 										finalize_plan(root,
2287 													  (Plan *) lfirst(l),
2288 													  valid_params,
2289 													  scan_params));
2290 				}
2291 			}
2292 			break;
2293 
2294 		case T_BitmapAnd:
2295 			{
2296 				ListCell   *l;
2297 
2298 				foreach(l, ((BitmapAnd *) plan)->bitmapplans)
2299 				{
2300 					context.paramids =
2301 						bms_add_members(context.paramids,
2302 										finalize_plan(root,
2303 													  (Plan *) lfirst(l),
2304 													  valid_params,
2305 													  scan_params));
2306 				}
2307 			}
2308 			break;
2309 
2310 		case T_BitmapOr:
2311 			{
2312 				ListCell   *l;
2313 
2314 				foreach(l, ((BitmapOr *) plan)->bitmapplans)
2315 				{
2316 					context.paramids =
2317 						bms_add_members(context.paramids,
2318 										finalize_plan(root,
2319 													  (Plan *) lfirst(l),
2320 													  valid_params,
2321 													  scan_params));
2322 				}
2323 			}
2324 			break;
2325 
2326 		case T_NestLoop:
2327 			{
2328 				ListCell   *l;
2329 
2330 				finalize_primnode((Node *) ((Join *) plan)->joinqual,
2331 								  &context);
2332 				/* collect set of params that will be passed to right child */
2333 				foreach(l, ((NestLoop *) plan)->nestParams)
2334 				{
2335 					NestLoopParam *nlp = (NestLoopParam *) lfirst(l);
2336 
2337 					nestloop_params = bms_add_member(nestloop_params,
2338 													 nlp->paramno);
2339 				}
2340 			}
2341 			break;
2342 
2343 		case T_MergeJoin:
2344 			finalize_primnode((Node *) ((Join *) plan)->joinqual,
2345 							  &context);
2346 			finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
2347 							  &context);
2348 			break;
2349 
2350 		case T_HashJoin:
2351 			finalize_primnode((Node *) ((Join *) plan)->joinqual,
2352 							  &context);
2353 			finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
2354 							  &context);
2355 			break;
2356 
2357 		case T_Limit:
2358 			finalize_primnode(((Limit *) plan)->limitOffset,
2359 							  &context);
2360 			finalize_primnode(((Limit *) plan)->limitCount,
2361 							  &context);
2362 			break;
2363 
2364 		case T_RecursiveUnion:
2365 			/* child nodes are allowed to reference wtParam */
2366 			locally_added_param = ((RecursiveUnion *) plan)->wtParam;
2367 			valid_params = bms_add_member(bms_copy(valid_params),
2368 										  locally_added_param);
2369 			/* wtParam does *not* get added to scan_params */
2370 			break;
2371 
2372 		case T_LockRows:
2373 			/* Force descendant scan nodes to reference epqParam */
2374 			locally_added_param = ((LockRows *) plan)->epqParam;
2375 			valid_params = bms_add_member(bms_copy(valid_params),
2376 										  locally_added_param);
2377 			scan_params = bms_add_member(bms_copy(scan_params),
2378 										 locally_added_param);
2379 			break;
2380 
2381 		case T_Agg:
2382 			{
2383 				Agg		   *agg = (Agg *) plan;
2384 
2385 				/*
2386 				 * AGG_HASHED plans need to know which Params are referenced
2387 				 * in aggregate calls.  Do a separate scan to identify them.
2388 				 */
2389 				if (agg->aggstrategy == AGG_HASHED)
2390 				{
2391 					finalize_primnode_context aggcontext;
2392 
2393 					aggcontext.root = root;
2394 					aggcontext.paramids = NULL;
2395 					finalize_agg_primnode((Node *) agg->plan.targetlist,
2396 										  &aggcontext);
2397 					finalize_agg_primnode((Node *) agg->plan.qual,
2398 										  &aggcontext);
2399 					agg->aggParams = aggcontext.paramids;
2400 				}
2401 			}
2402 			break;
2403 
2404 		case T_WindowAgg:
2405 			finalize_primnode(((WindowAgg *) plan)->startOffset,
2406 							  &context);
2407 			finalize_primnode(((WindowAgg *) plan)->endOffset,
2408 							  &context);
2409 			break;
2410 
2411 		case T_Hash:
2412 		case T_Material:
2413 		case T_Sort:
2414 		case T_Unique:
2415 		case T_Gather:
2416 		case T_SetOp:
2417 		case T_Group:
2418 			break;
2419 
2420 		default:
2421 			elog(ERROR, "unrecognized node type: %d",
2422 				 (int) nodeTag(plan));
2423 	}
2424 
2425 	/* Process left and right child plans, if any */
2426 	child_params = finalize_plan(root,
2427 								 plan->lefttree,
2428 								 valid_params,
2429 								 scan_params);
2430 	context.paramids = bms_add_members(context.paramids, child_params);
2431 
2432 	if (nestloop_params)
2433 	{
2434 		/* right child can reference nestloop_params as well as valid_params */
2435 		child_params = finalize_plan(root,
2436 									 plan->righttree,
2437 									 bms_union(nestloop_params, valid_params),
2438 									 scan_params);
2439 		/* ... and they don't count as parameters used at my level */
2440 		child_params = bms_difference(child_params, nestloop_params);
2441 		bms_free(nestloop_params);
2442 	}
2443 	else
2444 	{
2445 		/* easy case */
2446 		child_params = finalize_plan(root,
2447 									 plan->righttree,
2448 									 valid_params,
2449 									 scan_params);
2450 	}
2451 	context.paramids = bms_add_members(context.paramids, child_params);
2452 
2453 	/*
2454 	 * Any locally generated parameter doesn't count towards its generating
2455 	 * plan node's external dependencies.  (Note: if we changed valid_params
2456 	 * and/or scan_params, we leak those bitmapsets; not worth the notational
2457 	 * trouble to clean them up.)
2458 	 */
2459 	if (locally_added_param >= 0)
2460 	{
2461 		context.paramids = bms_del_member(context.paramids,
2462 										  locally_added_param);
2463 	}
2464 
2465 	/* Now we have all the paramids referenced in this node and children */
2466 
2467 	if (!bms_is_subset(context.paramids, valid_params))
2468 		elog(ERROR, "plan should not reference subplan's variable");
2469 
2470 	/*
2471 	 * The plan node's allParam and extParam fields should include all its
2472 	 * referenced paramids, plus contributions from any child initPlans.
2473 	 * However, any setParams of the initPlans should not be present in the
2474 	 * parent node's extParams, only in its allParams.  (It's possible that
2475 	 * some initPlans have extParams that are setParams of other initPlans.)
2476 	 */
2477 
2478 	/* allParam must include initplans' extParams and setParams */
2479 	plan->allParam = bms_union(context.paramids, initExtParam);
2480 	plan->allParam = bms_add_members(plan->allParam, initSetParam);
2481 	/* extParam must include any initplan extParams */
2482 	plan->extParam = bms_union(context.paramids, initExtParam);
2483 	/* but not any initplan setParams */
2484 	plan->extParam = bms_del_members(plan->extParam, initSetParam);
2485 
2486 	/*
2487 	 * For speed at execution time, make sure extParam/allParam are actually
2488 	 * NULL if they are empty sets.
2489 	 */
2490 	if (bms_is_empty(plan->extParam))
2491 		plan->extParam = NULL;
2492 	if (bms_is_empty(plan->allParam))
2493 		plan->allParam = NULL;
2494 
2495 	return plan->allParam;
2496 }
2497 
2498 /*
2499  * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
2500  * expression tree to the result set.
2501  */
2502 static bool
finalize_primnode(Node * node,finalize_primnode_context * context)2503 finalize_primnode(Node *node, finalize_primnode_context *context)
2504 {
2505 	if (node == NULL)
2506 		return false;
2507 	if (IsA(node, Param))
2508 	{
2509 		if (((Param *) node)->paramkind == PARAM_EXEC)
2510 		{
2511 			int			paramid = ((Param *) node)->paramid;
2512 
2513 			context->paramids = bms_add_member(context->paramids, paramid);
2514 		}
2515 		return false;			/* no more to do here */
2516 	}
2517 	if (IsA(node, SubPlan))
2518 	{
2519 		SubPlan    *subplan = (SubPlan *) node;
2520 		Plan	   *plan = planner_subplan_get_plan(context->root, subplan);
2521 		ListCell   *lc;
2522 		Bitmapset  *subparamids;
2523 
2524 		/* Recurse into the testexpr, but not into the Plan */
2525 		finalize_primnode(subplan->testexpr, context);
2526 
2527 		/*
2528 		 * Remove any param IDs of output parameters of the subplan that were
2529 		 * referenced in the testexpr.  These are not interesting for
2530 		 * parameter change signaling since we always re-evaluate the subplan.
2531 		 * Note that this wouldn't work too well if there might be uses of the
2532 		 * same param IDs elsewhere in the plan, but that can't happen because
2533 		 * generate_new_exec_param never tries to merge params.
2534 		 */
2535 		foreach(lc, subplan->paramIds)
2536 		{
2537 			context->paramids = bms_del_member(context->paramids,
2538 											   lfirst_int(lc));
2539 		}
2540 
2541 		/* Also examine args list */
2542 		finalize_primnode((Node *) subplan->args, context);
2543 
2544 		/*
2545 		 * Add params needed by the subplan to paramids, but excluding those
2546 		 * we will pass down to it.  (We assume SS_finalize_plan was run on
2547 		 * the subplan already.)
2548 		 */
2549 		subparamids = bms_copy(plan->extParam);
2550 		foreach(lc, subplan->parParam)
2551 		{
2552 			subparamids = bms_del_member(subparamids, lfirst_int(lc));
2553 		}
2554 		context->paramids = bms_join(context->paramids, subparamids);
2555 
2556 		return false;			/* no more to do here */
2557 	}
2558 	return expression_tree_walker(node, finalize_primnode,
2559 								  (void *) context);
2560 }
2561 
2562 /*
2563  * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2564  * and add IDs of all PARAM_EXEC params appearing within their aggregated
2565  * arguments to the result set.
2566  */
2567 static bool
finalize_agg_primnode(Node * node,finalize_primnode_context * context)2568 finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2569 {
2570 	if (node == NULL)
2571 		return false;
2572 	if (IsA(node, Aggref))
2573 	{
2574 		Aggref	   *agg = (Aggref *) node;
2575 
2576 		/* we should not consider the direct arguments, if any */
2577 		finalize_primnode((Node *) agg->args, context);
2578 		finalize_primnode((Node *) agg->aggfilter, context);
2579 		return false;			/* there can't be any Aggrefs below here */
2580 	}
2581 	return expression_tree_walker(node, finalize_agg_primnode,
2582 								  (void *) context);
2583 }
2584 
2585 /*
2586  * SS_make_initplan_output_param - make a Param for an initPlan's output
2587  *
2588  * The plan is expected to return a scalar value of the given type/collation.
2589  *
2590  * Note that in some cases the initplan may not ever appear in the finished
2591  * plan tree.  If that happens, we'll have wasted a PARAM_EXEC slot, which
2592  * is no big deal.
2593  */
2594 Param *
SS_make_initplan_output_param(PlannerInfo * root,Oid resulttype,int32 resulttypmod,Oid resultcollation)2595 SS_make_initplan_output_param(PlannerInfo *root,
2596 							  Oid resulttype, int32 resulttypmod,
2597 							  Oid resultcollation)
2598 {
2599 	return generate_new_exec_param(root, resulttype,
2600 								   resulttypmod, resultcollation);
2601 }
2602 
2603 /*
2604  * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
2605  *
2606  * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
2607  * list for the outer query level.  A Param that represents the initplan's
2608  * output has already been assigned using SS_make_initplan_output_param.
2609  */
2610 void
SS_make_initplan_from_plan(PlannerInfo * root,PlannerInfo * subroot,Plan * plan,Param * prm)2611 SS_make_initplan_from_plan(PlannerInfo *root,
2612 						   PlannerInfo *subroot, Plan *plan,
2613 						   Param *prm)
2614 {
2615 	SubPlan    *node;
2616 
2617 	/*
2618 	 * Add the subplan and its PlannerInfo to the global lists.
2619 	 */
2620 	root->glob->subplans = lappend(root->glob->subplans, plan);
2621 	root->glob->subroots = lappend(root->glob->subroots, subroot);
2622 
2623 	/*
2624 	 * Create a SubPlan node and add it to the outer list of InitPlans. Note
2625 	 * it has to appear after any other InitPlans it might depend on (see
2626 	 * comments in ExecReScan).
2627 	 */
2628 	node = makeNode(SubPlan);
2629 	node->subLinkType = EXPR_SUBLINK;
2630 	node->plan_id = list_length(root->glob->subplans);
2631 	node->plan_name = psprintf("InitPlan %d (returns $%d)",
2632 							   node->plan_id, prm->paramid);
2633 	get_first_col_type(plan, &node->firstColType, &node->firstColTypmod,
2634 					   &node->firstColCollation);
2635 	node->setParam = list_make1_int(prm->paramid);
2636 
2637 	root->init_plans = lappend(root->init_plans, node);
2638 
2639 	/*
2640 	 * The node can't have any inputs (since it's an initplan), so the
2641 	 * parParam and args lists remain empty.
2642 	 */
2643 
2644 	/* Set costs of SubPlan using info from the plan tree */
2645 	cost_subplan(subroot, node, plan);
2646 }
2647