1 /*-------------------------------------------------------------------------
2 *
3 * subselect.c
4 * Planning routines for subselects.
5 *
6 * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e.,
7 * not sub-SELECT-in-FROM cases).
8 *
9 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 * IDENTIFICATION
13 * src/backend/optimizer/plan/subselect.c
14 *
15 *-------------------------------------------------------------------------
16 */
17 #include "postgres.h"
18
19 #include "access/htup_details.h"
20 #include "catalog/pg_operator.h"
21 #include "catalog/pg_type.h"
22 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/cost.h"
28 #include "optimizer/optimizer.h"
29 #include "optimizer/paramassign.h"
30 #include "optimizer/pathnode.h"
31 #include "optimizer/planmain.h"
32 #include "optimizer/planner.h"
33 #include "optimizer/prep.h"
34 #include "optimizer/subselect.h"
35 #include "parser/parse_relation.h"
36 #include "rewrite/rewriteManip.h"
37 #include "utils/builtins.h"
38 #include "utils/lsyscache.h"
39 #include "utils/syscache.h"
40
41
/*
 * Source-code-compatibility hack for the pull_varnos() API change: within
 * this file, any two-argument pull_varnos(a, b) invocation is redirected by
 * the preprocessor to pull_varnos_new(a, b).
 */
#define pull_varnos(a,b) pull_varnos_new(a,b)
44
/*
 * Working state handed to convert_testexpr_mutator(); it is filled in by
 * convert_testexpr() from that function's own arguments.
 */
typedef struct convert_testexpr_context
{
	PlannerInfo *root;			/* planner state supplied by the caller of
								 * convert_testexpr() */
	List	   *subst_nodes;	/* Nodes to substitute for Params: a
								 * PARAM_SUBLINK Param with paramid N is
								 * replaced by a copy of the N'th list entry
								 * (counting from 1) */
} convert_testexpr_context;
50
/*
 * Context struct taken by process_sublinks_mutator() (see its prototype
 * in this file).
 */
typedef struct process_sublinks_context
{
	PlannerInfo *root;			/* planner state for the current query level */
	bool		isTopQual;		/* NOTE(review): presumably true while the
								 * expression being processed is at top level
								 * of a WHERE/HAVING qual, matching the
								 * isTopQual argument of make_subplan() --
								 * confirm against process_sublinks_mutator */
} process_sublinks_context;
56
/*
 * Context struct taken by finalize_primnode() and finalize_agg_primnode()
 * (see their prototypes in this file).
 */
typedef struct finalize_primnode_context
{
	PlannerInfo *root;			/* planner state for the current query level */
	Bitmapset  *paramids;		/* Non-local PARAM_EXEC paramids found */
} finalize_primnode_context;
62
/*
 * Context struct taken by inline_cte_walker() (see its prototype in this
 * file).  ctename and levelsup together identify the CTE being targeted.
 */
typedef struct inline_cte_walker_context
{
	const char *ctename;		/* name and relative level of target CTE */
	int			levelsup;		/* (the relative-level half of the above) */
	int			refcount;		/* number of remaining references */
	Query	   *ctequery;		/* query to substitute */
} inline_cte_walker_context;
70
71
/*
 * Forward declarations of file-local (static) routines, so that they can be
 * referenced ahead of their definitions (for example, make_subplan() calls
 * build_subplan() before the latter is defined).
 */
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
						   List *plan_params,
						   SubLinkType subLinkType, int subLinkId,
						   Node *testexpr, List *testexpr_paramids,
						   bool unknownEqFalse);
static List *generate_subquery_params(PlannerInfo *root, List *tlist,
									  List **paramIds);
static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
									Index varno);
static Node *convert_testexpr(PlannerInfo *root,
							  Node *testexpr,
							  List *subst_nodes);
static Node *convert_testexpr_mutator(Node *node,
									  convert_testexpr_context *context);
static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
static bool hash_ok_operator(OpExpr *expr);
static bool contain_dml(Node *node);
static bool contain_dml_walker(Node *node, void *context);
static bool contain_outer_selfref(Node *node);
static bool contain_outer_selfref_walker(Node *node, Index *depth);
static void inline_cte(PlannerInfo *root, CommonTableExpr *cte);
static bool inline_cte_walker(Node *node, inline_cte_walker_context *context);
static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
									Node **testexpr, List **paramIds);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
static Node *process_sublinks_mutator(Node *node,
									  process_sublinks_context *context);
static Bitmapset *finalize_plan(PlannerInfo *root,
								Plan *plan,
								int gather_param,
								Bitmapset *valid_params,
								Bitmapset *scan_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
109
110
111 /*
112 * Get the datatype/typmod/collation of the first column of the plan's output.
113 *
114 * This information is stored for ARRAY_SUBLINK execution and for
115 * exprType()/exprTypmod()/exprCollation(), which have no way to get at the
116 * plan associated with a SubPlan node. We really only need the info for
117 * EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency we save it
118 * always.
119 */
120 static void
get_first_col_type(Plan * plan,Oid * coltype,int32 * coltypmod,Oid * colcollation)121 get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
122 Oid *colcollation)
123 {
124 /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
125 if (plan->targetlist)
126 {
127 TargetEntry *tent = linitial_node(TargetEntry, plan->targetlist);
128
129 if (!tent->resjunk)
130 {
131 *coltype = exprType((Node *) tent->expr);
132 *coltypmod = exprTypmod((Node *) tent->expr);
133 *colcollation = exprCollation((Node *) tent->expr);
134 return;
135 }
136 }
137 *coltype = VOIDOID;
138 *coltypmod = -1;
139 *colcollation = InvalidOid;
140 }
141
142 /*
143 * Convert a SubLink (as created by the parser) into a SubPlan.
144 *
145 * We are given the SubLink's contained query, type, ID, and testexpr. We are
146 * also told if this expression appears at top level of a WHERE/HAVING qual.
147 *
148 * Note: we assume that the testexpr has been AND/OR flattened (actually,
149 * it's been through eval_const_expressions), but not converted to
150 * implicit-AND form; and any SubLinks in it should already have been
151 * converted to SubPlans. The subquery is as yet untouched, however.
152 *
153 * The result is whatever we need to substitute in place of the SubLink node
154 * in the executable expression. If we're going to do the subplan as a
155 * regular subplan, this will be the constructed SubPlan node. If we're going
156 * to do the subplan as an InitPlan, the SubPlan node instead goes into
157 * root->init_plans, and what we return here is an expression tree
158 * representing the InitPlan's result: usually just a Param node representing
159 * a single scalar result, but possibly a row comparison tree containing
160 * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant
161 * (since the real output Params are elsewhere in the tree, and the MULTIEXPR
162 * subquery itself is in a resjunk tlist entry whose value is uninteresting).
163 */
164 static Node *
make_subplan(PlannerInfo * root,Query * orig_subquery,SubLinkType subLinkType,int subLinkId,Node * testexpr,bool isTopQual)165 make_subplan(PlannerInfo *root, Query *orig_subquery,
166 SubLinkType subLinkType, int subLinkId,
167 Node *testexpr, bool isTopQual)
168 {
169 Query *subquery;
170 bool simple_exists = false;
171 double tuple_fraction;
172 PlannerInfo *subroot;
173 RelOptInfo *final_rel;
174 Path *best_path;
175 Plan *plan;
176 List *plan_params;
177 Node *result;
178
179 /*
180 * Copy the source Query node. This is a quick and dirty kluge to resolve
181 * the fact that the parser can generate trees with multiple links to the
182 * same sub-Query node, but the planner wants to scribble on the Query.
183 * Try to clean this up when we do querytree redesign...
184 */
185 subquery = copyObject(orig_subquery);
186
187 /*
188 * If it's an EXISTS subplan, we might be able to simplify it.
189 */
190 if (subLinkType == EXISTS_SUBLINK)
191 simple_exists = simplify_EXISTS_query(root, subquery);
192
193 /*
194 * For an EXISTS subplan, tell lower-level planner to expect that only the
195 * first tuple will be retrieved. For ALL and ANY subplans, we will be
196 * able to stop evaluating if the test condition fails or matches, so very
197 * often not all the tuples will be retrieved; for lack of a better idea,
198 * specify 50% retrieval. For EXPR, MULTIEXPR, and ROWCOMPARE subplans,
199 * use default behavior (we're only expecting one row out, anyway).
200 *
201 * NOTE: if you change these numbers, also change cost_subplan() in
202 * path/costsize.c.
203 *
204 * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
205 * its output. In that case it would've been better to specify full
206 * retrieval. At present, however, we can only check hashability after
207 * we've made the subplan :-(. (Determining whether it'll fit in work_mem
208 * is the really hard part.) Therefore, we don't want to be too
209 * optimistic about the percentage of tuples retrieved, for fear of
210 * selecting a plan that's bad for the materialization case.
211 */
212 if (subLinkType == EXISTS_SUBLINK)
213 tuple_fraction = 1.0; /* just like a LIMIT 1 */
214 else if (subLinkType == ALL_SUBLINK ||
215 subLinkType == ANY_SUBLINK)
216 tuple_fraction = 0.5; /* 50% */
217 else
218 tuple_fraction = 0.0; /* default behavior */
219
220 /* plan_params should not be in use in current query level */
221 Assert(root->plan_params == NIL);
222
223 /* Generate Paths for the subquery */
224 subroot = subquery_planner(root->glob, subquery,
225 root,
226 false, tuple_fraction);
227
228 /* Isolate the params needed by this specific subplan */
229 plan_params = root->plan_params;
230 root->plan_params = NIL;
231
232 /*
233 * Select best Path and turn it into a Plan. At least for now, there
234 * seems no reason to postpone doing that.
235 */
236 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
237 best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
238
239 plan = create_plan(subroot, best_path);
240
241 /* And convert to SubPlan or InitPlan format. */
242 result = build_subplan(root, plan, subroot, plan_params,
243 subLinkType, subLinkId,
244 testexpr, NIL, isTopQual);
245
246 /*
247 * If it's a correlated EXISTS with an unimportant targetlist, we might be
248 * able to transform it to the equivalent of an IN and then implement it
249 * by hashing. We don't have enough information yet to tell which way is
250 * likely to be better (it depends on the expected number of executions of
251 * the EXISTS qual, and we are much too early in planning the outer query
252 * to be able to guess that). So we generate both plans, if possible, and
253 * leave it to the executor to decide which to use.
254 */
255 if (simple_exists && IsA(result, SubPlan))
256 {
257 Node *newtestexpr;
258 List *paramIds;
259
260 /* Make a second copy of the original subquery */
261 subquery = copyObject(orig_subquery);
262 /* and re-simplify */
263 simple_exists = simplify_EXISTS_query(root, subquery);
264 Assert(simple_exists);
265 /* See if it can be converted to an ANY query */
266 subquery = convert_EXISTS_to_ANY(root, subquery,
267 &newtestexpr, ¶mIds);
268 if (subquery)
269 {
270 /* Generate Paths for the ANY subquery; we'll need all rows */
271 subroot = subquery_planner(root->glob, subquery,
272 root,
273 false, 0.0);
274
275 /* Isolate the params needed by this specific subplan */
276 plan_params = root->plan_params;
277 root->plan_params = NIL;
278
279 /* Select best Path and turn it into a Plan */
280 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
281 best_path = final_rel->cheapest_total_path;
282
283 plan = create_plan(subroot, best_path);
284
285 /* Now we can check if it'll fit in work_mem */
286 /* XXX can we check this at the Path stage? */
287 if (subplan_is_hashable(plan))
288 {
289 SubPlan *hashplan;
290 AlternativeSubPlan *asplan;
291
292 /* OK, convert to SubPlan format. */
293 hashplan = castNode(SubPlan,
294 build_subplan(root, plan, subroot,
295 plan_params,
296 ANY_SUBLINK, 0,
297 newtestexpr,
298 paramIds,
299 true));
300 /* Check we got what we expected */
301 Assert(hashplan->parParam == NIL);
302 Assert(hashplan->useHashTable);
303
304 /* Leave it to the executor to decide which plan to use */
305 asplan = makeNode(AlternativeSubPlan);
306 asplan->subplans = list_make2(result, hashplan);
307 result = (Node *) asplan;
308 }
309 }
310 }
311
312 return result;
313 }
314
/*
 * Build a SubPlan node given the raw inputs --- subroutine for make_subplan
 *
 * Returns either the SubPlan, or a replacement expression if we decide to
 * make it an InitPlan, as explained in the comments for make_subplan.
 *
 *	root				planner state of the query level that owns the subplan
 *	plan				finished Plan for the subquery
 *	subroot				the subquery's PlannerInfo (saved in glob->subroots)
 *	plan_params			list of PlannerParamItems the outer level must pass
 *						down; these become the parParam/args lists
 *	subLinkType			kind of SubLink being implemented
 *	subLinkId			used only for MULTIEXPR_SUBLINK, as the 1-based
 *						position in root->multiexpr_params
 *	testexpr			the SubLink's test expression, or NULL
 *	testexpr_paramids	if not NIL, testexpr is taken to be already converted
 *						and this list is stored as the SubPlan's paramIds
 *	unknownEqFalse		copied into the SubPlan's unknownEqFalse field
 *
 * Side effects: the plan and subroot are appended to the lists in root->glob;
 * an InitPlan's SubPlan node is appended to root->init_plans; and new
 * PARAM_EXEC Params may be generated.
 */
static Node *
build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
			  List *plan_params,
			  SubLinkType subLinkType, int subLinkId,
			  Node *testexpr, List *testexpr_paramids,
			  bool unknownEqFalse)
{
	Node	   *result;
	SubPlan    *splan;
	bool		isInitPlan;
	ListCell   *lc;

	/*
	 * Initialize the SubPlan node.  Note plan_id, plan_name, and cost fields
	 * are set further down.
	 */
	splan = makeNode(SubPlan);
	splan->subLinkType = subLinkType;
	splan->testexpr = NULL;
	splan->paramIds = NIL;
	get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
					   &splan->firstColCollation);
	splan->useHashTable = false;
	splan->unknownEqFalse = unknownEqFalse;
	splan->parallel_safe = plan->parallel_safe;
	splan->setParam = NIL;
	splan->parParam = NIL;
	splan->args = NIL;

	/*
	 * Make parParam and args lists of param IDs and expressions that current
	 * query level will pass to this child plan.  The two lists are built in
	 * step, one entry each per plan_params item.
	 */
	foreach(lc, plan_params)
	{
		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lc);
		Node	   *arg = pitem->item;

		/*
		 * The Var, PlaceHolderVar, or Aggref has already been adjusted to
		 * have the correct varlevelsup, phlevelsup, or agglevelsup.
		 *
		 * If it's a PlaceHolderVar or Aggref, its arguments might contain
		 * SubLinks, which have not yet been processed (see the comments for
		 * SS_replace_correlation_vars).  Do that now.
		 */
		if (IsA(arg, PlaceHolderVar) ||
			IsA(arg, Aggref))
			arg = SS_process_sublinks(root, arg, false);

		splan->parParam = lappend_int(splan->parParam, pitem->paramId);
		splan->args = lappend(splan->args, arg);
	}

	/*
	 * Uncorrelated or indirectly correlated plans of EXISTS, EXPR, ARRAY,
	 * ROWCOMPARE, or MULTIEXPR types can be used as initPlans.  (Here that
	 * means parParam is empty: nothing has to be passed down from the
	 * current query level.)  For EXISTS, EXPR, or ARRAY, we return a Param
	 * referring to the result of evaluating the initPlan.  For ROWCOMPARE, we
	 * must modify the testexpr tree to contain PARAM_EXEC Params instead of
	 * the PARAM_SUBLINK Params emitted by the parser, and then return that
	 * tree.  For MULTIEXPR, we return a null constant: the resjunk targetlist
	 * item containing the SubLink does not need to return anything useful,
	 * since the referencing Params are elsewhere.
	 */
	if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK)
	{
		Param	   *prm;

		/* The initplan's result is a single boolean Param */
		Assert(testexpr == NULL);
		prm = generate_new_exec_param(root, BOOLOID, -1, InvalidOid);
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK)
	{
		TargetEntry *te = linitial(plan->targetlist);
		Param	   *prm;

		/* The result Param takes its type info from the first tlist column */
		Assert(!te->resjunk);
		Assert(testexpr == NULL);
		prm = generate_new_exec_param(root,
									  exprType((Node *) te->expr),
									  exprTypmod((Node *) te->expr),
									  exprCollation((Node *) te->expr));
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK)
	{
		TargetEntry *te = linitial(plan->targetlist);
		Oid			arraytype;
		Param	   *prm;

		/*
		 * The result Param has the array type found for the first column's
		 * type; typmod and collation are taken from the column itself.
		 */
		Assert(!te->resjunk);
		Assert(testexpr == NULL);
		arraytype = get_promoted_array_type(exprType((Node *) te->expr));
		if (!OidIsValid(arraytype))
			elog(ERROR, "could not find array type for datatype %s",
				 format_type_be(exprType((Node *) te->expr)));
		prm = generate_new_exec_param(root,
									  arraytype,
									  exprTypmod((Node *) te->expr),
									  exprCollation((Node *) te->expr));
		splan->setParam = list_make1_int(prm->paramid);
		isInitPlan = true;
		result = (Node *) prm;
	}
	else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK)
	{
		/* Adjust the Params */
		List	   *params;

		Assert(testexpr != NULL);
		params = generate_subquery_params(root,
										  plan->targetlist,
										  &splan->paramIds);
		result = convert_testexpr(root,
								  testexpr,
								  params);
		/* The initplan sets exactly the Params just generated */
		splan->setParam = list_copy(splan->paramIds);
		isInitPlan = true;

		/*
		 * The executable expression is returned to become part of the outer
		 * plan's expression tree; it is not kept in the initplan node.
		 */
	}
	else if (subLinkType == MULTIEXPR_SUBLINK)
	{
		/*
		 * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param
		 * for each output column.
		 */
		List	   *params;

		Assert(testexpr == NULL);
		params = generate_subquery_params(root,
										  plan->targetlist,
										  &splan->setParam);

		/*
		 * Save the list of replacement Params in the n'th cell of
		 * root->multiexpr_params; setrefs.c will use it to replace
		 * PARAM_MULTIEXPR Params.  The list is first padded with NIL entries
		 * until it has at least subLinkId cells.
		 */
		while (list_length(root->multiexpr_params) < subLinkId)
			root->multiexpr_params = lappend(root->multiexpr_params, NIL);
		lc = list_nth_cell(root->multiexpr_params, subLinkId - 1);
		Assert(lfirst(lc) == NIL);
		lfirst(lc) = params;

		/* It can be an initplan if there are no parParams. */
		if (splan->parParam == NIL)
		{
			isInitPlan = true;
			result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid);
		}
		else
		{
			isInitPlan = false;
			result = (Node *) splan;
		}
	}
	else
	{
		/*
		 * Adjust the Params in the testexpr, unless caller already took care
		 * of it (as indicated by passing a list of Param IDs).
		 */
		if (testexpr && testexpr_paramids == NIL)
		{
			List	   *params;

			params = generate_subquery_params(root,
											  plan->targetlist,
											  &splan->paramIds);
			splan->testexpr = convert_testexpr(root,
											   testexpr,
											   params);
		}
		else
		{
			splan->testexpr = testexpr;
			splan->paramIds = testexpr_paramids;
		}

		/*
		 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
		 * initPlans, even when they are uncorrelated or indirectly
		 * correlated, because we need to scan the output of the subplan for
		 * each outer tuple.  But if it's a not-direct-correlated IN (= ANY)
		 * test, we might be able to use a hashtable to avoid comparing all
		 * the tuples.
		 */
		if (subLinkType == ANY_SUBLINK &&
			splan->parParam == NIL &&
			subplan_is_hashable(plan) &&
			testexpr_is_hashable(splan->testexpr, splan->paramIds))
			splan->useHashTable = true;

		/*
		 * Otherwise, we have the option to tack a Material node onto the top
		 * of the subplan, to reduce the cost of reading it repeatedly.  This
		 * is pointless for a direct-correlated subplan, since we'd have to
		 * recompute its results each time anyway.  For uncorrelated or
		 * indirectly correlated subplans, we add Material unless the
		 * subplan's top plan node would materialize its output anyway.  Also,
		 * if enable_material is false, then the user does not want us to
		 * materialize anything unnecessarily, so we don't.
		 *
		 * Note that this replaces the local "plan", so it is the
		 * materialized plan that gets registered and costed below.
		 */
		else if (splan->parParam == NIL && enable_material &&
				 !ExecMaterializesOutput(nodeTag(plan)))
			plan = materialize_finished_plan(plan);

		result = (Node *) splan;
		isInitPlan = false;
	}

	/*
	 * Add the subplan and its PlannerInfo to the global lists.  plan_id is
	 * the plan's 1-based position in glob->subplans.
	 */
	root->glob->subplans = lappend(root->glob->subplans, plan);
	root->glob->subroots = lappend(root->glob->subroots, subroot);
	splan->plan_id = list_length(root->glob->subplans);

	if (isInitPlan)
		root->init_plans = lappend(root->init_plans, splan);

	/*
	 * A parameterless subplan (not initplan) should be prepared to handle
	 * REWIND efficiently.  If it has direct parameters then there's no point
	 * since it'll be reset on each scan anyway; and if it's an initplan then
	 * there's no point since it won't get re-run without parameter changes
	 * anyway.  The input of a hashed subplan doesn't need REWIND either.
	 */
	if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable)
		root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
												   splan->plan_id);

	/*
	 * Label the subplan for EXPLAIN purposes.  The name has the form
	 * "InitPlan N" or "SubPlan N", followed by " (returns $a,$b,...)" when
	 * the plan sets any Params.  The buffer is sized at 32 bytes for the
	 * fixed part plus 12 bytes per setParam entry.
	 */
	splan->plan_name = palloc(32 + 12 * list_length(splan->setParam));
	sprintf(splan->plan_name, "%s %d",
			isInitPlan ? "InitPlan" : "SubPlan",
			splan->plan_id);
	if (splan->setParam)
	{
		char	   *ptr = splan->plan_name + strlen(splan->plan_name);

		ptr += sprintf(ptr, " (returns ");
		foreach(lc, splan->setParam)
		{
			/* separate entries with commas; close the list after the last */
			ptr += sprintf(ptr, "$%d%s",
						   lfirst_int(lc),
						   lnext(lc) ? "," : ")");
		}
	}

	/* Lastly, fill in the cost estimates for use later */
	cost_subplan(root, splan, plan);

	return result;
}
585
586 /*
587 * generate_subquery_params: build a list of Params representing the output
588 * columns of a sublink's sub-select, given the sub-select's targetlist.
589 *
590 * We also return an integer list of the paramids of the Params.
591 */
592 static List *
generate_subquery_params(PlannerInfo * root,List * tlist,List ** paramIds)593 generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds)
594 {
595 List *result;
596 List *ids;
597 ListCell *lc;
598
599 result = ids = NIL;
600 foreach(lc, tlist)
601 {
602 TargetEntry *tent = (TargetEntry *) lfirst(lc);
603 Param *param;
604
605 if (tent->resjunk)
606 continue;
607
608 param = generate_new_exec_param(root,
609 exprType((Node *) tent->expr),
610 exprTypmod((Node *) tent->expr),
611 exprCollation((Node *) tent->expr));
612 result = lappend(result, param);
613 ids = lappend_int(ids, param->paramid);
614 }
615
616 *paramIds = ids;
617 return result;
618 }
619
620 /*
621 * generate_subquery_vars: build a list of Vars representing the output
622 * columns of a sublink's sub-select, given the sub-select's targetlist.
623 * The Vars have the specified varno (RTE index).
624 */
625 static List *
generate_subquery_vars(PlannerInfo * root,List * tlist,Index varno)626 generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
627 {
628 List *result;
629 ListCell *lc;
630
631 result = NIL;
632 foreach(lc, tlist)
633 {
634 TargetEntry *tent = (TargetEntry *) lfirst(lc);
635 Var *var;
636
637 if (tent->resjunk)
638 continue;
639
640 var = makeVarFromTargetEntry(varno, tent);
641 result = lappend(result, var);
642 }
643
644 return result;
645 }
646
647 /*
648 * convert_testexpr: convert the testexpr given by the parser into
649 * actually executable form. This entails replacing PARAM_SUBLINK Params
650 * with Params or Vars representing the results of the sub-select. The
651 * nodes to be substituted are passed in as the List result from
652 * generate_subquery_params or generate_subquery_vars.
653 */
654 static Node *
convert_testexpr(PlannerInfo * root,Node * testexpr,List * subst_nodes)655 convert_testexpr(PlannerInfo *root,
656 Node *testexpr,
657 List *subst_nodes)
658 {
659 convert_testexpr_context context;
660
661 context.root = root;
662 context.subst_nodes = subst_nodes;
663 return convert_testexpr_mutator(testexpr, &context);
664 }
665
666 static Node *
convert_testexpr_mutator(Node * node,convert_testexpr_context * context)667 convert_testexpr_mutator(Node *node,
668 convert_testexpr_context *context)
669 {
670 if (node == NULL)
671 return NULL;
672 if (IsA(node, Param))
673 {
674 Param *param = (Param *) node;
675
676 if (param->paramkind == PARAM_SUBLINK)
677 {
678 if (param->paramid <= 0 ||
679 param->paramid > list_length(context->subst_nodes))
680 elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid);
681
682 /*
683 * We copy the list item to avoid having doubly-linked
684 * substructure in the modified parse tree. This is probably
685 * unnecessary when it's a Param, but be safe.
686 */
687 return (Node *) copyObject(list_nth(context->subst_nodes,
688 param->paramid - 1));
689 }
690 }
691 if (IsA(node, SubLink))
692 {
693 /*
694 * If we come across a nested SubLink, it is neither necessary nor
695 * correct to recurse into it: any PARAM_SUBLINKs we might find inside
696 * belong to the inner SubLink not the outer. So just return it as-is.
697 *
698 * This reasoning depends on the assumption that nothing will pull
699 * subexpressions into or out of the testexpr field of a SubLink, at
700 * least not without replacing PARAM_SUBLINKs first. If we did want
701 * to do that we'd need to rethink the parser-output representation
702 * altogether, since currently PARAM_SUBLINKs are only unique per
703 * SubLink not globally across the query. The whole point of
704 * replacing them with Vars or PARAM_EXEC nodes is to make them
705 * globally unique before they escape from the SubLink's testexpr.
706 *
707 * Note: this can't happen when called during SS_process_sublinks,
708 * because that recursively processes inner SubLinks first. It can
709 * happen when called from convert_ANY_sublink_to_join, though.
710 */
711 return node;
712 }
713 return expression_tree_mutator(node,
714 convert_testexpr_mutator,
715 (void *) context);
716 }
717
718 /*
719 * subplan_is_hashable: can we implement an ANY subplan by hashing?
720 */
721 static bool
subplan_is_hashable(Plan * plan)722 subplan_is_hashable(Plan *plan)
723 {
724 double subquery_size;
725
726 /*
727 * The estimated size of the subquery result must fit in work_mem. (Note:
728 * we use heap tuple overhead here even though the tuples will actually be
729 * stored as MinimalTuples; this provides some fudge factor for hashtable
730 * overhead.)
731 */
732 subquery_size = plan->plan_rows *
733 (MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
734 if (subquery_size > work_mem * 1024L)
735 return false;
736
737 return true;
738 }
739
740 /*
741 * testexpr_is_hashable: is an ANY SubLink's test expression hashable?
742 *
743 * To identify LHS vs RHS of the hash expression, we must be given the
744 * list of output Param IDs of the SubLink's subquery.
745 */
746 static bool
testexpr_is_hashable(Node * testexpr,List * param_ids)747 testexpr_is_hashable(Node *testexpr, List *param_ids)
748 {
749 /*
750 * The testexpr must be a single OpExpr, or an AND-clause containing only
751 * OpExprs, each of which satisfy test_opexpr_is_hashable().
752 */
753 if (testexpr && IsA(testexpr, OpExpr))
754 {
755 if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids))
756 return true;
757 }
758 else if (is_andclause(testexpr))
759 {
760 ListCell *l;
761
762 foreach(l, ((BoolExpr *) testexpr)->args)
763 {
764 Node *andarg = (Node *) lfirst(l);
765
766 if (!IsA(andarg, OpExpr))
767 return false;
768 if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids))
769 return false;
770 }
771 return true;
772 }
773
774 return false;
775 }
776
777 static bool
test_opexpr_is_hashable(OpExpr * testexpr,List * param_ids)778 test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids)
779 {
780 /*
781 * The combining operator must be hashable and strict. The need for
782 * hashability is obvious, since we want to use hashing. Without
783 * strictness, behavior in the presence of nulls is too unpredictable. We
784 * actually must assume even more than plain strictness: it can't yield
785 * NULL for non-null inputs, either (see nodeSubplan.c). However, hash
786 * indexes and hash joins assume that too.
787 */
788 if (!hash_ok_operator(testexpr))
789 return false;
790
791 /*
792 * The left and right inputs must belong to the outer and inner queries
793 * respectively; hence Params that will be supplied by the subquery must
794 * not appear in the LHS, and Vars of the outer query must not appear in
795 * the RHS. (Ordinarily, this must be true because of the way that the
796 * parser builds an ANY SubLink's testexpr ... but inlining of functions
797 * could have changed the expression's structure, so we have to check.
798 * Such cases do not occur often enough to be worth trying to optimize, so
799 * we don't worry about trying to commute the clause or anything like
800 * that; we just need to be sure not to build an invalid plan.)
801 */
802 if (list_length(testexpr->args) != 2)
803 return false;
804 if (contain_exec_param((Node *) linitial(testexpr->args), param_ids))
805 return false;
806 if (contain_var_clause((Node *) lsecond(testexpr->args)))
807 return false;
808 return true;
809 }
810
811 /*
812 * Check expression is hashable + strict
813 *
814 * We could use op_hashjoinable() and op_strict(), but do it like this to
815 * avoid a redundant cache lookup.
816 */
817 static bool
hash_ok_operator(OpExpr * expr)818 hash_ok_operator(OpExpr *expr)
819 {
820 Oid opid = expr->opno;
821
822 /* quick out if not a binary operator */
823 if (list_length(expr->args) != 2)
824 return false;
825 if (opid == ARRAY_EQ_OP)
826 {
827 /* array_eq is strict, but must check input type to ensure hashable */
828 /* XXX record_eq will need same treatment when it becomes hashable */
829 Node *leftarg = linitial(expr->args);
830
831 return op_hashjoinable(opid, exprType(leftarg));
832 }
833 else
834 {
835 /* else must look up the operator properties */
836 HeapTuple tup;
837 Form_pg_operator optup;
838
839 tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid));
840 if (!HeapTupleIsValid(tup))
841 elog(ERROR, "cache lookup failed for operator %u", opid);
842 optup = (Form_pg_operator) GETSTRUCT(tup);
843 if (!optup->oprcanhash || !func_strict(optup->oprcode))
844 {
845 ReleaseSysCache(tup);
846 return false;
847 }
848 ReleaseSysCache(tup);
849 return true;
850 }
851 }
852
853
854 /*
855 * SS_process_ctes: process a query's WITH list
856 *
857 * Consider each CTE in the WITH list and either ignore it (if it's an
858 * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM,
859 * or convert it to an initplan.
860 *
861 * A side effect is to fill in root->cte_plan_ids with a list that
862 * parallels root->parse->cteList and provides the subplan ID for
863 * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan.
864 */
865 void
SS_process_ctes(PlannerInfo * root)866 SS_process_ctes(PlannerInfo *root)
867 {
868 ListCell *lc;
869
870 Assert(root->cte_plan_ids == NIL);
871
872 foreach(lc, root->parse->cteList)
873 {
874 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
875 CmdType cmdType = ((Query *) cte->ctequery)->commandType;
876 Query *subquery;
877 PlannerInfo *subroot;
878 RelOptInfo *final_rel;
879 Path *best_path;
880 Plan *plan;
881 SubPlan *splan;
882 int paramid;
883
884 /*
885 * Ignore SELECT CTEs that are not actually referenced anywhere.
886 */
887 if (cte->cterefcount == 0 && cmdType == CMD_SELECT)
888 {
889 /* Make a dummy entry in cte_plan_ids */
890 root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
891 continue;
892 }
893
894 /*
895 * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of
896 * implementing it as a separately-planned CTE.
897 *
898 * We cannot inline if any of these conditions hold:
899 *
900 * 1. The user said not to (the CTEMaterializeAlways option).
901 *
902 * 2. The CTE is recursive.
903 *
904 * 3. The CTE has side-effects; this includes either not being a plain
905 * SELECT, or containing volatile functions. Inlining might change
906 * the side-effects, which would be bad.
907 *
908 * 4. The CTE is multiply-referenced and contains a self-reference to
909 * a recursive CTE outside itself. Inlining would result in multiple
910 * recursive self-references, which we don't support.
911 *
912 * Otherwise, we have an option whether to inline or not. That should
913 * always be a win if there's just a single reference, but if the CTE
914 * is multiply-referenced then it's unclear: inlining adds duplicate
915 * computations, but the ability to absorb restrictions from the outer
916 * query level could outweigh that. We do not have nearly enough
917 * information at this point to tell whether that's true, so we let
918 * the user express a preference. Our default behavior is to inline
919 * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever
920 * will be inlined even if multiply referenced.
921 *
922 * Note: we check for volatile functions last, because that's more
923 * expensive than the other tests needed.
924 */
925 if ((cte->ctematerialized == CTEMaterializeNever ||
926 (cte->ctematerialized == CTEMaterializeDefault &&
927 cte->cterefcount == 1)) &&
928 !cte->cterecursive &&
929 cmdType == CMD_SELECT &&
930 !contain_dml(cte->ctequery) &&
931 (cte->cterefcount <= 1 ||
932 !contain_outer_selfref(cte->ctequery)) &&
933 !contain_volatile_functions(cte->ctequery))
934 {
935 inline_cte(root, cte);
936 /* Make a dummy entry in cte_plan_ids */
937 root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
938 continue;
939 }
940
941 /*
942 * Copy the source Query node. Probably not necessary, but let's keep
943 * this similar to make_subplan.
944 */
945 subquery = (Query *) copyObject(cte->ctequery);
946
947 /* plan_params should not be in use in current query level */
948 Assert(root->plan_params == NIL);
949
950 /*
951 * Generate Paths for the CTE query. Always plan for full retrieval
952 * --- we don't have enough info to predict otherwise.
953 */
954 subroot = subquery_planner(root->glob, subquery,
955 root,
956 cte->cterecursive, 0.0);
957
958 /*
959 * Since the current query level doesn't yet contain any RTEs, it
960 * should not be possible for the CTE to have requested parameters of
961 * this level.
962 */
963 if (root->plan_params)
964 elog(ERROR, "unexpected outer reference in CTE query");
965
966 /*
967 * Select best Path and turn it into a Plan. At least for now, there
968 * seems no reason to postpone doing that.
969 */
970 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
971 best_path = final_rel->cheapest_total_path;
972
973 plan = create_plan(subroot, best_path);
974
975 /*
976 * Make a SubPlan node for it. This is just enough unlike
977 * build_subplan that we can't share code.
978 *
979 * Note plan_id, plan_name, and cost fields are set further down.
980 */
981 splan = makeNode(SubPlan);
982 splan->subLinkType = CTE_SUBLINK;
983 splan->testexpr = NULL;
984 splan->paramIds = NIL;
985 get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod,
986 &splan->firstColCollation);
987 splan->useHashTable = false;
988 splan->unknownEqFalse = false;
989
990 /*
991 * CTE scans are not considered for parallelism (cf
992 * set_rel_consider_parallel), and even if they were, initPlans aren't
993 * parallel-safe.
994 */
995 splan->parallel_safe = false;
996 splan->setParam = NIL;
997 splan->parParam = NIL;
998 splan->args = NIL;
999
1000 /*
1001 * The node can't have any inputs (since it's an initplan), so the
1002 * parParam and args lists remain empty. (It could contain references
1003 * to earlier CTEs' output param IDs, but CTE outputs are not
1004 * propagated via the args list.)
1005 */
1006
1007 /*
1008 * Assign a param ID to represent the CTE's output. No ordinary
1009 * "evaluation" of this param slot ever happens, but we use the param
1010 * ID for setParam/chgParam signaling just as if the CTE plan were
1011 * returning a simple scalar output. (Also, the executor abuses the
1012 * ParamExecData slot for this param ID for communication among
1013 * multiple CteScan nodes that might be scanning this CTE.)
1014 */
1015 paramid = assign_special_exec_param(root);
1016 splan->setParam = list_make1_int(paramid);
1017
1018 /*
1019 * Add the subplan and its PlannerInfo to the global lists.
1020 */
1021 root->glob->subplans = lappend(root->glob->subplans, plan);
1022 root->glob->subroots = lappend(root->glob->subroots, subroot);
1023 splan->plan_id = list_length(root->glob->subplans);
1024
1025 root->init_plans = lappend(root->init_plans, splan);
1026
1027 root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id);
1028
1029 /* Label the subplan for EXPLAIN purposes */
1030 splan->plan_name = psprintf("CTE %s", cte->ctename);
1031
1032 /* Lastly, fill in the cost estimates for use later */
1033 cost_subplan(root, splan, plan);
1034 }
1035 }
1036
1037 /*
1038 * contain_dml: is any subquery not a plain SELECT?
1039 *
1040 * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc.
1041 */
1042 static bool
contain_dml(Node * node)1043 contain_dml(Node *node)
1044 {
1045 return contain_dml_walker(node, NULL);
1046 }
1047
1048 static bool
contain_dml_walker(Node * node,void * context)1049 contain_dml_walker(Node *node, void *context)
1050 {
1051 if (node == NULL)
1052 return false;
1053 if (IsA(node, Query))
1054 {
1055 Query *query = (Query *) node;
1056
1057 if (query->commandType != CMD_SELECT ||
1058 query->rowMarks != NIL)
1059 return true;
1060
1061 return query_tree_walker(query, contain_dml_walker, context, 0);
1062 }
1063 return expression_tree_walker(node, contain_dml_walker, context);
1064 }
1065
1066 /*
1067 * contain_outer_selfref: is there an external recursive self-reference?
1068 */
1069 static bool
contain_outer_selfref(Node * node)1070 contain_outer_selfref(Node *node)
1071 {
1072 Index depth = 0;
1073
1074 /*
1075 * We should be starting with a Query, so that depth will be 1 while
1076 * examining its immediate contents.
1077 */
1078 Assert(IsA(node, Query));
1079
1080 return contain_outer_selfref_walker(node, &depth);
1081 }
1082
1083 static bool
contain_outer_selfref_walker(Node * node,Index * depth)1084 contain_outer_selfref_walker(Node *node, Index *depth)
1085 {
1086 if (node == NULL)
1087 return false;
1088 if (IsA(node, RangeTblEntry))
1089 {
1090 RangeTblEntry *rte = (RangeTblEntry *) node;
1091
1092 /*
1093 * Check for a self-reference to a CTE that's above the Query that our
1094 * search started at.
1095 */
1096 if (rte->rtekind == RTE_CTE &&
1097 rte->self_reference &&
1098 rte->ctelevelsup >= *depth)
1099 return true;
1100 return false; /* allow range_table_walker to continue */
1101 }
1102 if (IsA(node, Query))
1103 {
1104 /* Recurse into subquery, tracking nesting depth properly */
1105 Query *query = (Query *) node;
1106 bool result;
1107
1108 (*depth)++;
1109
1110 result = query_tree_walker(query, contain_outer_selfref_walker,
1111 (void *) depth, QTW_EXAMINE_RTES_BEFORE);
1112
1113 (*depth)--;
1114
1115 return result;
1116 }
1117 return expression_tree_walker(node, contain_outer_selfref_walker,
1118 (void *) depth);
1119 }
1120
1121 /*
1122 * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs
1123 */
1124 static void
inline_cte(PlannerInfo * root,CommonTableExpr * cte)1125 inline_cte(PlannerInfo *root, CommonTableExpr *cte)
1126 {
1127 struct inline_cte_walker_context context;
1128
1129 context.ctename = cte->ctename;
1130 /* Start at levelsup = -1 because we'll immediately increment it */
1131 context.levelsup = -1;
1132 context.refcount = cte->cterefcount;
1133 context.ctequery = castNode(Query, cte->ctequery);
1134
1135 (void) inline_cte_walker((Node *) root->parse, &context);
1136
1137 /* Assert we replaced all references */
1138 Assert(context.refcount == 0);
1139 }
1140
1141 static bool
inline_cte_walker(Node * node,inline_cte_walker_context * context)1142 inline_cte_walker(Node *node, inline_cte_walker_context *context)
1143 {
1144 if (node == NULL)
1145 return false;
1146 if (IsA(node, Query))
1147 {
1148 Query *query = (Query *) node;
1149
1150 context->levelsup++;
1151
1152 /*
1153 * Visit the query's RTE nodes after their contents; otherwise
1154 * query_tree_walker would descend into the newly inlined CTE query,
1155 * which we don't want.
1156 */
1157 (void) query_tree_walker(query, inline_cte_walker, context,
1158 QTW_EXAMINE_RTES_AFTER);
1159
1160 context->levelsup--;
1161
1162 return false;
1163 }
1164 else if (IsA(node, RangeTblEntry))
1165 {
1166 RangeTblEntry *rte = (RangeTblEntry *) node;
1167
1168 if (rte->rtekind == RTE_CTE &&
1169 strcmp(rte->ctename, context->ctename) == 0 &&
1170 rte->ctelevelsup == context->levelsup)
1171 {
1172 /*
1173 * Found a reference to replace. Generate a copy of the CTE query
1174 * with appropriate level adjustment for outer references (e.g.,
1175 * to other CTEs).
1176 */
1177 Query *newquery = copyObject(context->ctequery);
1178
1179 if (context->levelsup > 0)
1180 IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1);
1181
1182 /*
1183 * Convert the RTE_CTE RTE into a RTE_SUBQUERY.
1184 *
1185 * Historically, a FOR UPDATE clause has been treated as extending
1186 * into views and subqueries, but not into CTEs. We preserve this
1187 * distinction by not trying to push rowmarks into the new
1188 * subquery.
1189 */
1190 rte->rtekind = RTE_SUBQUERY;
1191 rte->subquery = newquery;
1192 rte->security_barrier = false;
1193
1194 /* Zero out CTE-specific fields */
1195 rte->ctename = NULL;
1196 rte->ctelevelsup = 0;
1197 rte->self_reference = false;
1198 rte->coltypes = NIL;
1199 rte->coltypmods = NIL;
1200 rte->colcollations = NIL;
1201
1202 /* Count the number of replacements we've done */
1203 context->refcount--;
1204 }
1205
1206 return false;
1207 }
1208
1209 return expression_tree_walker(node, inline_cte_walker, context);
1210 }
1211
1212
1213 /*
1214 * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
1215 *
1216 * The caller has found an ANY SubLink at the top level of one of the query's
1217 * qual clauses, but has not checked the properties of the SubLink further.
1218 * Decide whether it is appropriate to process this SubLink in join style.
1219 * If so, form a JoinExpr and return it. Return NULL if the SubLink cannot
1220 * be converted to a join.
1221 *
1222 * The only non-obvious input parameter is available_rels: this is the set
1223 * of query rels that can safely be referenced in the sublink expression.
1224 * (We must restrict this to avoid changing the semantics when a sublink
1225 * is present in an outer join's ON qual.) The conversion must fail if
1226 * the converted qual would reference any but these parent-query relids.
1227 *
1228 * On success, the returned JoinExpr has larg = NULL and rarg = the jointree
1229 * item representing the pulled-up subquery. The caller must set larg to
1230 * represent the relation(s) on the lefthand side of the new join, and insert
1231 * the JoinExpr into the upper query's jointree at an appropriate place
1232 * (typically, where the lefthand relation(s) had been). Note that the
1233 * passed-in SubLink must also be removed from its original position in the
1234 * query quals, since the quals of the returned JoinExpr replace it.
1235 * (Notionally, we replace the SubLink with a constant TRUE, then elide the
1236 * redundant constant from the qual.)
1237 *
1238 * On success, the caller is also responsible for recursively applying
1239 * pull_up_sublinks processing to the rarg and quals of the returned JoinExpr.
1240 * (On failure, there is no need to do anything, since pull_up_sublinks will
1241 * be applied when we recursively plan the sub-select.)
1242 *
1243 * Side effects of a successful conversion include adding the SubLink's
1244 * subselect to the query's rangetable, so that it can be referenced in
1245 * the JoinExpr's rarg.
1246 */
1247 JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo * root,SubLink * sublink,Relids available_rels)1248 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
1249 Relids available_rels)
1250 {
1251 JoinExpr *result;
1252 Query *parse = root->parse;
1253 Query *subselect = (Query *) sublink->subselect;
1254 Relids upper_varnos;
1255 int rtindex;
1256 RangeTblEntry *rte;
1257 RangeTblRef *rtr;
1258 List *subquery_vars;
1259 Node *quals;
1260 ParseState *pstate;
1261
1262 Assert(sublink->subLinkType == ANY_SUBLINK);
1263
1264 /*
1265 * The sub-select must not refer to any Vars of the parent query. (Vars of
1266 * higher levels should be okay, though.)
1267 */
1268 if (contain_vars_of_level((Node *) subselect, 1))
1269 return NULL;
1270
1271 /*
1272 * The test expression must contain some Vars of the parent query, else
1273 * it's not gonna be a join. (Note that it won't have Vars referring to
1274 * the subquery, rather Params.)
1275 */
1276 upper_varnos = pull_varnos(root, sublink->testexpr);
1277 if (bms_is_empty(upper_varnos))
1278 return NULL;
1279
1280 /*
1281 * However, it can't refer to anything outside available_rels.
1282 */
1283 if (!bms_is_subset(upper_varnos, available_rels))
1284 return NULL;
1285
1286 /*
1287 * The combining operators and left-hand expressions mustn't be volatile.
1288 */
1289 if (contain_volatile_functions(sublink->testexpr))
1290 return NULL;
1291
1292 /* Create a dummy ParseState for addRangeTableEntryForSubquery */
1293 pstate = make_parsestate(NULL);
1294
1295 /*
1296 * Okay, pull up the sub-select into upper range table.
1297 *
1298 * We rely here on the assumption that the outer query has no references
1299 * to the inner (necessarily true, other than the Vars that we build
1300 * below). Therefore this is a lot easier than what pull_up_subqueries has
1301 * to go through.
1302 */
1303 rte = addRangeTableEntryForSubquery(pstate,
1304 subselect,
1305 makeAlias("ANY_subquery", NIL),
1306 false,
1307 false);
1308 parse->rtable = lappend(parse->rtable, rte);
1309 rtindex = list_length(parse->rtable);
1310
1311 /*
1312 * Form a RangeTblRef for the pulled-up sub-select.
1313 */
1314 rtr = makeNode(RangeTblRef);
1315 rtr->rtindex = rtindex;
1316
1317 /*
1318 * Build a list of Vars representing the subselect outputs.
1319 */
1320 subquery_vars = generate_subquery_vars(root,
1321 subselect->targetList,
1322 rtindex);
1323
1324 /*
1325 * Build the new join's qual expression, replacing Params with these Vars.
1326 */
1327 quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
1328
1329 /*
1330 * And finally, build the JoinExpr node.
1331 */
1332 result = makeNode(JoinExpr);
1333 result->jointype = JOIN_SEMI;
1334 result->isNatural = false;
1335 result->larg = NULL; /* caller must fill this in */
1336 result->rarg = (Node *) rtr;
1337 result->usingClause = NIL;
1338 result->quals = quals;
1339 result->alias = NULL;
1340 result->rtindex = 0; /* we don't need an RTE for it */
1341
1342 return result;
1343 }
1344
1345 /*
1346 * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
1347 *
1348 * The API of this function is identical to convert_ANY_sublink_to_join's,
1349 * except that we also support the case where the caller has found NOT EXISTS,
1350 * so we need an additional input parameter "under_not".
1351 */
1352 JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo * root,SubLink * sublink,bool under_not,Relids available_rels)1353 convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
1354 bool under_not, Relids available_rels)
1355 {
1356 JoinExpr *result;
1357 Query *parse = root->parse;
1358 Query *subselect = (Query *) sublink->subselect;
1359 Node *whereClause;
1360 int rtoffset;
1361 int varno;
1362 Relids clause_varnos;
1363 Relids upper_varnos;
1364
1365 Assert(sublink->subLinkType == EXISTS_SUBLINK);
1366
1367 /*
1368 * Can't flatten if it contains WITH. (We could arrange to pull up the
1369 * WITH into the parent query's cteList, but that risks changing the
1370 * semantics, since a WITH ought to be executed once per associated query
1371 * call.) Note that convert_ANY_sublink_to_join doesn't have to reject
1372 * this case, since it just produces a subquery RTE that doesn't have to
1373 * get flattened into the parent query.
1374 */
1375 if (subselect->cteList)
1376 return NULL;
1377
1378 /*
1379 * Copy the subquery so we can modify it safely (see comments in
1380 * make_subplan).
1381 */
1382 subselect = copyObject(subselect);
1383
1384 /*
1385 * See if the subquery can be simplified based on the knowledge that it's
1386 * being used in EXISTS(). If we aren't able to get rid of its
1387 * targetlist, we have to fail, because the pullup operation leaves us
1388 * with noplace to evaluate the targetlist.
1389 */
1390 if (!simplify_EXISTS_query(root, subselect))
1391 return NULL;
1392
1393 /*
1394 * Separate out the WHERE clause. (We could theoretically also remove
1395 * top-level plain JOIN/ON clauses, but it's probably not worth the
1396 * trouble.)
1397 */
1398 whereClause = subselect->jointree->quals;
1399 subselect->jointree->quals = NULL;
1400
1401 /*
1402 * The rest of the sub-select must not refer to any Vars of the parent
1403 * query. (Vars of higher levels should be okay, though.)
1404 */
1405 if (contain_vars_of_level((Node *) subselect, 1))
1406 return NULL;
1407
1408 /*
1409 * On the other hand, the WHERE clause must contain some Vars of the
1410 * parent query, else it's not gonna be a join.
1411 */
1412 if (!contain_vars_of_level(whereClause, 1))
1413 return NULL;
1414
1415 /*
1416 * We don't risk optimizing if the WHERE clause is volatile, either.
1417 */
1418 if (contain_volatile_functions(whereClause))
1419 return NULL;
1420
1421 /*
1422 * The subquery must have a nonempty jointree, but we can make it so.
1423 */
1424 replace_empty_jointree(subselect);
1425
1426 /*
1427 * Prepare to pull up the sub-select into top range table.
1428 *
1429 * We rely here on the assumption that the outer query has no references
1430 * to the inner (necessarily true). Therefore this is a lot easier than
1431 * what pull_up_subqueries has to go through.
1432 *
1433 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
1434 * do. The machinations of simplify_EXISTS_query ensured that there is
1435 * nothing interesting in the subquery except an rtable and jointree, and
1436 * even the jointree FromExpr no longer has quals. So we can just append
1437 * the rtable to our own and use the FromExpr in our jointree. But first,
1438 * adjust all level-zero varnos in the subquery to account for the rtable
1439 * merger.
1440 */
1441 rtoffset = list_length(parse->rtable);
1442 OffsetVarNodes((Node *) subselect, rtoffset, 0);
1443 OffsetVarNodes(whereClause, rtoffset, 0);
1444
1445 /*
1446 * Upper-level vars in subquery will now be one level closer to their
1447 * parent than before; in particular, anything that had been level 1
1448 * becomes level zero.
1449 */
1450 IncrementVarSublevelsUp((Node *) subselect, -1, 1);
1451 IncrementVarSublevelsUp(whereClause, -1, 1);
1452
1453 /*
1454 * Now that the WHERE clause is adjusted to match the parent query
1455 * environment, we can easily identify all the level-zero rels it uses.
1456 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
1457 * not.
1458 */
1459 clause_varnos = pull_varnos(root, whereClause);
1460 upper_varnos = NULL;
1461 while ((varno = bms_first_member(clause_varnos)) >= 0)
1462 {
1463 if (varno <= rtoffset)
1464 upper_varnos = bms_add_member(upper_varnos, varno);
1465 }
1466 bms_free(clause_varnos);
1467 Assert(!bms_is_empty(upper_varnos));
1468
1469 /*
1470 * Now that we've got the set of upper-level varnos, we can make the last
1471 * check: only available_rels can be referenced.
1472 */
1473 if (!bms_is_subset(upper_varnos, available_rels))
1474 return NULL;
1475
1476 /* Now we can attach the modified subquery rtable to the parent */
1477 parse->rtable = list_concat(parse->rtable, subselect->rtable);
1478
1479 /*
1480 * And finally, build the JoinExpr node.
1481 */
1482 result = makeNode(JoinExpr);
1483 result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
1484 result->isNatural = false;
1485 result->larg = NULL; /* caller must fill this in */
1486 /* flatten out the FromExpr node if it's useless */
1487 if (list_length(subselect->jointree->fromlist) == 1)
1488 result->rarg = (Node *) linitial(subselect->jointree->fromlist);
1489 else
1490 result->rarg = (Node *) subselect->jointree;
1491 result->usingClause = NIL;
1492 result->quals = whereClause;
1493 result->alias = NULL;
1494 result->rtindex = 0; /* we don't need an RTE for it */
1495
1496 return result;
1497 }
1498
1499 /*
1500 * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
1501 *
1502 * The only thing that matters about an EXISTS query is whether it returns
1503 * zero or more than zero rows. Therefore, we can remove certain SQL features
1504 * that won't affect that. The only part that is really likely to matter in
1505 * typical usage is simplifying the targetlist: it's a common habit to write
1506 * "SELECT * FROM" even though there is no need to evaluate any columns.
1507 *
1508 * Note: by suppressing the targetlist we could cause an observable behavioral
1509 * change, namely that any errors that might occur in evaluating the tlist
1510 * won't occur, nor will other side-effects of volatile functions. This seems
1511 * unlikely to bother anyone in practice.
1512 *
1513 * Returns true if was able to discard the targetlist, else false.
1514 */
1515 static bool
simplify_EXISTS_query(PlannerInfo * root,Query * query)1516 simplify_EXISTS_query(PlannerInfo *root, Query *query)
1517 {
1518 /*
1519 * We don't try to simplify at all if the query uses set operations,
1520 * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR
1521 * UPDATE/SHARE; none of these seem likely in normal usage and their
1522 * possible effects are complex. (Note: we could ignore an "OFFSET 0"
1523 * clause, but that traditionally is used as an optimization fence, so we
1524 * don't.)
1525 */
1526 if (query->commandType != CMD_SELECT ||
1527 query->setOperations ||
1528 query->hasAggs ||
1529 query->groupingSets ||
1530 query->hasWindowFuncs ||
1531 query->hasTargetSRFs ||
1532 query->hasModifyingCTE ||
1533 query->havingQual ||
1534 query->limitOffset ||
1535 query->rowMarks)
1536 return false;
1537
1538 /*
1539 * LIMIT with a constant positive (or NULL) value doesn't affect the
1540 * semantics of EXISTS, so let's ignore such clauses. This is worth doing
1541 * because people accustomed to certain other DBMSes may be in the habit
1542 * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a
1543 * LIMIT with anything else as argument, though, we can't simplify.
1544 */
1545 if (query->limitCount)
1546 {
1547 /*
1548 * The LIMIT clause has not yet been through eval_const_expressions,
1549 * so we have to apply that here. It might seem like this is a waste
1550 * of cycles, since the only case plausibly worth worrying about is
1551 * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
1552 * so we have to fold constants or we're not going to recognize it.
1553 */
1554 Node *node = eval_const_expressions(root, query->limitCount);
1555 Const *limit;
1556
1557 /* Might as well update the query if we simplified the clause. */
1558 query->limitCount = node;
1559
1560 if (!IsA(node, Const))
1561 return false;
1562
1563 limit = (Const *) node;
1564 Assert(limit->consttype == INT8OID);
1565 if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
1566 return false;
1567
1568 /* Whether or not the targetlist is safe, we can drop the LIMIT. */
1569 query->limitCount = NULL;
1570 }
1571
1572 /*
1573 * Otherwise, we can throw away the targetlist, as well as any GROUP,
1574 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
1575 * change a nonzero-rows result to zero rows or vice versa. (Furthermore,
1576 * since our parsetree representation of these clauses depends on the
1577 * targetlist, we'd better throw them away if we drop the targetlist.)
1578 */
1579 query->targetList = NIL;
1580 query->groupClause = NIL;
1581 query->windowClause = NIL;
1582 query->distinctClause = NIL;
1583 query->sortClause = NIL;
1584 query->hasDistinctOn = false;
1585
1586 return true;
1587 }
1588
1589 /*
1590 * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink
1591 *
1592 * The subselect is expected to be a fresh copy that we can munge up,
1593 * and to have been successfully passed through simplify_EXISTS_query.
1594 *
1595 * On success, the modified subselect is returned, and we store a suitable
1596 * upper-level test expression at *testexpr, plus a list of the subselect's
1597 * output Params at *paramIds. (The test expression is already Param-ified
1598 * and hence need not go through convert_testexpr, which is why we have to
1599 * deal with the Param IDs specially.)
1600 *
1601 * On failure, returns NULL.
1602 */
1603 static Query *
convert_EXISTS_to_ANY(PlannerInfo * root,Query * subselect,Node ** testexpr,List ** paramIds)1604 convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
1605 Node **testexpr, List **paramIds)
1606 {
1607 Node *whereClause;
1608 List *leftargs,
1609 *rightargs,
1610 *opids,
1611 *opcollations,
1612 *newWhere,
1613 *tlist,
1614 *testlist,
1615 *paramids;
1616 ListCell *lc,
1617 *rc,
1618 *oc,
1619 *cc;
1620 AttrNumber resno;
1621
1622 /*
1623 * Query must not require a targetlist, since we have to insert a new one.
1624 * Caller should have dealt with the case already.
1625 */
1626 Assert(subselect->targetList == NIL);
1627
1628 /*
1629 * Separate out the WHERE clause. (We could theoretically also remove
1630 * top-level plain JOIN/ON clauses, but it's probably not worth the
1631 * trouble.)
1632 */
1633 whereClause = subselect->jointree->quals;
1634 subselect->jointree->quals = NULL;
1635
1636 /*
1637 * The rest of the sub-select must not refer to any Vars of the parent
1638 * query. (Vars of higher levels should be okay, though.)
1639 *
1640 * Note: we need not check for Aggrefs separately because we know the
1641 * sub-select is as yet unoptimized; any uplevel Aggref must therefore
1642 * contain an uplevel Var reference. This is not the case below ...
1643 */
1644 if (contain_vars_of_level((Node *) subselect, 1))
1645 return NULL;
1646
1647 /*
1648 * We don't risk optimizing if the WHERE clause is volatile, either.
1649 */
1650 if (contain_volatile_functions(whereClause))
1651 return NULL;
1652
1653 /*
1654 * Clean up the WHERE clause by doing const-simplification etc on it.
1655 * Aside from simplifying the processing we're about to do, this is
1656 * important for being able to pull chunks of the WHERE clause up into the
1657 * parent query. Since we are invoked partway through the parent's
1658 * preprocess_expression() work, earlier steps of preprocess_expression()
1659 * wouldn't get applied to the pulled-up stuff unless we do them here. For
1660 * the parts of the WHERE clause that get put back into the child query,
1661 * this work is partially duplicative, but it shouldn't hurt.
1662 *
1663 * Note: we do not run flatten_join_alias_vars. This is OK because any
1664 * parent aliases were flattened already, and we're not going to pull any
1665 * child Vars (of any description) into the parent.
1666 *
1667 * Note: passing the parent's root to eval_const_expressions is
1668 * technically wrong, but we can get away with it since only the
1669 * boundParams (if any) are used, and those would be the same in a
1670 * subroot.
1671 */
1672 whereClause = eval_const_expressions(root, whereClause);
1673 whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
1674 whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
1675
1676 /*
1677 * We now have a flattened implicit-AND list of clauses, which we try to
1678 * break apart into "outervar = innervar" hash clauses. Anything that
1679 * can't be broken apart just goes back into the newWhere list. Note that
1680 * we aren't trying hard yet to ensure that we have only outer or only
1681 * inner on each side; we'll check that if we get to the end.
1682 */
1683 leftargs = rightargs = opids = opcollations = newWhere = NIL;
1684 foreach(lc, (List *) whereClause)
1685 {
1686 OpExpr *expr = (OpExpr *) lfirst(lc);
1687
1688 if (IsA(expr, OpExpr) &&
1689 hash_ok_operator(expr))
1690 {
1691 Node *leftarg = (Node *) linitial(expr->args);
1692 Node *rightarg = (Node *) lsecond(expr->args);
1693
1694 if (contain_vars_of_level(leftarg, 1))
1695 {
1696 leftargs = lappend(leftargs, leftarg);
1697 rightargs = lappend(rightargs, rightarg);
1698 opids = lappend_oid(opids, expr->opno);
1699 opcollations = lappend_oid(opcollations, expr->inputcollid);
1700 continue;
1701 }
1702 if (contain_vars_of_level(rightarg, 1))
1703 {
1704 /*
1705 * We must commute the clause to put the outer var on the
1706 * left, because the hashing code in nodeSubplan.c expects
1707 * that. This probably shouldn't ever fail, since hashable
1708 * operators ought to have commutators, but be paranoid.
1709 */
1710 expr->opno = get_commutator(expr->opno);
1711 if (OidIsValid(expr->opno) && hash_ok_operator(expr))
1712 {
1713 leftargs = lappend(leftargs, rightarg);
1714 rightargs = lappend(rightargs, leftarg);
1715 opids = lappend_oid(opids, expr->opno);
1716 opcollations = lappend_oid(opcollations, expr->inputcollid);
1717 continue;
1718 }
1719 /* If no commutator, no chance to optimize the WHERE clause */
1720 return NULL;
1721 }
1722 }
1723 /* Couldn't handle it as a hash clause */
1724 newWhere = lappend(newWhere, expr);
1725 }
1726
1727 /*
1728 * If we didn't find anything we could convert, fail.
1729 */
1730 if (leftargs == NIL)
1731 return NULL;
1732
1733 /*
1734 * There mustn't be any parent Vars or Aggs in the stuff that we intend to
1735 * put back into the child query. Note: you might think we don't need to
1736 * check for Aggs separately, because an uplevel Agg must contain an
1737 * uplevel Var in its argument. But it is possible that the uplevel Var
1738 * got optimized away by eval_const_expressions. Consider
1739 *
1740 * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END)
1741 */
1742 if (contain_vars_of_level((Node *) newWhere, 1) ||
1743 contain_vars_of_level((Node *) rightargs, 1))
1744 return NULL;
1745 if (root->parse->hasAggs &&
1746 (contain_aggs_of_level((Node *) newWhere, 1) ||
1747 contain_aggs_of_level((Node *) rightargs, 1)))
1748 return NULL;
1749
1750 /*
1751 * And there can't be any child Vars in the stuff we intend to pull up.
1752 * (Note: we'd need to check for child Aggs too, except we know the child
1753 * has no aggs at all because of simplify_EXISTS_query's check. The same
1754 * goes for window functions.)
1755 */
1756 if (contain_vars_of_level((Node *) leftargs, 0))
1757 return NULL;
1758
1759 /*
1760 * Also reject sublinks in the stuff we intend to pull up. (It might be
1761 * possible to support this, but doesn't seem worth the complication.)
1762 */
1763 if (contain_subplans((Node *) leftargs))
1764 return NULL;
1765
1766 /*
1767 * Okay, adjust the sublevelsup in the stuff we're pulling up.
1768 */
1769 IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
1770
1771 /*
1772 * Put back any child-level-only WHERE clauses.
1773 */
1774 if (newWhere)
1775 subselect->jointree->quals = (Node *) make_ands_explicit(newWhere);
1776
1777 /*
1778 * Build a new targetlist for the child that emits the expressions we
1779 * need. Concurrently, build a testexpr for the parent using Params to
1780 * reference the child outputs. (Since we generate Params directly here,
1781 * there will be no need to convert the testexpr in build_subplan.)
1782 */
1783 tlist = testlist = paramids = NIL;
1784 resno = 1;
1785 forfour(lc, leftargs, rc, rightargs, oc, opids, cc, opcollations)
1786 {
1787 Node *leftarg = (Node *) lfirst(lc);
1788 Node *rightarg = (Node *) lfirst(rc);
1789 Oid opid = lfirst_oid(oc);
1790 Oid opcollation = lfirst_oid(cc);
1791 Param *param;
1792
1793 param = generate_new_exec_param(root,
1794 exprType(rightarg),
1795 exprTypmod(rightarg),
1796 exprCollation(rightarg));
1797 tlist = lappend(tlist,
1798 makeTargetEntry((Expr *) rightarg,
1799 resno++,
1800 NULL,
1801 false));
1802 testlist = lappend(testlist,
1803 make_opclause(opid, BOOLOID, false,
1804 (Expr *) leftarg, (Expr *) param,
1805 InvalidOid, opcollation));
1806 paramids = lappend_int(paramids, param->paramid);
1807 }
1808
1809 /* Put everything where it should go, and we're done */
1810 subselect->targetList = tlist;
1811 *testexpr = (Node *) make_ands_explicit(testlist);
1812 *paramIds = paramids;
1813
1814 return subselect;
1815 }
1816
1817
1818 /*
1819 * Replace correlation vars (uplevel vars) with Params.
1820 *
1821 * Uplevel PlaceHolderVars and aggregates are replaced, too.
1822 *
1823 * Note: it is critical that this runs immediately after SS_process_sublinks.
1824 * Since we do not recurse into the arguments of uplevel PHVs and aggregates,
1825 * they will get copied to the appropriate subplan args list in the parent
1826 * query with uplevel vars not replaced by Params, but only adjusted in level
1827 * (see replace_outer_placeholdervar and replace_outer_agg). That's exactly
1828 * what we want for the vars of the parent level --- but if a PHV's or
1829 * aggregate's argument contains any further-up variables, they have to be
1830 * replaced with Params in their turn. That will happen when the parent level
1831 * runs SS_replace_correlation_vars. Therefore it must do so after expanding
1832 * its sublinks to subplans. And we don't want any steps in between, else
1833 * those steps would never get applied to the argument expressions, either in
1834 * the parent or the child level.
1835 *
1836 * Another fairly tricky thing going on here is the handling of SubLinks in
1837 * the arguments of uplevel PHVs/aggregates. Those are not touched inside the
1838 * intermediate query level, either. Instead, SS_process_sublinks recurses on
1839 * them after copying the PHV or Aggref expression into the parent plan level
1840 * (this is actually taken care of in build_subplan).
1841 */
1842 Node *
SS_replace_correlation_vars(PlannerInfo * root,Node * expr)1843 SS_replace_correlation_vars(PlannerInfo *root, Node *expr)
1844 {
1845 /* No setup needed for tree walk, so away we go */
1846 return replace_correlation_vars_mutator(expr, root);
1847 }
1848
1849 static Node *
replace_correlation_vars_mutator(Node * node,PlannerInfo * root)1850 replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
1851 {
1852 if (node == NULL)
1853 return NULL;
1854 if (IsA(node, Var))
1855 {
1856 if (((Var *) node)->varlevelsup > 0)
1857 return (Node *) replace_outer_var(root, (Var *) node);
1858 }
1859 if (IsA(node, PlaceHolderVar))
1860 {
1861 if (((PlaceHolderVar *) node)->phlevelsup > 0)
1862 return (Node *) replace_outer_placeholdervar(root,
1863 (PlaceHolderVar *) node);
1864 }
1865 if (IsA(node, Aggref))
1866 {
1867 if (((Aggref *) node)->agglevelsup > 0)
1868 return (Node *) replace_outer_agg(root, (Aggref *) node);
1869 }
1870 if (IsA(node, GroupingFunc))
1871 {
1872 if (((GroupingFunc *) node)->agglevelsup > 0)
1873 return (Node *) replace_outer_grouping(root, (GroupingFunc *) node);
1874 }
1875 return expression_tree_mutator(node,
1876 replace_correlation_vars_mutator,
1877 (void *) root);
1878 }
1879
1880 /*
1881 * Expand SubLinks to SubPlans in the given expression.
1882 *
1883 * The isQual argument tells whether or not this expression is a WHERE/HAVING
1884 * qualifier expression. If it is, any sublinks appearing at top level need
1885 * not distinguish FALSE from UNKNOWN return values.
1886 */
1887 Node *
SS_process_sublinks(PlannerInfo * root,Node * expr,bool isQual)1888 SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
1889 {
1890 process_sublinks_context context;
1891
1892 context.root = root;
1893 context.isTopQual = isQual;
1894 return process_sublinks_mutator(expr, &context);
1895 }
1896
1897 static Node *
process_sublinks_mutator(Node * node,process_sublinks_context * context)1898 process_sublinks_mutator(Node *node, process_sublinks_context *context)
1899 {
1900 process_sublinks_context locContext;
1901
1902 locContext.root = context->root;
1903
1904 if (node == NULL)
1905 return NULL;
1906 if (IsA(node, SubLink))
1907 {
1908 SubLink *sublink = (SubLink *) node;
1909 Node *testexpr;
1910
1911 /*
1912 * First, recursively process the lefthand-side expressions, if any.
1913 * They're not top-level anymore.
1914 */
1915 locContext.isTopQual = false;
1916 testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
1917
1918 /*
1919 * Now build the SubPlan node and make the expr to return.
1920 */
1921 return make_subplan(context->root,
1922 (Query *) sublink->subselect,
1923 sublink->subLinkType,
1924 sublink->subLinkId,
1925 testexpr,
1926 context->isTopQual);
1927 }
1928
1929 /*
1930 * Don't recurse into the arguments of an outer PHV or aggregate here. Any
1931 * SubLinks in the arguments have to be dealt with at the outer query
1932 * level; they'll be handled when build_subplan collects the PHV or Aggref
1933 * into the arguments to be passed down to the current subplan.
1934 */
1935 if (IsA(node, PlaceHolderVar))
1936 {
1937 if (((PlaceHolderVar *) node)->phlevelsup > 0)
1938 return node;
1939 }
1940 else if (IsA(node, Aggref))
1941 {
1942 if (((Aggref *) node)->agglevelsup > 0)
1943 return node;
1944 }
1945
1946 /*
1947 * We should never see a SubPlan expression in the input (since this is
1948 * the very routine that creates 'em to begin with). We shouldn't find
1949 * ourselves invoked directly on a Query, either.
1950 */
1951 Assert(!IsA(node, SubPlan));
1952 Assert(!IsA(node, AlternativeSubPlan));
1953 Assert(!IsA(node, Query));
1954
1955 /*
1956 * Because make_subplan() could return an AND or OR clause, we have to
1957 * take steps to preserve AND/OR flatness of a qual. We assume the input
1958 * has been AND/OR flattened and so we need no recursion here.
1959 *
1960 * (Due to the coding here, we will not get called on the List subnodes of
1961 * an AND; and the input is *not* yet in implicit-AND format. So no check
1962 * is needed for a bare List.)
1963 *
1964 * Anywhere within the top-level AND/OR clause structure, we can tell
1965 * make_subplan() that NULL and FALSE are interchangeable. So isTopQual
1966 * propagates down in both cases. (Note that this is unlike the meaning
1967 * of "top level qual" used in most other places in Postgres.)
1968 */
1969 if (is_andclause(node))
1970 {
1971 List *newargs = NIL;
1972 ListCell *l;
1973
1974 /* Still at qual top-level */
1975 locContext.isTopQual = context->isTopQual;
1976
1977 foreach(l, ((BoolExpr *) node)->args)
1978 {
1979 Node *newarg;
1980
1981 newarg = process_sublinks_mutator(lfirst(l), &locContext);
1982 if (is_andclause(newarg))
1983 newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1984 else
1985 newargs = lappend(newargs, newarg);
1986 }
1987 return (Node *) make_andclause(newargs);
1988 }
1989
1990 if (is_orclause(node))
1991 {
1992 List *newargs = NIL;
1993 ListCell *l;
1994
1995 /* Still at qual top-level */
1996 locContext.isTopQual = context->isTopQual;
1997
1998 foreach(l, ((BoolExpr *) node)->args)
1999 {
2000 Node *newarg;
2001
2002 newarg = process_sublinks_mutator(lfirst(l), &locContext);
2003 if (is_orclause(newarg))
2004 newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
2005 else
2006 newargs = lappend(newargs, newarg);
2007 }
2008 return (Node *) make_orclause(newargs);
2009 }
2010
2011 /*
2012 * If we recurse down through anything other than an AND or OR node, we
2013 * are definitely not at top qual level anymore.
2014 */
2015 locContext.isTopQual = false;
2016
2017 return expression_tree_mutator(node,
2018 process_sublinks_mutator,
2019 (void *) &locContext);
2020 }
2021
2022 /*
2023 * SS_identify_outer_params - identify the Params available from outer levels
2024 *
2025 * This must be run after SS_replace_correlation_vars and SS_process_sublinks
2026 * processing is complete in a given query level as well as all of its
2027 * descendant levels (which means it's most practical to do it at the end of
2028 * processing the query level). We compute the set of paramIds that outer
2029 * levels will make available to this level+descendants, and record it in
2030 * root->outer_params for use while computing extParam/allParam sets in final
2031 * plan cleanup. (We can't just compute it then, because the upper levels'
2032 * plan_params lists are transient and will be gone by then.)
2033 */
2034 void
SS_identify_outer_params(PlannerInfo * root)2035 SS_identify_outer_params(PlannerInfo *root)
2036 {
2037 Bitmapset *outer_params;
2038 PlannerInfo *proot;
2039 ListCell *l;
2040
2041 /*
2042 * If no parameters have been assigned anywhere in the tree, we certainly
2043 * don't need to do anything here.
2044 */
2045 if (root->glob->paramExecTypes == NIL)
2046 return;
2047
2048 /*
2049 * Scan all query levels above this one to see which parameters are due to
2050 * be available from them, either because lower query levels have
2051 * requested them (via plan_params) or because they will be available from
2052 * initPlans of those levels.
2053 */
2054 outer_params = NULL;
2055 for (proot = root->parent_root; proot != NULL; proot = proot->parent_root)
2056 {
2057 /* Include ordinary Var/PHV/Aggref params */
2058 foreach(l, proot->plan_params)
2059 {
2060 PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
2061
2062 outer_params = bms_add_member(outer_params, pitem->paramId);
2063 }
2064 /* Include any outputs of outer-level initPlans */
2065 foreach(l, proot->init_plans)
2066 {
2067 SubPlan *initsubplan = (SubPlan *) lfirst(l);
2068 ListCell *l2;
2069
2070 foreach(l2, initsubplan->setParam)
2071 {
2072 outer_params = bms_add_member(outer_params, lfirst_int(l2));
2073 }
2074 }
2075 /* Include worktable ID, if a recursive query is being planned */
2076 if (proot->wt_param_id >= 0)
2077 outer_params = bms_add_member(outer_params, proot->wt_param_id);
2078 }
2079 root->outer_params = outer_params;
2080 }
2081
2082 /*
2083 * SS_charge_for_initplans - account for initplans in Path costs & parallelism
2084 *
2085 * If any initPlans have been created in the current query level, they will
2086 * get attached to the Plan tree created from whichever Path we select from
2087 * the given rel. Increment all that rel's Paths' costs to account for them,
2088 * and make sure the paths get marked as parallel-unsafe, since we can't
2089 * currently transmit initPlans to parallel workers.
2090 *
2091 * This is separate from SS_attach_initplans because we might conditionally
2092 * create more initPlans during create_plan(), depending on which Path we
2093 * select. However, Paths that would generate such initPlans are expected
2094 * to have included their cost already.
2095 */
2096 void
SS_charge_for_initplans(PlannerInfo * root,RelOptInfo * final_rel)2097 SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel)
2098 {
2099 Cost initplan_cost;
2100 ListCell *lc;
2101
2102 /* Nothing to do if no initPlans */
2103 if (root->init_plans == NIL)
2104 return;
2105
2106 /*
2107 * Compute the cost increment just once, since it will be the same for all
2108 * Paths. We assume each initPlan gets run once during top plan startup.
2109 * This is a conservative overestimate, since in fact an initPlan might be
2110 * executed later than plan startup, or even not at all.
2111 */
2112 initplan_cost = 0;
2113 foreach(lc, root->init_plans)
2114 {
2115 SubPlan *initsubplan = (SubPlan *) lfirst(lc);
2116
2117 initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost;
2118 }
2119
2120 /*
2121 * Now adjust the costs and parallel_safe flags.
2122 */
2123 foreach(lc, final_rel->pathlist)
2124 {
2125 Path *path = (Path *) lfirst(lc);
2126
2127 path->startup_cost += initplan_cost;
2128 path->total_cost += initplan_cost;
2129 path->parallel_safe = false;
2130 }
2131
2132 /*
2133 * Forget about any partial paths and clear consider_parallel, too;
2134 * they're not usable if we attached an initPlan.
2135 */
2136 final_rel->partial_pathlist = NIL;
2137 final_rel->consider_parallel = false;
2138
2139 /* We needn't do set_cheapest() here, caller will do it */
2140 }
2141
2142 /*
2143 * SS_attach_initplans - attach initplans to topmost plan node
2144 *
2145 * Attach any initplans created in the current query level to the specified
2146 * plan node, which should normally be the topmost node for the query level.
2147 * (In principle the initPlans could go in any node at or above where they're
2148 * referenced; but there seems no reason to put them any lower than the
2149 * topmost node, so we don't bother to track exactly where they came from.)
2150 * We do not touch the plan node's cost; the initplans should have been
2151 * accounted for in path costing.
2152 */
2153 void
SS_attach_initplans(PlannerInfo * root,Plan * plan)2154 SS_attach_initplans(PlannerInfo *root, Plan *plan)
2155 {
2156 plan->initPlan = root->init_plans;
2157 }
2158
2159 /*
2160 * SS_finalize_plan - do final parameter processing for a completed Plan.
2161 *
2162 * This recursively computes the extParam and allParam sets for every Plan
2163 * node in the given plan tree. (Oh, and RangeTblFunction.funcparams too.)
2164 *
2165 * We assume that SS_finalize_plan has already been run on any initplans or
2166 * subplans the plan tree could reference.
2167 */
2168 void
SS_finalize_plan(PlannerInfo * root,Plan * plan)2169 SS_finalize_plan(PlannerInfo *root, Plan *plan)
2170 {
2171 /* No setup needed, just recurse through plan tree. */
2172 (void) finalize_plan(root, plan, -1, root->outer_params, NULL);
2173 }
2174
2175 /*
2176 * Recursive processing of all nodes in the plan tree
2177 *
2178 * gather_param is the rescan_param of an ancestral Gather/GatherMerge,
2179 * or -1 if there is none.
2180 *
2181 * valid_params is the set of param IDs supplied by outer plan levels
2182 * that are valid to reference in this plan node or its children.
2183 *
2184 * scan_params is a set of param IDs to force scan plan nodes to reference.
2185 * This is for EvalPlanQual support, and is always NULL at the top of the
2186 * recursion.
2187 *
2188 * The return value is the computed allParam set for the given Plan node.
2189 * This is just an internal notational convenience: we can add a child
2190 * plan's allParams to the set of param IDs of interest to this level
2191 * in the same statement that recurses to that child.
2192 *
2193 * Do not scribble on caller's values of valid_params or scan_params!
2194 *
2195 * Note: although we attempt to deal with initPlans anywhere in the tree, the
2196 * logic is not really right. The problem is that a plan node might return an
2197 * output Param of its initPlan as a targetlist item, in which case it's valid
2198 * for the parent plan level to reference that same Param; the parent's usage
2199 * will be converted into a Var referencing the child plan node by setrefs.c.
2200 * But this function would see the parent's reference as out of scope and
2201 * complain about it. For now, this does not matter because the planner only
2202 * attaches initPlans to the topmost plan node in a query level, so the case
2203 * doesn't arise. If we ever merge this processing into setrefs.c, maybe it
2204 * can be handled more cleanly.
2205 */
2206 static Bitmapset *
finalize_plan(PlannerInfo * root,Plan * plan,int gather_param,Bitmapset * valid_params,Bitmapset * scan_params)2207 finalize_plan(PlannerInfo *root, Plan *plan,
2208 int gather_param,
2209 Bitmapset *valid_params,
2210 Bitmapset *scan_params)
2211 {
2212 finalize_primnode_context context;
2213 int locally_added_param;
2214 Bitmapset *nestloop_params;
2215 Bitmapset *initExtParam;
2216 Bitmapset *initSetParam;
2217 Bitmapset *child_params;
2218 ListCell *l;
2219
2220 if (plan == NULL)
2221 return NULL;
2222
2223 context.root = root;
2224 context.paramids = NULL; /* initialize set to empty */
2225 locally_added_param = -1; /* there isn't one */
2226 nestloop_params = NULL; /* there aren't any */
2227
2228 /*
2229 * Examine any initPlans to determine the set of external params they
2230 * reference and the set of output params they supply. (We assume
2231 * SS_finalize_plan was run on them already.)
2232 */
2233 initExtParam = initSetParam = NULL;
2234 foreach(l, plan->initPlan)
2235 {
2236 SubPlan *initsubplan = (SubPlan *) lfirst(l);
2237 Plan *initplan = planner_subplan_get_plan(root, initsubplan);
2238 ListCell *l2;
2239
2240 initExtParam = bms_add_members(initExtParam, initplan->extParam);
2241 foreach(l2, initsubplan->setParam)
2242 {
2243 initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
2244 }
2245 }
2246
2247 /* Any setParams are validly referenceable in this node and children */
2248 if (initSetParam)
2249 valid_params = bms_union(valid_params, initSetParam);
2250
2251 /*
2252 * When we call finalize_primnode, context.paramids sets are automatically
2253 * merged together. But when recursing to self, we have to do it the hard
2254 * way. We want the paramids set to include params in subplans as well as
2255 * at this level.
2256 */
2257
2258 /* Find params in targetlist and qual */
2259 finalize_primnode((Node *) plan->targetlist, &context);
2260 finalize_primnode((Node *) plan->qual, &context);
2261
2262 /*
2263 * If it's a parallel-aware scan node, mark it as dependent on the parent
2264 * Gather/GatherMerge's rescan Param.
2265 */
2266 if (plan->parallel_aware)
2267 {
2268 if (gather_param < 0)
2269 elog(ERROR, "parallel-aware plan node is not below a Gather");
2270 context.paramids =
2271 bms_add_member(context.paramids, gather_param);
2272 }
2273
2274 /* Check additional node-type-specific fields */
2275 switch (nodeTag(plan))
2276 {
2277 case T_Result:
2278 finalize_primnode(((Result *) plan)->resconstantqual,
2279 &context);
2280 break;
2281
2282 case T_SeqScan:
2283 context.paramids = bms_add_members(context.paramids, scan_params);
2284 break;
2285
2286 case T_SampleScan:
2287 finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
2288 &context);
2289 context.paramids = bms_add_members(context.paramids, scan_params);
2290 break;
2291
2292 case T_IndexScan:
2293 finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
2294 &context);
2295 finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby,
2296 &context);
2297
2298 /*
2299 * we need not look at indexqualorig, since it will have the same
2300 * param references as indexqual. Likewise, we can ignore
2301 * indexorderbyorig.
2302 */
2303 context.paramids = bms_add_members(context.paramids, scan_params);
2304 break;
2305
2306 case T_IndexOnlyScan:
2307 finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual,
2308 &context);
2309 finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby,
2310 &context);
2311
2312 /*
2313 * we need not look at indextlist, since it cannot contain Params.
2314 */
2315 context.paramids = bms_add_members(context.paramids, scan_params);
2316 break;
2317
2318 case T_BitmapIndexScan:
2319 finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual,
2320 &context);
2321
2322 /*
2323 * we need not look at indexqualorig, since it will have the same
2324 * param references as indexqual.
2325 */
2326 break;
2327
2328 case T_BitmapHeapScan:
2329 finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
2330 &context);
2331 context.paramids = bms_add_members(context.paramids, scan_params);
2332 break;
2333
2334 case T_TidScan:
2335 finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
2336 &context);
2337 context.paramids = bms_add_members(context.paramids, scan_params);
2338 break;
2339
2340 case T_SubqueryScan:
2341 {
2342 SubqueryScan *sscan = (SubqueryScan *) plan;
2343 RelOptInfo *rel;
2344 Bitmapset *subquery_params;
2345
2346 /* We must run finalize_plan on the subquery */
2347 rel = find_base_rel(root, sscan->scan.scanrelid);
2348 subquery_params = rel->subroot->outer_params;
2349 if (gather_param >= 0)
2350 subquery_params = bms_add_member(bms_copy(subquery_params),
2351 gather_param);
2352 finalize_plan(rel->subroot, sscan->subplan, gather_param,
2353 subquery_params, NULL);
2354
2355 /* Now we can add its extParams to the parent's params */
2356 context.paramids = bms_add_members(context.paramids,
2357 sscan->subplan->extParam);
2358 /* We need scan_params too, though */
2359 context.paramids = bms_add_members(context.paramids,
2360 scan_params);
2361 }
2362 break;
2363
2364 case T_FunctionScan:
2365 {
2366 FunctionScan *fscan = (FunctionScan *) plan;
2367 ListCell *lc;
2368
2369 /*
2370 * Call finalize_primnode independently on each function
2371 * expression, so that we can record which params are
2372 * referenced in each, in order to decide which need
2373 * re-evaluating during rescan.
2374 */
2375 foreach(lc, fscan->functions)
2376 {
2377 RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
2378 finalize_primnode_context funccontext;
2379
2380 funccontext = context;
2381 funccontext.paramids = NULL;
2382
2383 finalize_primnode(rtfunc->funcexpr, &funccontext);
2384
2385 /* remember results for execution */
2386 rtfunc->funcparams = funccontext.paramids;
2387
2388 /* add the function's params to the overall set */
2389 context.paramids = bms_add_members(context.paramids,
2390 funccontext.paramids);
2391 }
2392
2393 context.paramids = bms_add_members(context.paramids,
2394 scan_params);
2395 }
2396 break;
2397
2398 case T_TableFuncScan:
2399 finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc,
2400 &context);
2401 context.paramids = bms_add_members(context.paramids, scan_params);
2402 break;
2403
2404 case T_ValuesScan:
2405 finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
2406 &context);
2407 context.paramids = bms_add_members(context.paramids, scan_params);
2408 break;
2409
2410 case T_CteScan:
2411 {
2412 /*
2413 * You might think we should add the node's cteParam to
2414 * paramids, but we shouldn't because that param is just a
2415 * linkage mechanism for multiple CteScan nodes for the same
2416 * CTE; it is never used for changed-param signaling. What we
2417 * have to do instead is to find the referenced CTE plan and
2418 * incorporate its external paramids, so that the correct
2419 * things will happen if the CTE references outer-level
2420 * variables. See test cases for bug #4902. (We assume
2421 * SS_finalize_plan was run on the CTE plan already.)
2422 */
2423 int plan_id = ((CteScan *) plan)->ctePlanId;
2424 Plan *cteplan;
2425
2426 /* so, do this ... */
2427 if (plan_id < 1 || plan_id > list_length(root->glob->subplans))
2428 elog(ERROR, "could not find plan for CteScan referencing plan ID %d",
2429 plan_id);
2430 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2431 context.paramids =
2432 bms_add_members(context.paramids, cteplan->extParam);
2433
2434 #ifdef NOT_USED
2435 /* ... but not this */
2436 context.paramids =
2437 bms_add_member(context.paramids,
2438 ((CteScan *) plan)->cteParam);
2439 #endif
2440
2441 context.paramids = bms_add_members(context.paramids,
2442 scan_params);
2443 }
2444 break;
2445
2446 case T_WorkTableScan:
2447 context.paramids =
2448 bms_add_member(context.paramids,
2449 ((WorkTableScan *) plan)->wtParam);
2450 context.paramids = bms_add_members(context.paramids, scan_params);
2451 break;
2452
2453 case T_NamedTuplestoreScan:
2454 context.paramids = bms_add_members(context.paramids, scan_params);
2455 break;
2456
2457 case T_ForeignScan:
2458 {
2459 ForeignScan *fscan = (ForeignScan *) plan;
2460
2461 finalize_primnode((Node *) fscan->fdw_exprs,
2462 &context);
2463 finalize_primnode((Node *) fscan->fdw_recheck_quals,
2464 &context);
2465
2466 /* We assume fdw_scan_tlist cannot contain Params */
2467 context.paramids = bms_add_members(context.paramids,
2468 scan_params);
2469 }
2470 break;
2471
2472 case T_CustomScan:
2473 {
2474 CustomScan *cscan = (CustomScan *) plan;
2475 ListCell *lc;
2476
2477 finalize_primnode((Node *) cscan->custom_exprs,
2478 &context);
2479 /* We assume custom_scan_tlist cannot contain Params */
2480 context.paramids =
2481 bms_add_members(context.paramids, scan_params);
2482
2483 /* child nodes if any */
2484 foreach(lc, cscan->custom_plans)
2485 {
2486 context.paramids =
2487 bms_add_members(context.paramids,
2488 finalize_plan(root,
2489 (Plan *) lfirst(lc),
2490 gather_param,
2491 valid_params,
2492 scan_params));
2493 }
2494 }
2495 break;
2496
2497 case T_ModifyTable:
2498 {
2499 ModifyTable *mtplan = (ModifyTable *) plan;
2500 ListCell *l;
2501
2502 /* Force descendant scan nodes to reference epqParam */
2503 locally_added_param = mtplan->epqParam;
2504 valid_params = bms_add_member(bms_copy(valid_params),
2505 locally_added_param);
2506 scan_params = bms_add_member(bms_copy(scan_params),
2507 locally_added_param);
2508 finalize_primnode((Node *) mtplan->returningLists,
2509 &context);
2510 finalize_primnode((Node *) mtplan->onConflictSet,
2511 &context);
2512 finalize_primnode((Node *) mtplan->onConflictWhere,
2513 &context);
2514 /* exclRelTlist contains only Vars, doesn't need examination */
2515 foreach(l, mtplan->plans)
2516 {
2517 context.paramids =
2518 bms_add_members(context.paramids,
2519 finalize_plan(root,
2520 (Plan *) lfirst(l),
2521 gather_param,
2522 valid_params,
2523 scan_params));
2524 }
2525 }
2526 break;
2527
2528 case T_Append:
2529 {
2530 ListCell *l;
2531
2532 foreach(l, ((Append *) plan)->appendplans)
2533 {
2534 context.paramids =
2535 bms_add_members(context.paramids,
2536 finalize_plan(root,
2537 (Plan *) lfirst(l),
2538 gather_param,
2539 valid_params,
2540 scan_params));
2541 }
2542 }
2543 break;
2544
2545 case T_MergeAppend:
2546 {
2547 ListCell *l;
2548
2549 foreach(l, ((MergeAppend *) plan)->mergeplans)
2550 {
2551 context.paramids =
2552 bms_add_members(context.paramids,
2553 finalize_plan(root,
2554 (Plan *) lfirst(l),
2555 gather_param,
2556 valid_params,
2557 scan_params));
2558 }
2559 }
2560 break;
2561
2562 case T_BitmapAnd:
2563 {
2564 ListCell *l;
2565
2566 foreach(l, ((BitmapAnd *) plan)->bitmapplans)
2567 {
2568 context.paramids =
2569 bms_add_members(context.paramids,
2570 finalize_plan(root,
2571 (Plan *) lfirst(l),
2572 gather_param,
2573 valid_params,
2574 scan_params));
2575 }
2576 }
2577 break;
2578
2579 case T_BitmapOr:
2580 {
2581 ListCell *l;
2582
2583 foreach(l, ((BitmapOr *) plan)->bitmapplans)
2584 {
2585 context.paramids =
2586 bms_add_members(context.paramids,
2587 finalize_plan(root,
2588 (Plan *) lfirst(l),
2589 gather_param,
2590 valid_params,
2591 scan_params));
2592 }
2593 }
2594 break;
2595
2596 case T_NestLoop:
2597 {
2598 ListCell *l;
2599
2600 finalize_primnode((Node *) ((Join *) plan)->joinqual,
2601 &context);
2602 /* collect set of params that will be passed to right child */
2603 foreach(l, ((NestLoop *) plan)->nestParams)
2604 {
2605 NestLoopParam *nlp = (NestLoopParam *) lfirst(l);
2606
2607 nestloop_params = bms_add_member(nestloop_params,
2608 nlp->paramno);
2609 }
2610 }
2611 break;
2612
2613 case T_MergeJoin:
2614 finalize_primnode((Node *) ((Join *) plan)->joinqual,
2615 &context);
2616 finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
2617 &context);
2618 break;
2619
2620 case T_HashJoin:
2621 finalize_primnode((Node *) ((Join *) plan)->joinqual,
2622 &context);
2623 finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
2624 &context);
2625 break;
2626
2627 case T_Limit:
2628 finalize_primnode(((Limit *) plan)->limitOffset,
2629 &context);
2630 finalize_primnode(((Limit *) plan)->limitCount,
2631 &context);
2632 break;
2633
2634 case T_RecursiveUnion:
2635 /* child nodes are allowed to reference wtParam */
2636 locally_added_param = ((RecursiveUnion *) plan)->wtParam;
2637 valid_params = bms_add_member(bms_copy(valid_params),
2638 locally_added_param);
2639 /* wtParam does *not* get added to scan_params */
2640 break;
2641
2642 case T_LockRows:
2643 /* Force descendant scan nodes to reference epqParam */
2644 locally_added_param = ((LockRows *) plan)->epqParam;
2645 valid_params = bms_add_member(bms_copy(valid_params),
2646 locally_added_param);
2647 scan_params = bms_add_member(bms_copy(scan_params),
2648 locally_added_param);
2649 break;
2650
2651 case T_Agg:
2652 {
2653 Agg *agg = (Agg *) plan;
2654
2655 /*
2656 * AGG_HASHED plans need to know which Params are referenced
2657 * in aggregate calls. Do a separate scan to identify them.
2658 */
2659 if (agg->aggstrategy == AGG_HASHED)
2660 {
2661 finalize_primnode_context aggcontext;
2662
2663 aggcontext.root = root;
2664 aggcontext.paramids = NULL;
2665 finalize_agg_primnode((Node *) agg->plan.targetlist,
2666 &aggcontext);
2667 finalize_agg_primnode((Node *) agg->plan.qual,
2668 &aggcontext);
2669 agg->aggParams = aggcontext.paramids;
2670 }
2671 }
2672 break;
2673
2674 case T_WindowAgg:
2675 finalize_primnode(((WindowAgg *) plan)->startOffset,
2676 &context);
2677 finalize_primnode(((WindowAgg *) plan)->endOffset,
2678 &context);
2679 break;
2680
2681 case T_Gather:
2682 /* child nodes are allowed to reference rescan_param, if any */
2683 locally_added_param = ((Gather *) plan)->rescan_param;
2684 if (locally_added_param >= 0)
2685 {
2686 valid_params = bms_add_member(bms_copy(valid_params),
2687 locally_added_param);
2688
2689 /*
2690 * We currently don't support nested Gathers. The issue so
2691 * far as this function is concerned would be how to identify
2692 * which child nodes depend on which Gather.
2693 */
2694 Assert(gather_param < 0);
2695 /* Pass down rescan_param to child parallel-aware nodes */
2696 gather_param = locally_added_param;
2697 }
2698 /* rescan_param does *not* get added to scan_params */
2699 break;
2700
2701 case T_GatherMerge:
2702 /* child nodes are allowed to reference rescan_param, if any */
2703 locally_added_param = ((GatherMerge *) plan)->rescan_param;
2704 if (locally_added_param >= 0)
2705 {
2706 valid_params = bms_add_member(bms_copy(valid_params),
2707 locally_added_param);
2708
2709 /*
2710 * We currently don't support nested Gathers. The issue so
2711 * far as this function is concerned would be how to identify
2712 * which child nodes depend on which Gather.
2713 */
2714 Assert(gather_param < 0);
2715 /* Pass down rescan_param to child parallel-aware nodes */
2716 gather_param = locally_added_param;
2717 }
2718 /* rescan_param does *not* get added to scan_params */
2719 break;
2720
2721 case T_ProjectSet:
2722 case T_Hash:
2723 case T_Material:
2724 case T_Sort:
2725 case T_Unique:
2726 case T_SetOp:
2727 case T_Group:
2728 /* no node-type-specific fields need fixing */
2729 break;
2730
2731 default:
2732 elog(ERROR, "unrecognized node type: %d",
2733 (int) nodeTag(plan));
2734 }
2735
2736 /* Process left and right child plans, if any */
2737 child_params = finalize_plan(root,
2738 plan->lefttree,
2739 gather_param,
2740 valid_params,
2741 scan_params);
2742 context.paramids = bms_add_members(context.paramids, child_params);
2743
2744 if (nestloop_params)
2745 {
2746 /* right child can reference nestloop_params as well as valid_params */
2747 child_params = finalize_plan(root,
2748 plan->righttree,
2749 gather_param,
2750 bms_union(nestloop_params, valid_params),
2751 scan_params);
2752 /* ... and they don't count as parameters used at my level */
2753 child_params = bms_difference(child_params, nestloop_params);
2754 bms_free(nestloop_params);
2755 }
2756 else
2757 {
2758 /* easy case */
2759 child_params = finalize_plan(root,
2760 plan->righttree,
2761 gather_param,
2762 valid_params,
2763 scan_params);
2764 }
2765 context.paramids = bms_add_members(context.paramids, child_params);
2766
2767 /*
2768 * Any locally generated parameter doesn't count towards its generating
2769 * plan node's external dependencies. (Note: if we changed valid_params
2770 * and/or scan_params, we leak those bitmapsets; not worth the notational
2771 * trouble to clean them up.)
2772 */
2773 if (locally_added_param >= 0)
2774 {
2775 context.paramids = bms_del_member(context.paramids,
2776 locally_added_param);
2777 }
2778
2779 /* Now we have all the paramids referenced in this node and children */
2780
2781 if (!bms_is_subset(context.paramids, valid_params))
2782 elog(ERROR, "plan should not reference subplan's variable");
2783
2784 /*
2785 * The plan node's allParam and extParam fields should include all its
2786 * referenced paramids, plus contributions from any child initPlans.
2787 * However, any setParams of the initPlans should not be present in the
2788 * parent node's extParams, only in its allParams. (It's possible that
2789 * some initPlans have extParams that are setParams of other initPlans.)
2790 */
2791
2792 /* allParam must include initplans' extParams and setParams */
2793 plan->allParam = bms_union(context.paramids, initExtParam);
2794 plan->allParam = bms_add_members(plan->allParam, initSetParam);
2795 /* extParam must include any initplan extParams */
2796 plan->extParam = bms_union(context.paramids, initExtParam);
2797 /* but not any initplan setParams */
2798 plan->extParam = bms_del_members(plan->extParam, initSetParam);
2799
2800 /*
2801 * For speed at execution time, make sure extParam/allParam are actually
2802 * NULL if they are empty sets.
2803 */
2804 if (bms_is_empty(plan->extParam))
2805 plan->extParam = NULL;
2806 if (bms_is_empty(plan->allParam))
2807 plan->allParam = NULL;
2808
2809 return plan->allParam;
2810 }
2811
2812 /*
2813 * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
2814 * expression tree to the result set.
2815 */
2816 static bool
finalize_primnode(Node * node,finalize_primnode_context * context)2817 finalize_primnode(Node *node, finalize_primnode_context *context)
2818 {
2819 if (node == NULL)
2820 return false;
2821 if (IsA(node, Param))
2822 {
2823 if (((Param *) node)->paramkind == PARAM_EXEC)
2824 {
2825 int paramid = ((Param *) node)->paramid;
2826
2827 context->paramids = bms_add_member(context->paramids, paramid);
2828 }
2829 return false; /* no more to do here */
2830 }
2831 if (IsA(node, SubPlan))
2832 {
2833 SubPlan *subplan = (SubPlan *) node;
2834 Plan *plan = planner_subplan_get_plan(context->root, subplan);
2835 ListCell *lc;
2836 Bitmapset *subparamids;
2837
2838 /* Recurse into the testexpr, but not into the Plan */
2839 finalize_primnode(subplan->testexpr, context);
2840
2841 /*
2842 * Remove any param IDs of output parameters of the subplan that were
2843 * referenced in the testexpr. These are not interesting for
2844 * parameter change signaling since we always re-evaluate the subplan.
2845 * Note that this wouldn't work too well if there might be uses of the
2846 * same param IDs elsewhere in the plan, but that can't happen because
2847 * generate_new_exec_param never tries to merge params.
2848 */
2849 foreach(lc, subplan->paramIds)
2850 {
2851 context->paramids = bms_del_member(context->paramids,
2852 lfirst_int(lc));
2853 }
2854
2855 /* Also examine args list */
2856 finalize_primnode((Node *) subplan->args, context);
2857
2858 /*
2859 * Add params needed by the subplan to paramids, but excluding those
2860 * we will pass down to it. (We assume SS_finalize_plan was run on
2861 * the subplan already.)
2862 */
2863 subparamids = bms_copy(plan->extParam);
2864 foreach(lc, subplan->parParam)
2865 {
2866 subparamids = bms_del_member(subparamids, lfirst_int(lc));
2867 }
2868 context->paramids = bms_join(context->paramids, subparamids);
2869
2870 return false; /* no more to do here */
2871 }
2872 return expression_tree_walker(node, finalize_primnode,
2873 (void *) context);
2874 }
2875
2876 /*
2877 * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2878 * and add IDs of all PARAM_EXEC params appearing within their aggregated
2879 * arguments to the result set.
2880 */
2881 static bool
finalize_agg_primnode(Node * node,finalize_primnode_context * context)2882 finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2883 {
2884 if (node == NULL)
2885 return false;
2886 if (IsA(node, Aggref))
2887 {
2888 Aggref *agg = (Aggref *) node;
2889
2890 /* we should not consider the direct arguments, if any */
2891 finalize_primnode((Node *) agg->args, context);
2892 finalize_primnode((Node *) agg->aggfilter, context);
2893 return false; /* there can't be any Aggrefs below here */
2894 }
2895 return expression_tree_walker(node, finalize_agg_primnode,
2896 (void *) context);
2897 }
2898
2899 /*
2900 * SS_make_initplan_output_param - make a Param for an initPlan's output
2901 *
2902 * The plan is expected to return a scalar value of the given type/collation.
2903 *
2904 * Note that in some cases the initplan may not ever appear in the finished
2905 * plan tree. If that happens, we'll have wasted a PARAM_EXEC slot, which
2906 * is no big deal.
2907 */
2908 Param *
SS_make_initplan_output_param(PlannerInfo * root,Oid resulttype,int32 resulttypmod,Oid resultcollation)2909 SS_make_initplan_output_param(PlannerInfo *root,
2910 Oid resulttype, int32 resulttypmod,
2911 Oid resultcollation)
2912 {
2913 return generate_new_exec_param(root, resulttype,
2914 resulttypmod, resultcollation);
2915 }
2916
2917 /*
2918 * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
2919 *
2920 * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
2921 * list for the outer query level. A Param that represents the initplan's
2922 * output has already been assigned using SS_make_initplan_output_param.
2923 */
2924 void
SS_make_initplan_from_plan(PlannerInfo * root,PlannerInfo * subroot,Plan * plan,Param * prm)2925 SS_make_initplan_from_plan(PlannerInfo *root,
2926 PlannerInfo *subroot, Plan *plan,
2927 Param *prm)
2928 {
2929 SubPlan *node;
2930
2931 /*
2932 * Add the subplan and its PlannerInfo to the global lists.
2933 */
2934 root->glob->subplans = lappend(root->glob->subplans, plan);
2935 root->glob->subroots = lappend(root->glob->subroots, subroot);
2936
2937 /*
2938 * Create a SubPlan node and add it to the outer list of InitPlans. Note
2939 * it has to appear after any other InitPlans it might depend on (see
2940 * comments in ExecReScan).
2941 */
2942 node = makeNode(SubPlan);
2943 node->subLinkType = EXPR_SUBLINK;
2944 node->plan_id = list_length(root->glob->subplans);
2945 node->plan_name = psprintf("InitPlan %d (returns $%d)",
2946 node->plan_id, prm->paramid);
2947 get_first_col_type(plan, &node->firstColType, &node->firstColTypmod,
2948 &node->firstColCollation);
2949 node->setParam = list_make1_int(prm->paramid);
2950
2951 root->init_plans = lappend(root->init_plans, node);
2952
2953 /*
2954 * The node can't have any inputs (since it's an initplan), so the
2955 * parParam and args lists remain empty.
2956 */
2957
2958 /* Set costs of SubPlan using info from the plan tree */
2959 cost_subplan(subroot, node, plan);
2960 }
2961