/*-------------------------------------------------------------------------
 *
 * allpaths.c
 *    Routines to find possible search paths for processing a query
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/path/allpaths.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>
#include <math.h>

#include "access/sysattr.h"
#include "access/tsmapi.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "foreign/fdwapi.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"


/* results of subquery_is_pushdown_safe */
typedef struct pushdown_safety_info
{
    bool       *unsafeColumns;  /* which output columns are unsafe to use */
    bool        unsafeVolatile; /* don't push down volatile quals */
    bool        unsafeLeaky;    /* don't push down leaky quals */
} pushdown_safety_info;
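/*
 * Note: unsafeColumns[] is indexed by subquery output column resno, so it is
 * allocated with one extra slot and entry 0 goes unused; see the palloc0 in
 * set_subquery_pathlist().
 */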

/* These parameters are set by GUC */
bool        enable_geqo = false;    /* just in case GUC doesn't set it */
int         geqo_threshold;
int         min_parallel_relation_size;

/* Hook for plugins to get control in set_rel_pathlist() */
set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;

/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;


static void set_base_rel_consider_startup(PlannerInfo *root);
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
             Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 Index rti, RangeTblEntry *rte);
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
                   RangeTblEntry *rte);
static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
                          RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                       RangeTblEntry *rte);
static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
                         RangeTblEntry *rte);
static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                             RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
                 RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
                     RangeTblEntry *rte);
static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                    Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        Index rti, RangeTblEntry *rte);
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                           List *live_childrels,
                           List *all_child_pathkeys);
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
                                      RelOptInfo *rel,
                                      Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                      Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
                      RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
                    RangeTblEntry *rte);
static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
                       RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
                          pushdown_safety_info *safetyInfo);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
                      pushdown_safety_info *safetyInfo);
static void check_output_expressions(Query *subquery,
                         pushdown_safety_info *safetyInfo);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
                        pushdown_safety_info *safetyInfo);
static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                      pushdown_safety_info *safetyInfo);
static void subquery_push_qual(Query *subquery,
                   RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
                  RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);


/*
 * make_one_rel
 *    Finds all possible access paths for executing a query, returning a
 *    single rel that represents the join of all base rels in the query.
 */
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
    RelOptInfo *rel;
    Index       rti;

    /*
     * Construct the all_baserels Relids set.
     */
    root->all_baserels = NULL;
    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *brel = root->simple_rel_array[rti];

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (brel == NULL)
            continue;

        Assert(brel->relid == rti);     /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (brel->reloptkind != RELOPT_BASEREL)
            continue;

        root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
    }

    /* Mark base rels as to whether we care about fast-start plans */
    set_base_rel_consider_startup(root);

    /*
     * Compute size estimates and consider_parallel flags for each base rel,
     * then generate access paths.
     */
    set_base_rel_sizes(root);
    set_base_rel_pathlists(root);

    /*
     * Generate access paths for the entire join tree.
     */
    rel = make_rel_from_joinlist(root, joinlist);

    /*
     * The result should join all and only the query's base rels.
     */
    Assert(bms_equal(rel->relids, root->all_baserels));

    return rel;
}

/*
 * set_base_rel_consider_startup
 *    Set the consider_[param_]startup flags for each base-relation entry.
 *
 * For the moment, we only deal with consider_param_startup here; because the
 * logic for consider_startup is pretty trivial and is the same for every base
 * relation, we just let build_simple_rel() initialize that flag correctly to
 * start with. If that logic ever gets more complicated it would probably
 * be better to move it here.
 */
static void
set_base_rel_consider_startup(PlannerInfo *root)
{
    /*
     * Since parameterized paths can only be used on the inside of a nestloop
     * join plan, there is usually little value in considering fast-start
     * plans for them. However, for relations that are on the RHS of a SEMI
     * or ANTI join, a fast-start plan can be useful because we're only going
     * to care about fetching one tuple anyway.
     *
     * To minimize growth of planning time, we currently restrict this to
     * cases where the RHS is a single base relation, not a join; there is no
     * provision for consider_param_startup to get set at all on joinrels.
     * Also we don't worry about appendrels. costsize.c's costing rules for
     * nestloop semi/antijoins don't consider such cases either.
     */
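    /*
     * For example, in "SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE
     * b.x = a.x)", b is the lone RHS rel of a semi-join, so a fast-start
     * parameterized path on b can win: execution stops at the first matching
     * b row.
     */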
    ListCell   *lc;

    foreach(lc, root->join_info_list)
    {
        SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
        int         varno;

        if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
            bms_get_singleton_member(sjinfo->syn_righthand, &varno))
        {
            RelOptInfo *rel = find_base_rel(root, varno);

            rel->consider_param_startup = true;
        }
    }
}

/*
 * set_base_rel_sizes
 *    Set the size estimates (rows and widths) for each base-relation entry.
 *    Also determine whether to consider parallel paths for base relations.
 *
 * We do this in a separate pass over the base rels so that rowcount
 * estimates are available for parameterized path generation, and also so
 * that each rel's consider_parallel flag is set correctly before we begin to
 * generate paths.
 */
static void
set_base_rel_sizes(PlannerInfo *root)
{
    Index       rti;

    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *rel = root->simple_rel_array[rti];
        RangeTblEntry *rte;

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (rel == NULL)
            continue;

        Assert(rel->relid == rti);      /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (rel->reloptkind != RELOPT_BASEREL)
            continue;

        rte = root->simple_rte_array[rti];

        /*
         * If parallelism is allowable for this query in general, see whether
         * it's allowable for this rel in particular. We have to do this
         * before set_rel_size(), because (a) if this rel is an inheritance
         * parent, set_append_rel_size() will use and perhaps change the rel's
         * consider_parallel flag, and (b) for some RTE types, set_rel_size()
         * goes ahead and makes paths immediately.
         */
        if (root->glob->parallelModeOK)
            set_rel_consider_parallel(root, rel, rte);

        set_rel_size(root, rel, rti, rte);
    }
}

/*
 * set_base_rel_pathlists
 *    Finds all paths available for scanning each base-relation entry.
 *    Sequential scan and any available indices are considered.
 *    Each useful path is attached to its relation's 'pathlist' field.
 */
static void
set_base_rel_pathlists(PlannerInfo *root)
{
    Index       rti;

    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *rel = root->simple_rel_array[rti];

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (rel == NULL)
            continue;

        Assert(rel->relid == rti);      /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (rel->reloptkind != RELOPT_BASEREL)
            continue;

        set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
    }
}

/*
 * set_rel_size
 *    Set size estimates for a base relation
 */
static void
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
             Index rti, RangeTblEntry *rte)
{
    if (rel->reloptkind == RELOPT_BASEREL &&
        relation_excluded_by_constraints(root, rel, rte))
    {
        /*
         * We proved we don't need to scan the rel via constraint exclusion,
         * so set up a single dummy path for it. Here we only check this for
         * regular baserels; if it's an otherrel, CE was already checked in
         * set_append_rel_size().
         *
         * In this case, we go ahead and set up the relation's path right away
         * instead of leaving it for set_rel_pathlist to do. This is because
         * we don't have a convention for marking a rel as dummy except by
         * assigning a dummy path to it.
         */
        set_dummy_rel_pathlist(rel);
    }
    else if (rte->inh)
    {
        /* It's an "append relation", process accordingly */
        set_append_rel_size(root, rel, rti, rte);
    }
    else
    {
        switch (rel->rtekind)
        {
            case RTE_RELATION:
                if (rte->relkind == RELKIND_FOREIGN_TABLE)
                {
                    /* Foreign table */
                    set_foreign_size(root, rel, rte);
                }
                else if (rte->tablesample != NULL)
                {
                    /* Sampled relation */
                    set_tablesample_rel_size(root, rel, rte);
                }
                else
                {
                    /* Plain relation */
                    set_plain_rel_size(root, rel, rte);
                }
                break;
            case RTE_SUBQUERY:

                /*
                 * Subqueries don't support making a choice between
                 * parameterized and unparameterized paths, so just go ahead
                 * and build their paths immediately.
                 */
                set_subquery_pathlist(root, rel, rti, rte);
                break;
            case RTE_FUNCTION:
                set_function_size_estimates(root, rel);
                break;
            case RTE_VALUES:
                set_values_size_estimates(root, rel);
                break;
            case RTE_CTE:

                /*
                 * CTEs don't support making a choice between parameterized
                 * and unparameterized paths, so just go ahead and build their
                 * paths immediately.
                 */
                if (rte->self_reference)
                    set_worktable_pathlist(root, rel, rte);
                else
                    set_cte_pathlist(root, rel, rte);
                break;
            default:
                elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
                break;
        }
    }

    /*
     * We insist that all non-dummy rels have a nonzero rowcount estimate.
     */
    Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
}

/*
 * set_rel_pathlist
 *    Build access paths for a base relation
 */
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 Index rti, RangeTblEntry *rte)
{
    if (IS_DUMMY_REL(rel))
    {
        /* We already proved the relation empty, so nothing more to do */
    }
    else if (rte->inh)
    {
        /* It's an "append relation", process accordingly */
        set_append_rel_pathlist(root, rel, rti, rte);
    }
    else
    {
        switch (rel->rtekind)
        {
            case RTE_RELATION:
                if (rte->relkind == RELKIND_FOREIGN_TABLE)
                {
                    /* Foreign table */
                    set_foreign_pathlist(root, rel, rte);
                }
                else if (rte->tablesample != NULL)
                {
                    /* Sampled relation */
                    set_tablesample_rel_pathlist(root, rel, rte);
                }
                else
                {
                    /* Plain relation */
                    set_plain_rel_pathlist(root, rel, rte);
                }
                break;
            case RTE_SUBQUERY:
                /* Subquery --- fully handled during set_rel_size */
                break;
            case RTE_FUNCTION:
                /* RangeFunction */
                set_function_pathlist(root, rel, rte);
                break;
            case RTE_VALUES:
                /* Values list */
                set_values_pathlist(root, rel, rte);
                break;
            case RTE_CTE:
                /* CTE reference --- fully handled during set_rel_size */
                break;
            default:
                elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
                break;
        }
    }

    /*
     * Allow a plugin to editorialize on the set of Paths for this base
     * relation. It could add new paths (such as CustomPaths) by calling
     * add_path(), or add_partial_path() if parallel aware. It could also
     * delete or modify paths added by the core code.
     */
    if (set_rel_pathlist_hook)
        (*set_rel_pathlist_hook) (root, rel, rti, rte);

    /*
     * If this is a baserel, we should normally consider gathering any partial
     * paths we may have created for it. We have to do this after calling the
     * set_rel_pathlist_hook, else it cannot add partial paths to be included
     * here.
     *
     * However, if this is an inheritance child, skip it. Otherwise, we could
     * end up with a very large number of gather nodes, each trying to grab
     * its own pool of workers. Instead, we'll consider gathering partial
     * paths for the parent appendrel.
     */
    if (rel->reloptkind == RELOPT_BASEREL)
        generate_gather_paths(root, rel);

    /* Now find the cheapest of the paths for this rel */
    set_cheapest(rel);

#ifdef OPTIMIZER_DEBUG
    debug_print_rel(root, rel);
#endif
}

/*
 * set_plain_rel_size
 *    Set size estimates for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /*
     * Test any partial indexes of rel for applicability. We must do this
     * first since partial unique indexes can affect size estimates.
     */
    check_index_predicates(root, rel);

    /* Mark rel with estimated output rows, width, etc */
    set_baserel_size_estimates(root, rel);
}

/*
 * If this relation could possibly be scanned from within a worker, then set
 * its consider_parallel flag.
 */
static void
set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
                          RangeTblEntry *rte)
{
    /*
     * The flag has previously been initialized to false, so we can just
     * return if it becomes clear that we can't safely set it.
     */
    Assert(!rel->consider_parallel);

    /* Don't call this if parallelism is disallowed for the entire query. */
    Assert(root->glob->parallelModeOK);

    /* This should only be called for baserels and appendrel children. */
    Assert(rel->reloptkind == RELOPT_BASEREL ||
           rel->reloptkind == RELOPT_OTHER_MEMBER_REL);

    /* Assorted checks based on rtekind. */
    switch (rte->rtekind)
    {
        case RTE_RELATION:

            /*
             * Currently, parallel workers can't access the leader's temporary
             * tables. We could possibly relax this if we wrote all of its
             * local buffers at the start of the query and made no changes
             * thereafter (maybe we could allow hint bit changes), and if we
             * taught the workers to read them. Writing a large number of
             * temporary buffers could be expensive, though, and we don't have
             * the rest of the necessary infrastructure right now anyway. So
             * for now, bail out if we see a temporary table.
             */
            if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
                return;

            /*
             * Table sampling can be pushed down to workers if the sample
             * function and its arguments are safe.
             */
            if (rte->tablesample != NULL)
            {
                char        proparallel = func_parallel(rte->tablesample->tsmhandler);

                if (proparallel != PROPARALLEL_SAFE)
                    return;
                if (has_parallel_hazard((Node *) rte->tablesample->args,
                                        false))
                    return;
            }

            /*
             * Ask FDWs whether they can support performing a ForeignScan
             * within a worker. Most often, the answer will be no. For
             * example, if the nature of the FDW is such that it opens a TCP
             * connection with a remote server, each parallel worker would end
             * up with a separate connection, and these connections might not
             * be appropriately coordinated between workers and the leader.
             */
            if (rte->relkind == RELKIND_FOREIGN_TABLE)
            {
                Assert(rel->fdwroutine);
                if (!rel->fdwroutine->IsForeignScanParallelSafe)
                    return;
                if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
                    return;
            }

            /*
             * There are additional considerations for appendrels, which we'll
             * deal with in set_append_rel_size and set_append_rel_pathlist.
             * For now, just set consider_parallel based on the rel's own
             * quals and targetlist.
             */
            break;

        case RTE_SUBQUERY:

            /*
             * There's no intrinsic problem with scanning a subquery-in-FROM
             * (as distinct from a SubPlan or InitPlan) in a parallel worker.
             * If the subquery doesn't happen to have any parallel-safe paths,
             * then flagging it as consider_parallel won't change anything,
             * but that's true for plain tables, too. We must set
             * consider_parallel based on the rel's own quals and targetlist,
             * so that if a subquery path is parallel-safe but the quals and
             * projection we're sticking onto it are not, we correctly mark
             * the SubqueryScanPath as not parallel-safe. (Note that
             * set_subquery_pathlist() might push some of these quals down
             * into the subquery itself, but that doesn't change anything.)
             *
             * We can't push a sub-select containing LIMIT/OFFSET to workers,
             * as there is no guarantee that the row order will be fully
             * deterministic, and applying LIMIT/OFFSET will lead to
             * inconsistent results at the top-level. (In some cases, where
             * the result is ordered, we could relax this restriction. But it
             * doesn't currently seem worth expending extra effort to do so.)
             */
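            /*
             * (For example, "(SELECT * FROM t LIMIT 10)" is free to return
             * any ten rows of t, so two executions of it need not agree.)
             */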
            {
                Query      *subquery = castNode(Query, rte->subquery);

                if (limit_needed(subquery))
                    return;
            }
            break;

        case RTE_JOIN:
            /* Shouldn't happen; we're only considering baserels here. */
            Assert(false);
            return;

        case RTE_FUNCTION:
            /* Check for parallel-restricted functions. */
            if (has_parallel_hazard((Node *) rte->functions, false))
                return;
            break;

        case RTE_VALUES:
            /* Check for parallel-restricted functions. */
            if (has_parallel_hazard((Node *) rte->values_lists, false))
                return;
            break;

        case RTE_CTE:

            /*
             * CTE tuplestores aren't shared among parallel workers, so we
             * force all CTE scans to happen in the leader. Also, populating
             * the CTE would require executing a subplan that's not available
             * in the worker, might be parallel-restricted, and must get
             * executed only once.
             */
            return;
    }

    /*
     * If there's anything in baserestrictinfo that's parallel-restricted, we
     * give up on parallelizing access to this relation. We could consider
     * instead postponing application of the restricted quals until we're
     * above all the parallelism in the plan tree, but it's not clear that
     * that would be a win in very many cases, and it might be tricky to make
     * outer join clauses work correctly. It would likely break equivalence
     * classes, too.
     */
    if (has_parallel_hazard((Node *) rel->baserestrictinfo, false))
        return;

    /*
     * Likewise, if the relation's outputs are not parallel-safe, give up.
     * (Usually, they're just Vars, but sometimes they're not.)
     */
    if (has_parallel_hazard((Node *) rel->reltarget->exprs, false))
        return;

    /* We have a winner. */
    rel->consider_parallel = true;
}

/*
 * set_plain_rel_pathlist
 *    Build access paths for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    Relids      required_outer;

    /*
     * We don't support pushing join clauses into the quals of a seqscan, but
     * it could still have required parameterization due to LATERAL refs in
     * its tlist.
     */
    required_outer = rel->lateral_relids;

    /* Consider sequential scan */
    add_path(rel, create_seqscan_path(root, rel, required_outer, 0));

    /* If appropriate, consider parallel sequential scan */
    if (rel->consider_parallel && required_outer == NULL)
        create_plain_partial_paths(root, rel);

    /* Consider index scans */
    create_index_paths(root, rel);

    /* Consider TID scans */
    create_tidscan_paths(root, rel);
}

/*
 * create_plain_partial_paths
 *    Build partial access paths for parallel scan of a plain relation
 */
static void
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
{
    int         parallel_workers;

    /*
     * If the user has set the parallel_workers reloption, use that; otherwise
     * select a default number of workers.
     */
    if (rel->rel_parallel_workers != -1)
        parallel_workers = rel->rel_parallel_workers;
    else
    {
        int         parallel_threshold;

        /*
         * If this relation is too small to be worth a parallel scan, just
         * return without doing anything ... unless it's an inheritance child.
         * In that case, we want to generate a parallel path here anyway. It
         * might not be worthwhile just for this relation, but when combined
         * with all of its inheritance siblings it may well pay off.
         */
        if (rel->pages < (BlockNumber) min_parallel_relation_size &&
            rel->reloptkind == RELOPT_BASEREL)
            return;

        /*
         * Select the number of workers based on the log of the size of the
         * relation. This probably needs to be a good deal more
         * sophisticated, but we need something here for now. Note that the
         * upper limit of the min_parallel_relation_size GUC is chosen to
         * prevent overflow here.
         */
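        /*
         * Worked example, assuming the default min_parallel_relation_size of
         * 1024 pages (8MB): 3071 pages get 1 worker, 3072 pages get 2, 9216
         * get 3, 27648 get 4, the threshold tripling at each step.
         */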
        parallel_workers = 1;
        parallel_threshold = Max(min_parallel_relation_size, 1);
        while (rel->pages >= (BlockNumber) (parallel_threshold * 3))
        {
            parallel_workers++;
            parallel_threshold *= 3;
            if (parallel_threshold > INT_MAX / 3)
                break;          /* avoid overflow */
        }
    }

    /*
     * In no case use more than max_parallel_workers_per_gather workers.
     */
    parallel_workers = Min(parallel_workers, max_parallel_workers_per_gather);

    /* If any limit was set to zero, the user doesn't want a parallel scan. */
    if (parallel_workers <= 0)
        return;

    /* Add an unordered partial path based on a parallel sequential scan. */
    add_partial_path(rel, create_seqscan_path(root, rel, NULL,
                                              parallel_workers));
}

/*
 * set_tablesample_rel_size
 *    Set size estimates for a sampled relation
 */
static void
set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    TableSampleClause *tsc = rte->tablesample;
    TsmRoutine *tsm;
    BlockNumber pages;
    double      tuples;

    /*
     * Test any partial indexes of rel for applicability. We must do this
     * first since partial unique indexes can affect size estimates.
     */
    check_index_predicates(root, rel);

    /*
     * Call the sampling method's estimation function to estimate the number
     * of pages it will read and the number of tuples it will return. (Note:
     * we assume the function returns sane values.)
     */
    tsm = GetTsmRoutine(tsc->tsmhandler);
    tsm->SampleScanGetSampleSize(root, rel, tsc->args,
                                 &pages, &tuples);

    /*
     * For the moment, because we will only consider a SampleScan path for the
     * rel, it's okay to just overwrite the pages and tuples estimates for the
     * whole relation. If we ever consider multiple path types for sampled
     * rels, we'll need more complication.
     */
    rel->pages = pages;
    rel->tuples = tuples;

    /* Mark rel with estimated output rows, width, etc */
    set_baserel_size_estimates(root, rel);
}

/*
 * set_tablesample_rel_pathlist
 *    Build access paths for a sampled relation
 */
static void
set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    Relids      required_outer;
    Path       *path;

    /*
     * We don't support pushing join clauses into the quals of a samplescan,
     * but it could still have required parameterization due to LATERAL refs
     * in its tlist or TABLESAMPLE arguments.
     */
    required_outer = rel->lateral_relids;

    /* Consider sampled scan */
    path = create_samplescan_path(root, rel, required_outer);

    /*
     * If the sampling method does not support repeatable scans, we must avoid
     * plans that would scan the rel multiple times. Ideally, we'd simply
     * avoid putting the rel on the inside of a nestloop join; but adding such
     * a consideration to the planner seems like a great deal of complication
     * to support an uncommon usage of second-rate sampling methods. Instead,
     * if there is a risk that the query might perform an unsafe join, just
     * wrap the SampleScan in a Materialize node. We can check for joins by
     * counting the membership of all_baserels (note that this correctly
     * counts inheritance trees as single rels). If we're inside a subquery,
     * we can't easily check whether a join might occur in the outer query, so
     * just assume one is possible.
     *
     * GetTsmRoutine is relatively expensive compared to the other tests here,
     * so check repeatable_across_scans last, even though that's a bit odd.
     */
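    /*
     * (The core SYSTEM and BERNOULLI methods are repeatable across scans; an
     * example of a non-repeatable method, which would get materialized here
     * whenever a join is possible, is contrib's tsm_system_rows.)
     */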
    if ((root->query_level > 1 ||
         bms_membership(root->all_baserels) != BMS_SINGLETON) &&
        !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
    {
        path = (Path *) create_material_path(rel, path);
    }

    add_path(rel, path);

    /* For the moment, at least, there are no other paths to consider */
}

/*
 * set_foreign_size
 *    Set size estimates for a foreign table RTE
 */
static void
set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /* Mark rel with estimated output rows, width, etc */
    set_foreign_size_estimates(root, rel);

    /* Let FDW adjust the size estimates, if it can */
    rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);

    /* ... but do not let it set the rows estimate to zero */
    rel->rows = clamp_row_est(rel->rows);

    /* also, make sure rel->tuples is not insane relative to rel->rows */
    rel->tuples = Max(rel->tuples, rel->rows);
}

/*
 * set_foreign_pathlist
 *    Build access paths for a foreign table RTE
 */
static void
set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /* Call the FDW's GetForeignPaths function to generate path(s) */
    rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
}

/*
 * set_append_rel_size
 *    Set size estimates for an "append relation"
 *
 * The passed-in rel and RTE represent the entire append relation. The
 * relation's contents are computed by appending together the output of
 * the individual member relations. Note that in the inheritance case,
 * the first member relation is actually the same table as is mentioned in
 * the parent RTE ... but it has a different RTE and RelOptInfo. This is
 * a good thing because their outputs are not the same size.
 */
static void
set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                    Index rti, RangeTblEntry *rte)
{
    int         parentRTindex = rti;
    bool        has_live_children;
    double      parent_rows;
    double      parent_size;
    double     *parent_attrsizes;
    int         nattrs;
    ListCell   *l;

    /*
     * Initialize to compute size estimates for whole append relation.
     *
     * We handle width estimates by weighting the widths of different child
     * rels proportionally to their number of rows. This is sensible because
     * the use of width estimates is mainly to compute the total relation
     * "footprint" if we have to sort or hash it. To do this, we sum the
     * total equivalent size (in "double" arithmetic) and then divide by the
     * total rowcount estimate. This is done separately for the total rel
     * width and each attribute.
     *
     * Note: if you consider changing this logic, beware that child rels could
     * have zero rows and/or width, if they were excluded by constraints.
     */
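    /* In short: width = sum(child_rows * child_width) / sum(child_rows). */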
    has_live_children = false;
    parent_rows = 0;
    parent_size = 0;
    nattrs = rel->max_attr - rel->min_attr + 1;
    parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));

    foreach(l, root->append_rel_list)
    {
        AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
        int         childRTindex;
        RangeTblEntry *childRTE;
        RelOptInfo *childrel;
        List       *childquals;
        Node       *childqual;
        ListCell   *parentvars;
        ListCell   *childvars;

        /* append_rel_list contains all append rels; ignore others */
        if (appinfo->parent_relid != parentRTindex)
            continue;

        childRTindex = appinfo->child_relid;
        childRTE = root->simple_rte_array[childRTindex];

        /*
         * The child rel's RelOptInfo was already created during
         * add_base_rels_to_query.
         */
        childrel = find_base_rel(root, childRTindex);
        Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);

        /*
         * We have to copy the parent's targetlist and quals to the child,
         * with appropriate substitution of variables. However, only the
         * baserestrictinfo quals are needed before we can check for
         * constraint exclusion; so do that first and then check to see if we
         * can disregard this child.
         *
         * As of 8.4, the child rel's targetlist might contain non-Var
         * expressions, which means that substitution into the quals could
         * produce opportunities for const-simplification, and perhaps even
         * pseudoconstant quals. To deal with this, we strip the RestrictInfo
         * nodes, do the substitution, do const-simplification, and then
         * reconstitute the RestrictInfo layer.
         */
        childquals = get_all_actual_clauses(rel->baserestrictinfo);
        childquals = (List *) adjust_appendrel_attrs(root,
                                                     (Node *) childquals,
                                                     appinfo);
        childqual = eval_const_expressions(root, (Node *)
                                           make_ands_explicit(childquals));
        if (childqual && IsA(childqual, Const) &&
            (((Const *) childqual)->constisnull ||
             !DatumGetBool(((Const *) childqual)->constvalue)))
        {
            /*
             * Restriction reduces to constant FALSE or constant NULL after
             * substitution, so this child need not be scanned.
             */
            set_dummy_rel_pathlist(childrel);
            continue;
        }
        childquals = make_ands_implicit((Expr *) childqual);
        childquals = make_restrictinfos_from_actual_clauses(root,
                                                            childquals);
        childrel->baserestrictinfo = childquals;

        if (relation_excluded_by_constraints(root, childrel, childRTE))
        {
            /*
             * This child need not be scanned, so we can omit it from the
             * appendrel.
             */
            set_dummy_rel_pathlist(childrel);
            continue;
        }

        /*
         * CE failed, so finish copying/modifying targetlist and join quals.
         *
         * NB: the resulting childrel->reltarget->exprs may contain arbitrary
         * expressions, which otherwise would not occur in a rel's targetlist.
         * Code that might be looking at an appendrel child must cope with
         * such. (Normally, a rel's targetlist would only include Vars and
         * PlaceHolderVars.) XXX we do not bother to update the cost or width
         * fields of childrel->reltarget; not clear if that would be useful.
         */
        childrel->joininfo = (List *)
            adjust_appendrel_attrs(root,
                                   (Node *) rel->joininfo,
                                   appinfo);
        childrel->reltarget->exprs = (List *)
            adjust_appendrel_attrs(root,
                                   (Node *) rel->reltarget->exprs,
                                   appinfo);

        /*
         * We have to make child entries in the EquivalenceClass data
         * structures as well. This is needed either if the parent
         * participates in some eclass joins (because we will want to consider
         * inner-indexscan joins on the individual children) or if the parent
         * has useful pathkeys (because we should try to build MergeAppend
         * paths that produce those sort orderings).
         */
        if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
            add_child_rel_equivalences(root, appinfo, rel, childrel);
        childrel->has_eclass_joins = rel->has_eclass_joins;

        /*
         * Note: we could compute appropriate attr_needed data for the child's
         * variables, by transforming the parent's attr_needed through the
         * translated_vars mapping. However, currently there's no need
         * because attr_needed is only examined for base relations not
         * otherrels. So we just leave the child's attr_needed empty.
         */

        /*
         * If parallelism is allowable for this query in general, see whether
         * it's allowable for this childrel in particular. But if we've
         * already decided the appendrel is not parallel-safe as a whole,
         * there's no point in considering parallelism for this child. For
         * consistency, do this before calling set_rel_size() for the child.
         */
        if (root->glob->parallelModeOK && rel->consider_parallel)
            set_rel_consider_parallel(root, childrel, childRTE);

        /*
         * Compute the child's size.
         */
        set_rel_size(root, childrel, childRTindex, childRTE);

        /*
         * It is possible that constraint exclusion detected a contradiction
         * within a child subquery, even though we didn't prove one above. If
         * so, we can skip this child.
         */
        if (IS_DUMMY_REL(childrel))
            continue;

        /* We have at least one live child. */
        has_live_children = true;

        /*
         * If any live child is not parallel-safe, treat the whole appendrel
         * as not parallel-safe. In future we might be able to generate plans
         * in which some children are farmed out to workers while others are
         * not; but we don't have that today, so it's a waste to consider
         * partial paths anywhere in the appendrel unless it's all safe.
         * (Child rels visited before this one will be unmarked in
         * set_append_rel_pathlist().)
         */
        if (!childrel->consider_parallel)
            rel->consider_parallel = false;

        /*
         * Accumulate size information from each live child.
         */
        Assert(childrel->rows > 0);

        parent_rows += childrel->rows;
        parent_size += childrel->reltarget->width * childrel->rows;

        /*
         * Accumulate per-column estimates too. We need not do anything for
         * PlaceHolderVars in the parent list. If a child expression isn't a
         * Var, or we didn't record a width estimate for it, we have to fall
         * back on a datatype-based estimate.
         *
         * By construction, child's targetlist is 1-to-1 with parent's.
         */
        forboth(parentvars, rel->reltarget->exprs,
                childvars, childrel->reltarget->exprs)
        {
            Var        *parentvar = (Var *) lfirst(parentvars);
            Node       *childvar = (Node *) lfirst(childvars);

            if (IsA(parentvar, Var))
            {
                int         pndx = parentvar->varattno - rel->min_attr;
                int32       child_width = 0;

                if (IsA(childvar, Var) &&
                    ((Var *) childvar)->varno == childrel->relid)
                {
                    int         cndx = ((Var *) childvar)->varattno - childrel->min_attr;

                    child_width = childrel->attr_widths[cndx];
                }
                if (child_width <= 0)
                    child_width = get_typavgwidth(exprType(childvar),
                                                  exprTypmod(childvar));
                Assert(child_width > 0);
                parent_attrsizes[pndx] += child_width * childrel->rows;
            }
        }
    }

    if (has_live_children)
    {
        /*
         * Save the finished size estimates.
         */
        int         i;

        Assert(parent_rows > 0);
        rel->rows = parent_rows;
        rel->reltarget->width = rint(parent_size / parent_rows);
        for (i = 0; i < nattrs; i++)
            rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);

        /*
         * Set "raw tuples" count equal to "rows" for the appendrel; needed
         * because some places assume rel->tuples is valid for any baserel.
         */
        rel->tuples = parent_rows;
    }
    else
    {
        /*
         * All children were excluded by constraints, so mark the whole
         * appendrel dummy. We must do this in this phase so that the rel's
         * dummy-ness is visible when we generate paths for other rels.
         */
        set_dummy_rel_pathlist(rel);
    }

    pfree(parent_attrsizes);
}

/*
 * set_append_rel_pathlist
 *    Build access paths for an "append relation"
 */
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        Index rti, RangeTblEntry *rte)
{
    int         parentRTindex = rti;
    List       *live_childrels = NIL;
    List       *subpaths = NIL;
    bool        subpaths_valid = true;
    List       *partial_subpaths = NIL;
    bool        partial_subpaths_valid = true;
    List       *all_child_pathkeys = NIL;
    List       *all_child_outers = NIL;
    ListCell   *l;

    /*
     * Generate access paths for each member relation, and remember the
     * cheapest path for each one. Also, identify all pathkeys (orderings)
     * and parameterizations (required_outer sets) available for the member
     * relations.
     */
    foreach(l, root->append_rel_list)
    {
        AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
        int         childRTindex;
        RangeTblEntry *childRTE;
        RelOptInfo *childrel;
        ListCell   *lcp;

        /* append_rel_list contains all append rels; ignore others */
        if (appinfo->parent_relid != parentRTindex)
            continue;

        /* Re-locate the child RTE and RelOptInfo */
        childRTindex = appinfo->child_relid;
        childRTE = root->simple_rte_array[childRTindex];
        childrel = root->simple_rel_array[childRTindex];

        /*
         * If set_append_rel_size() decided the parent appendrel was
         * parallel-unsafe at some point after visiting this child rel, we
         * need to propagate the unsafety marking down to the child, so that
         * we don't generate useless partial paths for it.
         */
        if (!rel->consider_parallel)
            childrel->consider_parallel = false;

        /*
         * Compute the child's access paths.
         */
        set_rel_pathlist(root, childrel, childRTindex, childRTE);

        /*
         * If child is dummy, ignore it.
         */
        if (IS_DUMMY_REL(childrel))
            continue;

        /*
         * Child is live, so add it to the live_childrels list for use below.
         */
        live_childrels = lappend(live_childrels, childrel);

        /*
         * If child has an unparameterized cheapest-total path, add that to
         * the unparameterized Append path we are constructing for the parent.
         * If not, there's no workable unparameterized path.
         */
        if (childrel->cheapest_total_path->param_info == NULL)
            subpaths = accumulate_append_subpath(subpaths,
                                             childrel->cheapest_total_path);
        else
            subpaths_valid = false;

        /* Same idea, but for a partial plan. */
        if (childrel->partial_pathlist != NIL)
            partial_subpaths = accumulate_append_subpath(partial_subpaths,
                                      linitial(childrel->partial_pathlist));
        else
            partial_subpaths_valid = false;

        /*
         * Collect lists of all the available path orderings and
         * parameterizations for all the children. We use these as a
         * heuristic to indicate which sort orderings and parameterizations we
         * should build Append and MergeAppend paths for.
         */
        foreach(lcp, childrel->pathlist)
        {
            Path       *childpath = (Path *) lfirst(lcp);
            List       *childkeys = childpath->pathkeys;
            Relids      childouter = PATH_REQ_OUTER(childpath);

            /* Unsorted paths don't contribute to pathkey list */
            if (childkeys != NIL)
            {
                ListCell   *lpk;
                bool        found = false;

                /* Have we already seen this ordering? */
                foreach(lpk, all_child_pathkeys)
                {
                    List       *existing_pathkeys = (List *) lfirst(lpk);

                    if (compare_pathkeys(existing_pathkeys,
                                         childkeys) == PATHKEYS_EQUAL)
                    {
                        found = true;
                        break;
                    }
                }
                if (!found)
                {
                    /* No, so add it to all_child_pathkeys */
                    all_child_pathkeys = lappend(all_child_pathkeys,
                                                 childkeys);
                }
            }

            /* Unparameterized paths don't contribute to param-set list */
            if (childouter)
            {
                ListCell   *lco;
                bool        found = false;

                /* Have we already seen this param set? */
                foreach(lco, all_child_outers)
                {
                    Relids      existing_outers = (Relids) lfirst(lco);

                    if (bms_equal(existing_outers, childouter))
                    {
                        found = true;
                        break;
                    }
                }
                if (!found)
                {
                    /* No, so add it to all_child_outers */
                    all_child_outers = lappend(all_child_outers,
                                               childouter);
                }
            }
        }
    }

    /*
     * If we found unparameterized paths for all children, build an unordered,
     * unparameterized Append path for the rel. (Note: this is correct even
     * if we have zero or one live subpath due to constraint exclusion.)
     */
    if (subpaths_valid)
        add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0));

    /*
     * Consider an append of unordered, unparameterized partial paths.
     */
    if (partial_subpaths_valid)
    {
        AppendPath *appendpath;
        ListCell   *lc;
        int         parallel_workers = 0;

        /*
         * Decide on the number of workers to request for this append path.
         * For now, we just use the maximum value from among the members. It
         * might be useful to use a higher number if the Append node were
         * smart enough to spread out the workers, but it currently isn't.
         */
        foreach(lc, partial_subpaths)
        {
            Path       *path = lfirst(lc);

            parallel_workers = Max(parallel_workers, path->parallel_workers);
        }
        Assert(parallel_workers > 0);

        /* Generate a partial append path. */
        appendpath = create_append_path(rel, partial_subpaths, NULL,
                                        parallel_workers);
        add_partial_path(rel, (Path *) appendpath);
    }

    /*
     * Also build unparameterized MergeAppend paths based on the collected
     * list of child pathkeys.
     */
    if (subpaths_valid)
        generate_mergeappend_paths(root, rel, live_childrels,
                                   all_child_pathkeys);

    /*
     * Build Append paths for each parameterization seen among the child rels.
     * (This may look pretty expensive, but in most cases of practical
     * interest, the child rels will expose mostly the same parameterizations,
     * so that not that many cases actually get considered here.)
     *
     * The Append node itself cannot enforce quals, so all qual checking must
     * be done in the child paths. This means that to have a parameterized
     * Append path, we must have the exact same parameterization for each
     * child path; otherwise some children might be failing to check the
     * moved-down quals. To make them match up, we can try to increase the
     * parameterization of lesser-parameterized paths.
     */
    foreach(l, all_child_outers)
    {
        Relids      required_outer = (Relids) lfirst(l);
        ListCell   *lcr;

        /* Select the child paths for an Append with this parameterization */
        subpaths = NIL;
        subpaths_valid = true;
        foreach(lcr, live_childrels)
        {
            RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
            Path       *subpath;

            subpath = get_cheapest_parameterized_child_path(root,
                                                            childrel,
                                                            required_outer);
            if (subpath == NULL)
            {
                /* failed to make a suitable path for this child */
                subpaths_valid = false;
                break;
            }
            subpaths = accumulate_append_subpath(subpaths, subpath);
        }

        if (subpaths_valid)
            add_path(rel, (Path *)
                     create_append_path(rel, subpaths, required_outer, 0));
    }
}

/*
 * generate_mergeappend_paths
 *    Generate MergeAppend paths for an append relation
 *
 * Generate a path for each ordering (pathkey list) appearing in
 * all_child_pathkeys.
 *
 * We consider both cheapest-startup and cheapest-total cases, ie, for each
 * interesting ordering, collect all the cheapest startup subpaths and all the
 * cheapest total paths, and build a MergeAppend path for each case.
 *
 * We don't currently generate any parameterized MergeAppend paths. While
 * it would not take much more code here to do so, it's very unclear that it
 * is worth the planning cycles to investigate such paths: there's little
 * use for an ordered path on the inside of a nestloop. In fact, it's likely
 * that the current coding of add_path would reject such paths out of hand,
 * because add_path gives no credit for sort ordering of parameterized paths,
 * and a parameterized MergeAppend is going to be more expensive than the
 * corresponding parameterized Append path. If we ever try harder to support
 * parameterized mergejoin plans, it might be worth adding support for
 * parameterized MergeAppends to feed such joins. (See notes in
 * optimizer/README for why that might not ever happen, though.)
 */
static void
generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                           List *live_childrels,
                           List *all_child_pathkeys)
{
    ListCell   *lcp;

    foreach(lcp, all_child_pathkeys)
    {
        List       *pathkeys = (List *) lfirst(lcp);
        List       *startup_subpaths = NIL;
        List       *total_subpaths = NIL;
        bool        startup_neq_total = false;
        ListCell   *lcr;

        /* Select the child paths for this ordering... */
        foreach(lcr, live_childrels)
        {
            RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
            Path       *cheapest_startup,
                       *cheapest_total;

            /* Locate the right paths, if they are available. */
            cheapest_startup =
                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                               pathkeys,
                                               NULL,
                                               STARTUP_COST);
            cheapest_total =
                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                               pathkeys,
                                               NULL,
                                               TOTAL_COST);

            /*
             * If we can't find any paths with the right order, just use the
             * cheapest-total path; we'll have to sort it later.
             */
            if (cheapest_startup == NULL || cheapest_total == NULL)
            {
                cheapest_startup = cheapest_total =
                    childrel->cheapest_total_path;
                /* Assert we do have an unparameterized path for this child */
                Assert(cheapest_total->param_info == NULL);
            }

            /*
             * Notice whether we actually have different paths for the
             * "cheapest" and "total" cases; frequently there will be no point
             * in two create_merge_append_path() calls.
             */
            if (cheapest_startup != cheapest_total)
                startup_neq_total = true;

            startup_subpaths =
                accumulate_append_subpath(startup_subpaths, cheapest_startup);
            total_subpaths =
                accumulate_append_subpath(total_subpaths, cheapest_total);
        }

        /* ... and build the MergeAppend paths */
        add_path(rel, (Path *) create_merge_append_path(root,
                                                        rel,
                                                        startup_subpaths,
                                                        pathkeys,
                                                        NULL));
        if (startup_neq_total)
            add_path(rel, (Path *) create_merge_append_path(root,
                                                            rel,
                                                            total_subpaths,
                                                            pathkeys,
                                                            NULL));
    }
}

/*
 * get_cheapest_parameterized_child_path
 *    Get cheapest path for this relation that has exactly the requested
 *    parameterization.
 *
 * Returns NULL if unable to create such a path.
 */
static Path *
get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
                                      Relids required_outer)
{
    Path       *cheapest;
    ListCell   *lc;

    /*
     * Look up the cheapest existing path with no more than the needed
     * parameterization. If it has exactly the needed parameterization, we're
     * done.
     */
    cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
                                              NIL,
                                              required_outer,
                                              TOTAL_COST);
    Assert(cheapest != NULL);
    if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
        return cheapest;

    /*
     * Otherwise, we can "reparameterize" an existing path to match the given
     * parameterization, which effectively means pushing down additional
     * joinquals to be checked within the path's scan. However, some existing
     * paths might check the available joinquals already while others don't;
     * therefore, it's not clear which existing path will be cheapest after
     * reparameterization. We have to go through them all and find out.
     */
    cheapest = NULL;
    foreach(lc, rel->pathlist)
    {
        Path       *path = (Path *) lfirst(lc);

        /* Can't use it if it needs more than requested parameterization */
        if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
            continue;

        /*
         * Reparameterization can only increase the path's cost, so if it's
         * already more expensive than the current cheapest, forget it.
         */
        if (cheapest != NULL &&
            compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
            continue;

        /* Reparameterize if needed, then recheck cost */
        if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
        {
            path = reparameterize_path(root, path, required_outer, 1.0);
            if (path == NULL)
                continue;       /* failed to reparameterize this one */
            Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));

            if (cheapest != NULL &&
                compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
                continue;
        }

        /* We have a new best path */
        cheapest = path;
    }

    /* Return the best path, or NULL if we found no suitable candidate */
    return cheapest;
}

/*
 * accumulate_append_subpath
 *    Add a subpath to the list being built for an Append or MergeAppend
 *
 * It's possible that the child is itself an Append or MergeAppend path, in
 * which case we can "cut out the middleman" and just add its child paths to
 * our own list. (We don't try to do this earlier because we need to apply
 * both levels of transformation to the quals.)
 *
 * Note that if we omit a child MergeAppend in this way, we are effectively
 * omitting a sort step, which seems fine: if the parent is to be an Append,
 * its result would be unsorted anyway, while if the parent is to be a
 * MergeAppend, there's no point in a separate sort on a child.
 */
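/*
 * For example, accumulating Append(a, b) into a list already holding {c}
 * yields {c, a, b} rather than {c, Append(a, b)}.
 */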
1560 static List *
accumulate_append_subpath(List * subpaths,Path * path)1561 accumulate_append_subpath(List *subpaths, Path *path)
1562 {
1563 if (IsA(path, AppendPath))
1564 {
1565 AppendPath *apath = (AppendPath *) path;
1566
1567 /* list_copy is important here to avoid sharing list substructure */
1568 return list_concat(subpaths, list_copy(apath->subpaths));
1569 }
1570 else if (IsA(path, MergeAppendPath))
1571 {
1572 MergeAppendPath *mpath = (MergeAppendPath *) path;
1573
1574 /* list_copy is important here to avoid sharing list substructure */
1575 return list_concat(subpaths, list_copy(mpath->subpaths));
1576 }
1577 else
1578 return lappend(subpaths, path);
1579 }
1580
1581 /*
1582 * set_dummy_rel_pathlist
1583 * Build a dummy path for a relation that's been excluded by constraints
1584 *
1585 * Rather than inventing a special "dummy" path type, we represent this as an
1586 * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
1587 *
1588 * This is exported because inheritance_planner() has need for it.
1589 */
1590 void
set_dummy_rel_pathlist(RelOptInfo * rel)1591 set_dummy_rel_pathlist(RelOptInfo *rel)
1592 {
1593 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
1594 rel->rows = 0;
1595 rel->reltarget->width = 0;
1596
1597 /* Discard any pre-existing paths; no further need for them */
1598 rel->pathlist = NIL;
1599 rel->partial_pathlist = NIL;
1600
1601 add_path(rel, (Path *) create_append_path(rel, NIL,
1602 rel->lateral_relids,
1603 0));
1604
1605 /*
1606 * We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
1607 * will recognize the relation as dummy if anyone asks. This is redundant
1608 * when we're called from set_rel_size(), but not when called from
1609 * elsewhere, and doing it twice is harmless anyway.
1610 */
1611 set_cheapest(rel);
1612 }
1613
1614 /* quick-and-dirty test to see if any joining is needed */
1615 static bool
has_multiple_baserels(PlannerInfo * root)1616 has_multiple_baserels(PlannerInfo *root)
1617 {
1618 int num_base_rels = 0;
1619 Index rti;
1620
1621 for (rti = 1; rti < root->simple_rel_array_size; rti++)
1622 {
1623 RelOptInfo *brel = root->simple_rel_array[rti];
1624
1625 if (brel == NULL)
1626 continue;
1627
1628 /* ignore RTEs that are "other rels" */
1629 if (brel->reloptkind == RELOPT_BASEREL)
1630 if (++num_base_rels > 1)
1631 return true;
1632 }
1633 return false;
1634 }
1635
1636 /*
1637 * set_subquery_pathlist
1638 * Generate SubqueryScan access paths for a subquery RTE
1639 *
1640 * We don't currently support generating parameterized paths for subqueries
1641 * by pushing join clauses down into them; it seems too expensive to re-plan
1642 * the subquery multiple times to consider different alternatives.
1643 * (XXX that could stand to be reconsidered, now that we use Paths.)
1644 * So the paths made here will be parameterized if the subquery contains
1645 * LATERAL references, otherwise not. As long as that's true, there's no need
1646 * for a separate set_subquery_size phase: just make the paths right away.
1647 */
1648 static void
1649 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1650 Index rti, RangeTblEntry *rte)
1651 {
1652 Query *parse = root->parse;
1653 Query *subquery = rte->subquery;
1654 Relids required_outer;
1655 pushdown_safety_info safetyInfo;
1656 double tuple_fraction;
1657 RelOptInfo *sub_final_rel;
1658 ListCell *lc;
1659
1660 /*
1661 * Must copy the Query so that planning doesn't mess up the RTE contents
1662 * (really really need to fix the planner to not scribble on its input,
1663 * someday ... but see remove_unused_subquery_outputs to start with).
1664 */
1665 subquery = copyObject(subquery);
1666
1667 /*
1668 * If it's a LATERAL subquery, it might contain some Vars of the current
1669 * query level, requiring it to be treated as parameterized, even though
1670 * we don't support pushing down join quals into subqueries.
1671 */
1672 required_outer = rel->lateral_relids;
1673
1674 /*
1675 * Zero out result area for subquery_is_pushdown_safe, so that it can set
1676 * flags as needed while recursing. In particular, we need a workspace
1677 * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1678 * will be set TRUE if we find that output column i of the subquery is
1679 * unsafe to use in a pushed-down qual.
1680 */
1681 memset(&safetyInfo, 0, sizeof(safetyInfo));
1682 safetyInfo.unsafeColumns = (bool *)
1683 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
1684
1685 /*
1686 * If the subquery has the "security_barrier" flag, it means the subquery
1687 * originated from a view that must enforce row level security. Then we
1688 * must not push down quals that contain leaky functions. (Ideally this
1689 * would be checked inside subquery_is_pushdown_safe, but since we don't
1690 * currently pass the RTE to that function, we must do it here.)
1691 */
1692 safetyInfo.unsafeLeaky = rte->security_barrier;
1693
1694 /*
1695 * If there are any restriction clauses that have been attached to the
1696 * subquery relation, consider pushing them down to become WHERE or HAVING
1697 * quals of the subquery itself. This transformation is useful because it
1698 * may allow us to generate a better plan for the subquery than evaluating
1699 * all the subquery output rows and then filtering them.
1700 *
1701 * There are several cases where we cannot push down clauses. Restrictions
1702 * involving the subquery are checked by subquery_is_pushdown_safe().
1703 * Restrictions on individual clauses are checked by
1704 * qual_is_pushdown_safe(). Also, we don't want to push down
1705 * pseudoconstant clauses; better to have the gating node above the
1706 * subquery.
1707 *
1708 * Non-pushed-down clauses will get evaluated as qpquals of the
1709 * SubqueryScan node.
1710 *
1711 * XXX Are there any cases where we want to make a policy decision not to
1712 * push down a pushable qual, because it'd result in a worse plan?
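*
* For example, in
*		SELECT * FROM (SELECT a, b FROM t1
*		               UNION ALL
*		               SELECT a, b FROM t2) ss
*		WHERE a > 5;
* the clause "a > 5" can be pushed into each arm of the UNION ALL, where
* it may enable the use of indexes on t1 and t2.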
1713 */
1714 if (rel->baserestrictinfo != NIL &&
1715 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
1716 {
1717 /* OK to consider pushing down individual quals */
1718 List *upperrestrictlist = NIL;
1719 ListCell *l;
1720
1721 foreach(l, rel->baserestrictinfo)
1722 {
1723 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
1724 Node *clause = (Node *) rinfo->clause;
1725
1726 if (!rinfo->pseudoconstant &&
1727 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
1728 {
1729 /* Push it down */
1730 subquery_push_qual(subquery, rte, rti, clause);
1731 }
1732 else
1733 {
1734 /* Keep it in the upper query */
1735 upperrestrictlist = lappend(upperrestrictlist, rinfo);
1736 }
1737 }
1738 rel->baserestrictinfo = upperrestrictlist;
1739 }
1740
1741 pfree(safetyInfo.unsafeColumns);
1742
1743 /*
1744 * The upper query might not use all the subquery's output columns; if
1745 * not, we can simplify.
1746 */
1747 remove_unused_subquery_outputs(subquery, rel);
1748
1749 /*
1750 * We can safely pass the outer tuple_fraction down to the subquery if the
1751 * outer level has no joining, aggregation, or sorting to do. Otherwise
1752 * we'd better tell the subquery to plan for full retrieval. (XXX This
1753 * could probably be made more intelligent ...)
1754 */
1755 if (parse->hasAggs ||
1756 parse->groupClause ||
1757 parse->groupingSets ||
1758 parse->havingQual ||
1759 parse->distinctClause ||
1760 parse->sortClause ||
1761 has_multiple_baserels(root))
1762 tuple_fraction = 0.0; /* default case */
1763 else
1764 tuple_fraction = root->tuple_fraction;
1765
1766 /* plan_params should not be in use in current query level */
1767 Assert(root->plan_params == NIL);
1768
1769 /* Generate a subroot and Paths for the subquery */
1770 rel->subroot = subquery_planner(root->glob, subquery,
1771 root,
1772 false, tuple_fraction);
1773
1774 /* Isolate the params needed by this specific subplan */
1775 rel->subplan_params = root->plan_params;
1776 root->plan_params = NIL;
1777
1778 /*
1779 * It's possible that constraint exclusion proved the subquery empty. If
1780 * so, it's desirable to produce an unadorned dummy path so that we will
1781 * recognize appropriate optimizations at this query level.
1782 */
1783 sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
1784
1785 if (IS_DUMMY_REL(sub_final_rel))
1786 {
1787 set_dummy_rel_pathlist(rel);
1788 return;
1789 }
1790
1791 /*
1792 * Mark rel with estimated output rows, width, etc. Note that we have to
1793 * do this before generating outer-query paths, else cost_subqueryscan is
1794 * not happy.
1795 */
1796 set_subquery_size_estimates(root, rel);
1797
1798 /*
1799 * For each Path that subquery_planner produced, make a SubqueryScanPath
1800 * in the outer query.
1801 */
1802 foreach(lc, sub_final_rel->pathlist)
1803 {
1804 Path *subpath = (Path *) lfirst(lc);
1805 List *pathkeys;
1806
1807 /* Convert subpath's pathkeys to outer representation */
1808 pathkeys = convert_subquery_pathkeys(root,
1809 rel,
1810 subpath->pathkeys,
1811 make_tlist_from_pathtarget(subpath->pathtarget));
1812
1813 /* Generate outer path using this subpath */
1814 add_path(rel, (Path *)
1815 create_subqueryscan_path(root, rel, subpath,
1816 pathkeys, required_outer));
1817 }
1818 }
1819
1820 /*
1821 * set_function_pathlist
1822 * Build the (single) access path for a function RTE
1823 */
1824 static void
1825 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1826 {
1827 Relids required_outer;
1828 List *pathkeys = NIL;
1829
1830 /*
1831 * We don't support pushing join clauses into the quals of a function
1832 * scan, but it could still have required parameterization due to LATERAL
1833 * refs in the function expression.
1834 */
1835 required_outer = rel->lateral_relids;
1836
1837 /*
1838 * The result is considered unordered unless ORDINALITY was used, in which
1839 * case it is ordered by the ordinal column (the last one). See if we
1840 * care, by checking for uses of that Var in equivalence classes.
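*
* For example, in
*		SELECT * FROM unnest(ARRAY[30,10,20]) WITH ORDINALITY AS f(x, ord)
*		ORDER BY ord;
* the ordinal column lets us report a useful sort order for the function
* scan, avoiding an explicit Sort node above it.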
1841 */
1842 if (rte->funcordinality)
1843 {
1844 AttrNumber ordattno = rel->max_attr;
1845 Var *var = NULL;
1846 ListCell *lc;
1847
1848 /*
1849 * Is there a Var for it in rel's targetlist? If not, the query did
1850 * not reference the ordinality column, or at least not in any way
1851 * that would be interesting for sorting.
1852 */
1853 foreach(lc, rel->reltarget->exprs)
1854 {
1855 Var *node = (Var *) lfirst(lc);
1856
1857 /* checking varno/varlevelsup is just paranoia */
1858 if (IsA(node, Var) &&
1859 node->varattno == ordattno &&
1860 node->varno == rel->relid &&
1861 node->varlevelsup == 0)
1862 {
1863 var = node;
1864 break;
1865 }
1866 }
1867
1868 /*
1869 * Try to build pathkeys for this Var with int8 sorting. We tell
1870 * build_expression_pathkey not to build any new equivalence class; if
1871 * the Var isn't already mentioned in some EC, it means that nothing
1872 * cares about the ordering.
1873 */
1874 if (var)
1875 pathkeys = build_expression_pathkey(root,
1876 (Expr *) var,
1877 NULL, /* below outer joins */
1878 Int8LessOperator,
1879 rel->relids,
1880 false);
1881 }
1882
1883 /* Generate appropriate path */
1884 add_path(rel, create_functionscan_path(root, rel,
1885 pathkeys, required_outer));
1886 }
1887
1888 /*
1889 * set_values_pathlist
1890 * Build the (single) access path for a VALUES RTE
1891 */
1892 static void
1893 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1894 {
1895 Relids required_outer;
1896
1897 /*
1898 * We don't support pushing join clauses into the quals of a values scan,
1899 * but it could still have required parameterization due to LATERAL refs
1900 * in the values expressions.
1901 */
1902 required_outer = rel->lateral_relids;
1903
1904 /* Generate appropriate path */
1905 add_path(rel, create_valuesscan_path(root, rel, required_outer));
1906 }
1907
1908 /*
1909 * set_cte_pathlist
1910 * Build the (single) access path for a non-self-reference CTE RTE
1911 *
1912 * There's no need for a separate set_cte_size phase, since we don't
1913 * support join-qual-parameterized paths for CTEs.
1914 */
1915 static void
1916 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1917 {
1918 Plan *cteplan;
1919 PlannerInfo *cteroot;
1920 Index levelsup;
1921 int ndx;
1922 ListCell *lc;
1923 int plan_id;
1924 Relids required_outer;
1925
1926 /*
1927 * Find the referenced CTE, and locate the plan previously made for it.
1928 */
1929 levelsup = rte->ctelevelsup;
1930 cteroot = root;
1931 while (levelsup-- > 0)
1932 {
1933 cteroot = cteroot->parent_root;
1934 if (!cteroot) /* shouldn't happen */
1935 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1936 }
1937
1938 /*
1939 * Note: cte_plan_ids can be shorter than cteList, if we are still working
1940 * on planning the CTEs (ie, this is a side-reference from another CTE).
1941 * So we mustn't use forboth here.
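*
* For example, while planning cte_b in
*		WITH cte_a AS (...), cte_b AS (SELECT * FROM cte_a) ...
* cte_plan_ids has an entry for cte_a only, even though cteList already
* lists both CTEs.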
1942 */
1943 ndx = 0;
1944 foreach(lc, cteroot->parse->cteList)
1945 {
1946 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
1947
1948 if (strcmp(cte->ctename, rte->ctename) == 0)
1949 break;
1950 ndx++;
1951 }
1952 if (lc == NULL) /* shouldn't happen */
1953 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
1954 if (ndx >= list_length(cteroot->cte_plan_ids))
1955 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
1956 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
1957 Assert(plan_id > 0);
1958 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
1959
1960 /* Mark rel with estimated output rows, width, etc */
1961 set_cte_size_estimates(root, rel, cteplan->plan_rows);
1962
1963 /*
1964 * We don't support pushing join clauses into the quals of a CTE scan, but
1965 * it could still have required parameterization due to LATERAL refs in
1966 * its tlist.
1967 */
1968 required_outer = rel->lateral_relids;
1969
1970 /* Generate appropriate path */
1971 add_path(rel, create_ctescan_path(root, rel, required_outer));
1972 }
1973
1974 /*
1975 * set_worktable_pathlist
1976 * Build the (single) access path for a self-reference CTE RTE
1977 *
1978 * There's no need for a separate set_worktable_size phase, since we don't
1979 * support join-qual-parameterized paths for CTEs.
1980 */
1981 static void
1982 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1983 {
1984 Path *ctepath;
1985 PlannerInfo *cteroot;
1986 Index levelsup;
1987 Relids required_outer;
1988
1989 /*
1990 * We need to find the non-recursive term's path, which is in the plan
1991 * level that's processing the recursive UNION, which is one level *below*
1992 * where the CTE comes from.
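*
* For example, in
*		WITH RECURSIVE t(n) AS (
*			VALUES (1)
*		UNION ALL
*			SELECT n + 1 FROM t WHERE n < 100
*		)
*		SELECT * FROM t;
* the scan of "t" inside the recursive term is the worktable scan, and the
* path we look up here is the one for the non-recursive term (VALUES (1)).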
1993 */
1994 levelsup = rte->ctelevelsup;
1995 if (levelsup == 0) /* shouldn't happen */
1996 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1997 levelsup--;
1998 cteroot = root;
1999 while (levelsup-- > 0)
2000 {
2001 cteroot = cteroot->parent_root;
2002 if (!cteroot) /* shouldn't happen */
2003 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2004 }
2005 ctepath = cteroot->non_recursive_path;
2006 if (!ctepath) /* shouldn't happen */
2007 elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
2008
2009 /* Mark rel with estimated output rows, width, etc */
2010 set_cte_size_estimates(root, rel, ctepath->rows);
2011
2012 /*
2013 * We don't support pushing join clauses into the quals of a worktable
2014 * scan, but it could still have required parameterization due to LATERAL
2015 * refs in its tlist. (I'm not sure this is actually possible given the
2016 * restrictions on recursive references, but it's easy enough to support.)
2017 */
2018 required_outer = rel->lateral_relids;
2019
2020 /* Generate appropriate path */
2021 add_path(rel, create_worktablescan_path(root, rel, required_outer));
2022 }
2023
2024 /*
2025 * generate_gather_paths
2026 * Generate parallel access paths for a relation by pushing a Gather on
2027 * top of a partial path.
2028 *
2029 * This must not be called until after we're done creating all partial paths
2030 * for the specified relation. (Otherwise, add_partial_path might delete a
2031 * path that some GatherPath has a reference to.)
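*
* For example, from a partial sequential-scan path on a relation "big",
* this produces a path corresponding to a plan shaped like
*		Gather
*		  ->  Parallel Seq Scan on big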
2032 */
2033 void
2034 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
2035 {
2036 Path *cheapest_partial_path;
2037 Path *simple_gather_path;
2038
2039 /* If there are no partial paths, there's nothing to do here. */
2040 if (rel->partial_pathlist == NIL)
2041 return;
2042
2043 /*
2044 * The output of Gather is currently always unsorted, so there's only one
2045 * partial path of interest: the cheapest one. That will be the one at
2046 * the front of partial_pathlist because of the way add_partial_path
2047 * works.
2048 *
2049 * Eventually, we should have a Gather Merge operation that can merge
2050 * multiple tuple streams together while preserving their ordering. We
2051 * could usefully generate such a path from each partial path that has
2052 * non-NIL pathkeys.
2053 */
2054 cheapest_partial_path = linitial(rel->partial_pathlist);
2055 simple_gather_path = (Path *)
2056 create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
2057 NULL, NULL);
2058 add_path(rel, simple_gather_path);
2059 }
2060
2061 /*
2062 * make_rel_from_joinlist
2063 * Build access paths using a "joinlist" to guide the join path search.
2064 *
2065 * See comments for deconstruct_jointree() for definition of the joinlist
2066 * data structure.
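*
* For example, "FROM a, b, c" typically yields the flat joinlist
*		(RangeTblRef(a), RangeTblRef(b), RangeTblRef(c))
* whereas a jointree too large to collapse fully can yield nested
* sub-lists, each of which is planned by a recursive invocation below.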
2067 */
2068 static RelOptInfo *
2069 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
2070 {
2071 int levels_needed;
2072 List *initial_rels;
2073 ListCell *jl;
2074
2075 /*
2076 * Count the number of child joinlist nodes. This is the depth of the
2077 * dynamic-programming algorithm we must employ to consider all ways of
2078 * joining the child nodes.
2079 */
2080 levels_needed = list_length(joinlist);
2081
2082 if (levels_needed <= 0)
2083 return NULL; /* nothing to do? */
2084
2085 /*
2086 * Construct a list of rels corresponding to the child joinlist nodes.
2087 * This may contain both base rels and rels constructed according to
2088 * sub-joinlists.
2089 */
2090 initial_rels = NIL;
2091 foreach(jl, joinlist)
2092 {
2093 Node *jlnode = (Node *) lfirst(jl);
2094 RelOptInfo *thisrel;
2095
2096 if (IsA(jlnode, RangeTblRef))
2097 {
2098 int varno = ((RangeTblRef *) jlnode)->rtindex;
2099
2100 thisrel = find_base_rel(root, varno);
2101 }
2102 else if (IsA(jlnode, List))
2103 {
2104 /* Recurse to handle subproblem */
2105 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
2106 }
2107 else
2108 {
2109 elog(ERROR, "unrecognized joinlist node type: %d",
2110 (int) nodeTag(jlnode));
2111 thisrel = NULL; /* keep compiler quiet */
2112 }
2113
2114 initial_rels = lappend(initial_rels, thisrel);
2115 }
2116
2117 if (levels_needed == 1)
2118 {
2119 /*
2120 * Single joinlist node, so we're done.
2121 */
2122 return (RelOptInfo *) linitial(initial_rels);
2123 }
2124 else
2125 {
2126 /*
2127 * Consider the different orders in which we could join the rels,
2128 * using a plugin, GEQO, or the regular join search code.
2129 *
2130 * We put the initial_rels list into a PlannerInfo field because
2131 * has_legal_joinclause() needs to look at it (ugly :-().
2132 */
2133 root->initial_rels = initial_rels;
2134
2135 if (join_search_hook)
2136 return (*join_search_hook) (root, levels_needed, initial_rels);
2137 else if (enable_geqo && levels_needed >= geqo_threshold)
2138 return geqo(root, levels_needed, initial_rels);
2139 else
2140 return standard_join_search(root, levels_needed, initial_rels);
2141 }
2142 }
2143
2144 /*
2145 * standard_join_search
2146 * Find possible joinpaths for a query by successively finding ways
2147 * to join component relations into join relations.
2148 *
2149 * 'levels_needed' is the number of iterations needed, ie, the number of
2150 * independent jointree items in the query. This is > 1.
2151 *
2152 * 'initial_rels' is a list of RelOptInfo nodes for each independent
2153 * jointree item. These are the components to be joined together.
2154 * Note that levels_needed == list_length(initial_rels).
2155 *
2156 * Returns the final level of join relations, i.e., the relation that is
2157 * the result of joining all the original relations together.
2158 * At least one implementation path must be provided for this relation and
2159 * all required sub-relations.
2160 *
2161 * To support loadable plugins that modify planner behavior by changing the
2162 * join searching algorithm, we provide a hook variable that lets a plugin
2163 * replace or supplement this function. Any such hook must return the same
2164 * final join relation as the standard code would, but it might have a
2165 * different set of implementation paths attached, and only the sub-joinrels
2166 * needed for these paths need have been instantiated.
2167 *
2168 * Note to plugin authors: the functions invoked during standard_join_search()
2169 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
2170 * than one join-order search, you'll probably need to save and restore the
2171 * original states of those data structures. See geqo_eval() for an example.
2172 */
2173 RelOptInfo *
2174 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
2175 {
2176 int lev;
2177 RelOptInfo *rel;
2178
2179 /*
2180 * This function cannot be invoked recursively within any one planning
2181 * problem, so join_rel_level[] can't be in use already.
2182 */
2183 Assert(root->join_rel_level == NULL);
2184
2185 /*
2186 * We employ a simple "dynamic programming" algorithm: we first find all
2187 * ways to build joins of two jointree items, then all ways to build joins
2188 * of three items (from two-item joins and single items), then four-item
2189 * joins, and so on until we have considered all ways to join all the
2190 * items into one rel.
2191 *
2192 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
2193 * set root->join_rel_level[1] to represent all the single-jointree-item
2194 * relations.
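*
* For example, with four initial rels {a, b, c, d}, join_rel_level[2]
* holds two-way joins such as {a b}, join_rel_level[3] holds three-way
* joins built from a two-way join plus a single rel, and
* join_rel_level[4] holds the single final rel {a b c d}.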
2195 */
2196 root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
2197
2198 root->join_rel_level[1] = initial_rels;
2199
2200 for (lev = 2; lev <= levels_needed; lev++)
2201 {
2202 ListCell *lc;
2203
2204 /*
2205 * Determine all possible pairs of relations to be joined at this
2206 * level, and build paths for making each one from every available
2207 * pair of lower-level relations.
2208 */
2209 join_search_one_level(root, lev);
2210
2211 /*
2212 * Run generate_gather_paths() for each just-processed joinrel. We
2213 * could not do this earlier because both regular and partial paths
2214 * can get added to a particular joinrel multiple times within
2215 * join_search_one_level. After that, we're done creating paths for
2216 * the joinrel, so run set_cheapest().
2217 */
2218 foreach(lc, root->join_rel_level[lev])
2219 {
2220 rel = (RelOptInfo *) lfirst(lc);
2221
2222 /* Create GatherPaths for any useful partial paths for rel */
2223 generate_gather_paths(root, rel);
2224
2225 /* Find and save the cheapest paths for this rel */
2226 set_cheapest(rel);
2227
2228 #ifdef OPTIMIZER_DEBUG
2229 debug_print_rel(root, rel);
2230 #endif
2231 }
2232 }
2233
2234 /*
2235 * We should have a single rel at the final level.
2236 */
2237 if (root->join_rel_level[levels_needed] == NIL)
2238 elog(ERROR, "failed to build any %d-way joins", levels_needed);
2239 Assert(list_length(root->join_rel_level[levels_needed]) == 1);
2240
2241 rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
2242
2243 root->join_rel_level = NULL;
2244
2245 return rel;
2246 }
2247
2248 /*****************************************************************************
2249 * PUSHING QUALS DOWN INTO SUBQUERIES
2250 *****************************************************************************/
2251
2252 /*
2253 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
2254 *
2255 * subquery is the particular component query being checked. topquery
2256 * is the top component of a set-operations tree (the same Query if no
2257 * set-op is involved).
2258 *
2259 * Conditions checked here:
2260 *
2261 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
2262 * since that could change the set of rows returned.
2263 *
2264 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
2265 * quals into it, because that could change the results.
2266 *
2267 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
2268 * This is because upper-level quals should semantically be evaluated only
2269 * once per distinct row, not once per original row, and if the qual is
2270 * volatile then extra evaluations could change the results. (This issue
2271 * does not apply to other forms of aggregation such as GROUP BY, because
2272 * when those are present we push into HAVING not WHERE, so that the quals
2273 * are still applied after aggregation.)
2274 *
2275 * 4. If the subquery contains window functions, we cannot push volatile quals
2276 * into it. The issue here is a bit different from DISTINCT: a volatile qual
2277 * might succeed for some rows of a window partition and fail for others,
2278 * thereby changing the partition contents and thus the window functions'
2279 * results for rows that remain.
2280 *
2281 * 5. If the subquery has nonempty grouping sets, we cannot push down any
2282 * quals. The concern here is that a qual referencing a "constant" grouping
2283 * column could get constant-folded, which would be improper because the value
2284 * is potentially nullable by grouping-set expansion. This restriction could
2285 * be removed if we had a parsetree representation that shows that such
2286 * grouping columns are not really constant. (There are other ideas that
2287 * could be used to relax this restriction, but that's the approach most
2288 * likely to get taken in the future. Note that there's not much to be gained
2289 * so long as subquery_planner can't move HAVING clauses to WHERE within such
2290 * a subquery.)
2291 *
2292 * In addition, we make several checks on the subquery's output columns to see
2293 * if it is safe to reference them in pushed-down quals. If output column k
2294 * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
2295 * to TRUE, but we don't reject the subquery overall since column k might not
2296 * be referenced by some/all quals. The unsafeColumns[] array will be
2297 * consulted later by qual_is_pushdown_safe(). It's better to do it this way
2298 * than to make the checks directly in qual_is_pushdown_safe(), because when
2299 * the subquery involves set operations we have to check the output
2300 * expressions in each arm of the set op.
2301 *
2302 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
2303 * we're effectively assuming that the quals cannot distinguish values that
2304 * the DISTINCT's equality operator sees as equal, yet there are many
2305 * counterexamples to that assumption. However use of such a qual with a
2306 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
2307 * "equal" value will be chosen as the output value by the DISTINCT operation.
2308 * So we don't worry too much about that. Another objection is that if the
2309 * qual is expensive to evaluate, running it for each original row might cost
2310 * more than we save by eliminating rows before the DISTINCT step. But it
2311 * would be very hard to estimate that at this stage, and in practice pushdown
2312 * seldom seems to make things worse, so we ignore that problem too.
2313 *
2314 * Note: likewise, pushing quals into a subquery with window functions is a
2315 * bit dubious: the quals might remove some rows of a window partition while
2316 * leaving others, causing changes in the window functions' results for the
2317 * surviving rows. We insist that such a qual reference only partitioning
2318 * columns, but again that only protects us if the qual does not distinguish
2319 * values that the partitioning equality operator sees as equal. The risks
2320 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
2321 * occurs and thus there is no theoretical problem with such a qual. But
2322 * we'll do this anyway because the potential performance benefits are very
2323 * large, and we've seen no field complaints about the longstanding comparable
2324 * behavior with DISTINCT.
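*
* For example, no qual can be pushed into
*		SELECT * FROM (SELECT a FROM t ORDER BY b LIMIT 10) ss
*		WHERE a > 0;
* since filtering before the LIMIT would change which ten rows the
* subquery returns (point 1).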
2325 */
2326 static bool
2327 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
2328 pushdown_safety_info *safetyInfo)
2329 {
2330 SetOperationStmt *topop;
2331
2332 /* Check point 1 */
2333 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
2334 return false;
2335
2336 /* Check point 5 */
2337 if (subquery->groupClause && subquery->groupingSets)
2338 return false;
2339
2340 /* Check points 3 and 4 */
2341 if (subquery->distinctClause || subquery->hasWindowFuncs)
2342 safetyInfo->unsafeVolatile = true;
2343
2344 /*
2345 * If we're at a leaf query, check for unsafe expressions in its target
2346 * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
2347 * setop trees have only simple Vars in their tlists, so no need to check
2348 * them.)
2349 */
2350 if (subquery->setOperations == NULL)
2351 check_output_expressions(subquery, safetyInfo);
2352
2353 /* Are we at top level, or looking at a setop component? */
2354 if (subquery == topquery)
2355 {
2356 /* Top level, so check any component queries */
2357 if (subquery->setOperations != NULL)
2358 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
2359 safetyInfo))
2360 return false;
2361 }
2362 else
2363 {
2364 /* Setop component must not have more components (too weird) */
2365 if (subquery->setOperations != NULL)
2366 return false;
2367 /* Check whether setop component output types match top level */
2368 topop = (SetOperationStmt *) topquery->setOperations;
2369 Assert(topop && IsA(topop, SetOperationStmt));
2370 compare_tlist_datatypes(subquery->targetList,
2371 topop->colTypes,
2372 safetyInfo);
2373 }
2374 return true;
2375 }
2376
2377 /*
2378 * Helper routine to recurse through setOperations tree
2379 */
2380 static bool
2381 recurse_pushdown_safe(Node *setOp, Query *topquery,
2382 pushdown_safety_info *safetyInfo)
2383 {
2384 if (IsA(setOp, RangeTblRef))
2385 {
2386 RangeTblRef *rtr = (RangeTblRef *) setOp;
2387 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
2388 Query *subquery = rte->subquery;
2389
2390 Assert(subquery != NULL);
2391 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
2392 }
2393 else if (IsA(setOp, SetOperationStmt))
2394 {
2395 SetOperationStmt *op = (SetOperationStmt *) setOp;
2396
2397 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
2398 if (op->op == SETOP_EXCEPT)
2399 return false;
2400 /* Else recurse */
2401 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
2402 return false;
2403 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
2404 return false;
2405 }
2406 else
2407 {
2408 elog(ERROR, "unrecognized node type: %d",
2409 (int) nodeTag(setOp));
2410 }
2411 return true;
2412 }
2413
2414 /*
2415 * check_output_expressions - check subquery's output expressions for safety
2416 *
2417 * There are several cases in which it's unsafe to push down an upper-level
2418 * qual if it references a particular output column of a subquery. We check
2419 * each output column of the subquery and set unsafeColumns[k] to TRUE if
2420 * that column is unsafe for a pushed-down qual to reference. The conditions
2421 * checked here are:
2422 *
2423 * 1. We must not push down any quals that refer to subselect outputs that
2424 * return sets, else we'd introduce functions-returning-sets into the
2425 * subquery's WHERE/HAVING quals.
2426 *
2427 * 2. We must not push down any quals that refer to subselect outputs that
2428 * contain volatile functions, for fear of introducing strange results due
2429 * to multiple evaluation of a volatile function.
2430 *
2431 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
2432 * refer to non-DISTINCT output columns, because that could change the set
2433 * of rows returned. (This condition is vacuous for DISTINCT, because then
2434 * there are no non-DISTINCT output columns, so we needn't check. Note that
2435 * subquery_is_pushdown_safe already reported that we can't use volatile
2436 * quals if there's DISTINCT or DISTINCT ON.)
2437 *
2438 * 4. If the subquery has any window functions, we must not push down quals
2439 * that reference any output columns that are not listed in all the subquery's
2440 * window PARTITION BY clauses. We can push down quals that use only
2441 * partitioning columns because they should succeed or fail identically for
2442 * every row of any one window partition, and totally excluding some
2443 * partitions will not change a window function's results for remaining
2444 * partitions. (Again, this also requires nonvolatile quals, but
2445 * subquery_is_pushdown_safe handles that.)
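*
* For example, in
*		SELECT * FROM (SELECT a, random() AS r FROM t) ss WHERE r < 0.5;
* output column r is marked unsafe (point 2): pushing "r < 0.5" down
* would re-evaluate random(), giving a value different from the one the
* subquery actually returns.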
2446 */
2447 static void
2448 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
2449 {
2450 ListCell *lc;
2451
2452 foreach(lc, subquery->targetList)
2453 {
2454 TargetEntry *tle = (TargetEntry *) lfirst(lc);
2455
2456 if (tle->resjunk)
2457 continue; /* ignore resjunk columns */
2458
2459 /* We need not check further if output col is already known unsafe */
2460 if (safetyInfo->unsafeColumns[tle->resno])
2461 continue;
2462
2463 /* Functions returning sets are unsafe (point 1) */
2464 if (expression_returns_set((Node *) tle->expr))
2465 {
2466 safetyInfo->unsafeColumns[tle->resno] = true;
2467 continue;
2468 }
2469
2470 /* Volatile functions are unsafe (point 2) */
2471 if (contain_volatile_functions((Node *) tle->expr))
2472 {
2473 safetyInfo->unsafeColumns[tle->resno] = true;
2474 continue;
2475 }
2476
2477 /* If subquery uses DISTINCT ON, check point 3 */
2478 if (subquery->hasDistinctOn &&
2479 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
2480 {
2481 /* non-DISTINCT column, so mark it unsafe */
2482 safetyInfo->unsafeColumns[tle->resno] = true;
2483 continue;
2484 }
2485
2486 /* If subquery uses window functions, check point 4 */
2487 if (subquery->hasWindowFuncs &&
2488 !targetIsInAllPartitionLists(tle, subquery))
2489 {
2490 /* not present in all PARTITION BY clauses, so mark it unsafe */
2491 safetyInfo->unsafeColumns[tle->resno] = true;
2492 continue;
2493 }
2494 }
2495 }
2496
2497 /*
2498 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
2499 * push quals into each component query, but the quals can only reference
2500 * subquery columns that suffer no type coercions in the set operation.
2501 * Otherwise there are possible semantic gotchas. So, we check the
2502 * component queries to see if any of them have output types different from
2503 * the top-level setop outputs. unsafeColumns[k] is set true if column k
2504 * has different type in any component.
2505 *
2506 * We don't have to care about typmods here: the only allowed difference
2507 * between set-op input and output typmods is input is a specific typmod
2508 * and output is -1, and that does not require a coercion.
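*
* For example, if one arm of a UNION ALL emits an int4 column where the
* setop's output column is numeric, that column is marked unsafe: a
* pushed-down qual would be applied to the uncoerced int4 values.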
2509 *
2510 * tlist is a subquery tlist.
2511 * colTypes is an OID list of the top-level setop's output column types.
2512 * safetyInfo->unsafeColumns[] is the result array.
2513 */
2514 static void
2515 compare_tlist_datatypes(List *tlist, List *colTypes,
2516 pushdown_safety_info *safetyInfo)
2517 {
2518 ListCell *l;
2519 ListCell *colType = list_head(colTypes);
2520
2521 foreach(l, tlist)
2522 {
2523 TargetEntry *tle = (TargetEntry *) lfirst(l);
2524
2525 if (tle->resjunk)
2526 continue; /* ignore resjunk columns */
2527 if (colType == NULL)
2528 elog(ERROR, "wrong number of tlist entries");
2529 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
2530 safetyInfo->unsafeColumns[tle->resno] = true;
2531 colType = lnext(colType);
2532 }
2533 if (colType != NULL)
2534 elog(ERROR, "wrong number of tlist entries");
2535 }
2536
2537 /*
2538 * targetIsInAllPartitionLists
2539 * True if the TargetEntry is listed in the PARTITION BY clause
2540 * of every window defined in the query.
2541 *
2542 * It would be safe to ignore windows not actually used by any window
2543 * function, but it's not easy to get that info at this stage; and it's
2544 * unlikely to be useful to spend any extra cycles getting it, since
2545 * unreferenced window definitions are probably infrequent in practice.
2546 */
2547 static bool
2548 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
2549 {
2550 ListCell *lc;
2551
2552 foreach(lc, query->windowClause)
2553 {
2554 WindowClause *wc = (WindowClause *) lfirst(lc);
2555
2556 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
2557 return false;
2558 }
2559 return true;
2560 }
2561
2562 /*
2563 * qual_is_pushdown_safe - is a particular qual safe to push down?
2564 *
2565 * qual is a restriction clause applying to the given subquery (whose RTE
2566 * has index rti in the parent query).
2567 *
2568 * Conditions checked here:
2569 *
2570 * 1. The qual must not contain any subselects (mainly because I'm not sure
2571 * it will work correctly: sublinks will already have been transformed into
2572 * subplans in the qual, but not in the subquery).
2573 *
2574 * 2. If unsafeVolatile is set, the qual must not contain any volatile
2575 * functions.
2576 *
2577 * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
2578 * that are passed Var nodes, and therefore might reveal values from the
2579 * subquery as side effects.
2580 *
2581 * 4. The qual must not refer to the whole-row output of the subquery
2582 * (since there is no easy way to name that within the subquery itself).
2583 *
2584 * 5. The qual must not refer to any subquery output columns that were
2585 * found to be unsafe to reference by subquery_is_pushdown_safe().
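*
* For example, "ss.a > 5" is typically safe to push down, while
* "ss > ROW(1, 2)" (a whole-row reference, point 4) or a clause containing
* "EXISTS(SELECT ...)" (a subselect, point 1) is not.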
2586 */
2587 static bool
2588 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
2589 pushdown_safety_info *safetyInfo)
2590 {
2591 bool safe = true;
2592 List *vars;
2593 ListCell *vl;
2594
2595 /* Refuse subselects (point 1) */
2596 if (contain_subplans(qual))
2597 return false;
2598
2599 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
2600 if (safetyInfo->unsafeVolatile &&
2601 contain_volatile_functions(qual))
2602 return false;
2603
2604 /* Refuse leaky quals if told to (point 3) */
2605 if (safetyInfo->unsafeLeaky &&
2606 contain_leaked_vars(qual))
2607 return false;
2608
2609 /*
2610 * It would be unsafe to push down window function calls, but at least for
2611 * the moment we could never see any in a qual anyhow. (The same applies
2612 * to aggregates, which we check for in pull_var_clause below.)
2613 */
2614 Assert(!contain_window_function(qual));
2615
2616 /*
2617 * Examine all Vars used in clause. Since it's a restriction clause, all
2618 * such Vars must refer to subselect output columns ... unless this is
2619 * part of a LATERAL subquery, in which case there could be lateral
2620 * references.
2621 */
2622 vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
2623 foreach(vl, vars)
2624 {
2625 Var *var = (Var *) lfirst(vl);
2626
2627 /*
2628 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
2629 * It's not clear whether a PHV could safely be pushed down, and even
2630 * less clear whether such a situation could arise in any cases of
2631 * practical interest anyway. So for the moment, just refuse to push
2632 * down.
2633 */
2634 if (!IsA(var, Var))
2635 {
2636 safe = false;
2637 break;
2638 }
2639
2640 /*
2641 * Punt if we find any lateral references. It would be safe to push
2642 * these down, but we'd have to convert them into outer references,
2643 * which subquery_push_qual lacks the infrastructure to do. The case
2644 * arises so seldom that it doesn't seem worth working hard on.
2645 */
2646 if (var->varno != rti)
2647 {
2648 safe = false;
2649 break;
2650 }
2651
2652 /* Subqueries have no system columns */
2653 Assert(var->varattno >= 0);
2654
2655 /* Check point 4 */
2656 if (var->varattno == 0)
2657 {
2658 safe = false;
2659 break;
2660 }
2661
2662 /* Check point 5 */
2663 if (safetyInfo->unsafeColumns[var->varattno])
2664 {
2665 safe = false;
2666 break;
2667 }
2668 }
2669
2670 list_free(vars);
2671
2672 return safe;
2673 }
2674
2675 /*
2676 * subquery_push_qual - push down a qual that we have determined is safe
2677 */
2678 static void
2679 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
2680 {
2681 if (subquery->setOperations != NULL)
2682 {
2683 /* Recurse to push it separately to each component query */
2684 recurse_push_qual(subquery->setOperations, subquery,
2685 rte, rti, qual);
2686 }
2687 else if (IsA(qual, CurrentOfExpr))
2688 {
2689 /*
2690 * This is possible when a WHERE CURRENT OF expression is applied to a
2691 * table with row-level security. In that case, the subquery should
2692 * contain precisely one rtable entry for the table, and we can safely
2693 * push the expression down into the subquery. This will cause a TID
2694 * scan subquery plan to be generated, allowing the target relation to
2695 * be updated.
2696 *
2697 * Someday we might also be able to use a WHERE CURRENT OF expression
2698 * on a view, but currently the rewriter prevents that, so we should
2699 * never see any other case here, but generate sane error messages in
2700 * case it does somehow happen.
2701 */
2702 if (subquery->rtable == NIL)
2703 ereport(ERROR,
2704 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2705 errmsg("WHERE CURRENT OF is not supported on a view with no underlying relation")));
2706
2707 if (list_length(subquery->rtable) > 1)
2708 ereport(ERROR,
2709 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2710 errmsg("WHERE CURRENT OF is not supported on a view with more than one underlying relation")));
2711
2712 if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
2713 ereport(ERROR,
2714 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2715 errmsg("WHERE CURRENT OF is not supported on a view with grouping or aggregation")));
2716
2717 /*
2718 * Adjust the CURRENT OF expression to refer to the underlying table
2719 * in the subquery, and attach it to the subquery's WHERE clause.
2720 */
2721 qual = copyObject(qual);
2722 ((CurrentOfExpr *) qual)->cvarno = 1;
2723
2724 subquery->jointree->quals =
2725 make_and_qual(subquery->jointree->quals, qual);
2726 }
2727 else
2728 {
2729 /*
2730 * We need to replace Vars in the qual (which must refer to outputs of
2731 * the subquery) with copies of the subquery's targetlist expressions.
2732 * Note that at this point, any uplevel Vars in the qual should have
2733 * been replaced with Params, so they need no work.
2734 *
2735 * This step also ensures that when we are pushing into a setop tree,
2736 * each component query gets its own copy of the qual.
2737 */
2738 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
2739 subquery->targetList,
2740 REPLACEVARS_REPORT_ERROR, 0,
2741 &subquery->hasSubLinks);
2742
2743 /*
2744 * Now attach the qual to the proper place: normally WHERE, but if the
2745 * subquery uses grouping or aggregation, put it in HAVING (since the
2746 * qual really refers to the group-result rows).
2747 */
2748 if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
2749 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
2750 else
2751 subquery->jointree->quals =
2752 make_and_qual(subquery->jointree->quals, qual);
2753
2754 /*
2755 * We need not change the subquery's hasAggs or hasSublinks flags,
2756 * since we can't be pushing down any aggregates that weren't there
2757 * before, and we don't push down subselects at all.
2758 */
2759 }
2760 }
2761
2762 /*
2763 * Helper routine to recurse through setOperations tree
2764 */
2765 static void
2766 recurse_push_qual(Node *setOp, Query *topquery,
2767 RangeTblEntry *rte, Index rti, Node *qual)
2768 {
2769 if (IsA(setOp, RangeTblRef))
2770 {
2771 RangeTblRef *rtr = (RangeTblRef *) setOp;
2772 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
2773 Query *subquery = subrte->subquery;
2774
2775 Assert(subquery != NULL);
2776 subquery_push_qual(subquery, rte, rti, qual);
2777 }
2778 else if (IsA(setOp, SetOperationStmt))
2779 {
2780 SetOperationStmt *op = (SetOperationStmt *) setOp;
2781
2782 recurse_push_qual(op->larg, topquery, rte, rti, qual);
2783 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
2784 }
2785 else
2786 {
2787 elog(ERROR, "unrecognized node type: %d",
2788 (int) nodeTag(setOp));
2789 }
2790 }
2791
2792 /*****************************************************************************
2793 * SIMPLIFYING SUBQUERY TARGETLISTS
2794 *****************************************************************************/
2795
2796 /*
2797 * remove_unused_subquery_outputs
2798 * Remove subquery targetlist items we don't need
2799 *
2800 * It's possible, even likely, that the upper query does not read all the
2801 * output columns of the subquery. We can remove any such outputs that are
2802 * not needed by the subquery itself (e.g., as sort/group columns) and do not
2803 * affect semantics otherwise (e.g., volatile functions can't be removed).
2804 * This is useful not only because we might be able to remove expensive-to-
2805 * compute expressions, but because deletion of output columns might allow
2806 * optimizations such as join removal to occur within the subquery.
2807 *
2808 * To avoid affecting column numbering in the targetlist, we don't physically
2809 * remove unused tlist entries, but rather replace their expressions with NULL
2810 * constants. This is implemented by modifying subquery->targetList.
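*
* For example, if the upper query reads only column a of
*		SELECT a, slow_func(b) AS x FROM t
* (slow_func being some hypothetical non-volatile function), the x entry
* is replaced by a NULL constant of the same type and slow_func is never
* evaluated.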
2811 */
2812 static void
2813 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
2814 {
2815 Bitmapset *attrs_used = NULL;
2816 ListCell *lc;
2817
2818 /*
2819 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
2820 * could update all the child SELECTs' tlists, but it seems not worth the
2821 * trouble presently.
2822 */
2823 if (subquery->setOperations)
2824 return;
2825
2826 /*
2827 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
2828 * time: all its output columns must be used in the distinctClause.
2829 */
2830 if (subquery->distinctClause && !subquery->hasDistinctOn)
2831 return;
2832
2833 /*
2834 * Collect a bitmap of all the output column numbers used by the upper
2835 * query.
2836 *
2837 * Add all the attributes needed for joins or final output. Note: we must
2838 * look at rel's targetlist, not the attr_needed data, because attr_needed
2839 * isn't computed for inheritance child rels, cf set_append_rel_size().
2840 * (XXX might be worth changing that sometime.)
2841 */
2842 pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
2843
2844 /* Add all the attributes used by un-pushed-down restriction clauses. */
2845 foreach(lc, rel->baserestrictinfo)
2846 {
2847 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
2848
2849 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
2850 }
2851
2852 /*
2853 * If there's a whole-row reference to the subquery, we can't remove
2854 * anything.
2855 */
2856 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
2857 return;
2858
2859 /*
2860 * Run through the tlist and zap entries we don't need. It's okay to
2861 * modify the tlist items in-place because set_subquery_pathlist made a
2862 * copy of the subquery.
2863 */
2864 foreach(lc, subquery->targetList)
2865 {
2866 TargetEntry *tle = (TargetEntry *) lfirst(lc);
2867 Node *texpr = (Node *) tle->expr;
2868
2869 /*
2870 * If it has a sortgroupref number, it's used in some sort/group
2871 * clause so we'd better not remove it. Also, don't remove any
2872 * resjunk columns, since their reason for being has nothing to do
2873 * with anybody reading the subquery's output. (It's likely that
2874 * resjunk columns in a sub-SELECT would always have ressortgroupref
2875 * set, but even if they don't, it seems imprudent to remove them.)
2876 */
2877 if (tle->ressortgroupref || tle->resjunk)
2878 continue;
2879
2880 /*
2881 * If it's used by the upper query, we can't remove it.
2882 */
2883 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
2884 attrs_used))
2885 continue;
2886
2887 /*
2888 * If it contains a set-returning function, we can't remove it since
2889 * that could change the number of rows returned by the subquery.
2890 */
2891 if (expression_returns_set(texpr))
2892 continue;
2893
2894 /*
2895 * If it contains volatile functions, we daren't remove it for fear
2896 * that the user is expecting their side-effects to happen.
2897 */
2898 if (contain_volatile_functions(texpr))
2899 continue;
2900
2901 /*
2902 * OK, we don't need it. Replace the expression with a NULL constant.
2903 * Preserve the exposed type of the expression, in case something
2904 * looks at the rowtype of the subquery's result.
2905 */
2906 tle->expr = (Expr *) makeNullConst(exprType(texpr),
2907 exprTypmod(texpr),
2908 exprCollation(texpr));
2909 }
2910 }
2911
2912 /*****************************************************************************
2913 * DEBUG SUPPORT
2914 *****************************************************************************/
2915
2916 #ifdef OPTIMIZER_DEBUG
2917
2918 static void
2919 print_relids(PlannerInfo *root, Relids relids)
2920 {
2921 int x;
2922 bool first = true;
2923
2924 x = -1;
2925 while ((x = bms_next_member(relids, x)) >= 0)
2926 {
2927 if (!first)
2928 printf(" ");
2929 if (x < root->simple_rel_array_size &&
2930 root->simple_rte_array[x])
2931 printf("%s", root->simple_rte_array[x]->eref->aliasname);
2932 else
2933 printf("%d", x);
2934 first = false;
2935 }
2936 }
2937
2938 static void
2939 print_restrictclauses(PlannerInfo *root, List *clauses)
2940 {
2941 ListCell *l;
2942
2943 foreach(l, clauses)
2944 {
2945 RestrictInfo *c = lfirst(l);
2946
2947 print_expr((Node *) c->clause, root->parse->rtable);
2948 if (lnext(l))
2949 printf(", ");
2950 }
2951 }
2952
2953 static void
2954 print_path(PlannerInfo *root, Path *path, int indent)
2955 {
2956 const char *ptype;
2957 bool join = false;
2958 Path *subpath = NULL;
2959 int i;
2960
2961 switch (nodeTag(path))
2962 {
2963 case T_Path:
2964 switch (path->pathtype)
2965 {
2966 case T_SeqScan:
2967 ptype = "SeqScan";
2968 break;
2969 case T_SampleScan:
2970 ptype = "SampleScan";
2971 break;
2972 case T_SubqueryScan:
2973 ptype = "SubqueryScan";
2974 break;
2975 case T_FunctionScan:
2976 ptype = "FunctionScan";
2977 break;
2978 case T_ValuesScan:
2979 ptype = "ValuesScan";
2980 break;
2981 case T_CteScan:
2982 ptype = "CteScan";
2983 break;
2984 case T_WorkTableScan:
2985 ptype = "WorkTableScan";
2986 break;
2987 default:
2988 ptype = "???Path";
2989 break;
2990 }
2991 break;
2992 case T_IndexPath:
2993 ptype = "IdxScan";
2994 break;
2995 case T_BitmapHeapPath:
2996 ptype = "BitmapHeapScan";
2997 break;
2998 case T_BitmapAndPath:
2999 ptype = "BitmapAndPath";
3000 break;
3001 case T_BitmapOrPath:
3002 ptype = "BitmapOrPath";
3003 break;
3004 case T_TidPath:
3005 ptype = "TidScan";
3006 break;
3007 case T_SubqueryScanPath:
3008 ptype = "SubqueryScan";
3009 break;
3010 case T_ForeignPath:
3011 ptype = "ForeignScan";
3012 break;
3013 case T_CustomPath:
3014 ptype = "CustomScan";
3015 break;
3016 case T_NestPath:
3017 ptype = "NestLoop";
3018 join = true;
3019 break;
3020 case T_MergePath:
3021 ptype = "MergeJoin";
3022 join = true;
3023 break;
3024 case T_HashPath:
3025 ptype = "HashJoin";
3026 join = true;
3027 break;
3028 case T_AppendPath:
3029 ptype = "Append";
3030 break;
3031 case T_MergeAppendPath:
3032 ptype = "MergeAppend";
3033 break;
3034 case T_ResultPath:
3035 ptype = "Result";
3036 break;
3037 case T_MaterialPath:
3038 ptype = "Material";
3039 subpath = ((MaterialPath *) path)->subpath;
3040 break;
3041 case T_UniquePath:
3042 ptype = "Unique";
3043 subpath = ((UniquePath *) path)->subpath;
3044 break;
3045 case T_GatherPath:
3046 ptype = "Gather";
3047 subpath = ((GatherPath *) path)->subpath;
3048 break;
3049 case T_ProjectionPath:
3050 ptype = "Projection";
3051 subpath = ((ProjectionPath *) path)->subpath;
3052 break;
3053 case T_SortPath:
3054 ptype = "Sort";
3055 subpath = ((SortPath *) path)->subpath;
3056 break;
3057 case T_GroupPath:
3058 ptype = "Group";
3059 subpath = ((GroupPath *) path)->subpath;
3060 break;
3061 case T_UpperUniquePath:
3062 ptype = "UpperUnique";
3063 subpath = ((UpperUniquePath *) path)->subpath;
3064 break;
3065 case T_AggPath:
3066 ptype = "Agg";
3067 subpath = ((AggPath *) path)->subpath;
3068 break;
3069 case T_GroupingSetsPath:
3070 ptype = "GroupingSets";
3071 subpath = ((GroupingSetsPath *) path)->subpath;
3072 break;
3073 case T_MinMaxAggPath:
3074 ptype = "MinMaxAgg";
3075 break;
3076 case T_WindowAggPath:
3077 ptype = "WindowAgg";
3078 subpath = ((WindowAggPath *) path)->subpath;
3079 break;
3080 case T_SetOpPath:
3081 ptype = "SetOp";
3082 subpath = ((SetOpPath *) path)->subpath;
3083 break;
3084 case T_RecursiveUnionPath:
3085 ptype = "RecursiveUnion";
3086 break;
3087 case T_LockRowsPath:
3088 ptype = "LockRows";
3089 subpath = ((LockRowsPath *) path)->subpath;
3090 break;
3091 case T_ModifyTablePath:
3092 ptype = "ModifyTable";
3093 break;
3094 case T_LimitPath:
3095 ptype = "Limit";
3096 subpath = ((LimitPath *) path)->subpath;
3097 break;
3098 default:
3099 ptype = "???Path";
3100 break;
3101 }
3102
3103 for (i = 0; i < indent; i++)
3104 printf("\t");
3105 printf("%s", ptype);
3106
3107 if (path->parent)
3108 {
3109 printf("(");
3110 print_relids(root, path->parent->relids);
3111 printf(")");
3112 }
3113 if (path->param_info)
3114 {
3115 printf(" required_outer (");
3116 print_relids(root, path->param_info->ppi_req_outer);
3117 printf(")");
3118 }
3119 printf(" rows=%.0f cost=%.2f..%.2f\n",
3120 path->rows, path->startup_cost, path->total_cost);
3121
3122 if (path->pathkeys)
3123 {
3124 for (i = 0; i < indent; i++)
3125 printf("\t");
3126 printf(" pathkeys: ");
3127 print_pathkeys(path->pathkeys, root->parse->rtable);
3128 }
3129
3130 if (join)
3131 {
3132 JoinPath *jp = (JoinPath *) path;
3133
3134 for (i = 0; i < indent; i++)
3135 printf("\t");
3136 printf(" clauses: ");
3137 print_restrictclauses(root, jp->joinrestrictinfo);
3138 printf("\n");
3139
3140 if (IsA(path, MergePath))
3141 {
3142 MergePath *mp = (MergePath *) path;
3143
3144 for (i = 0; i < indent; i++)
3145 printf("\t");
3146 printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
3147 ((mp->outersortkeys) ? 1 : 0),
3148 ((mp->innersortkeys) ? 1 : 0),
3149 ((mp->materialize_inner) ? 1 : 0));
3150 }
3151
3152 print_path(root, jp->outerjoinpath, indent + 1);
3153 print_path(root, jp->innerjoinpath, indent + 1);
3154 }
3155
3156 if (subpath)
3157 print_path(root, subpath, indent + 1);
3158 }
3159
3160 void
3161 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
3162 {
3163 ListCell *l;
3164
3165 printf("RELOPTINFO (");
3166 print_relids(root, rel->relids);
3167 printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
3168
3169 if (rel->baserestrictinfo)
3170 {
3171 printf("\tbaserestrictinfo: ");
3172 print_restrictclauses(root, rel->baserestrictinfo);
3173 printf("\n");
3174 }
3175
3176 if (rel->joininfo)
3177 {
3178 printf("\tjoininfo: ");
3179 print_restrictclauses(root, rel->joininfo);
3180 printf("\n");
3181 }
3182
3183 printf("\tpath list:\n");
3184 foreach(l, rel->pathlist)
3185 print_path(root, lfirst(l), 1);
3186 if (rel->cheapest_parameterized_paths)
3187 {
3188 printf("\n\tcheapest parameterized paths:\n");
3189 foreach(l, rel->cheapest_parameterized_paths)
3190 print_path(root, lfirst(l), 1);
3191 }
3192 if (rel->cheapest_startup_path)
3193 {
3194 printf("\n\tcheapest startup path:\n");
3195 print_path(root, rel->cheapest_startup_path, 1);
3196 }
3197 if (rel->cheapest_total_path)
3198 {
3199 printf("\n\tcheapest total path:\n");
3200 print_path(root, rel->cheapest_total_path, 1);
3201 }
3202 printf("\n");
3203 fflush(stdout);
3204 }
3205
3206 #endif /* OPTIMIZER_DEBUG */
3207