/*-------------------------------------------------------------------------
 *
 * allpaths.c
 *    Routines to find possible search paths for processing a query
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/path/allpaths.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>
#include <math.h>

#include "access/sysattr.h"
#include "access/tsmapi.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/appendinfo.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
#include "optimizer/inherit.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"


/* results of subquery_is_pushdown_safe */
typedef struct pushdown_safety_info
{
    bool       *unsafeColumns;  /* which output columns are unsafe to use */
    bool        unsafeVolatile; /* don't push down volatile quals */
    bool        unsafeLeaky;    /* don't push down leaky quals */
} pushdown_safety_info;

/* These parameters are set by GUC */
bool        enable_geqo = false;    /* just in case GUC doesn't set it */
int         geqo_threshold;
int         min_parallel_table_scan_size;
int         min_parallel_index_scan_size;

/* Hook for plugins to get control in set_rel_pathlist() */
set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;

/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;


static void set_base_rel_consider_startup(PlannerInfo *root);
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
                         Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                             Index rti, RangeTblEntry *rte);
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
                               RangeTblEntry *rte);
static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
                                      RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                   RangeTblEntry *rte);
static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
                                     RangeTblEntry *rte);
static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                         RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
                             RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                 RangeTblEntry *rte);
static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                                Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                    Index rti, RangeTblEntry *rte);
static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
                                         List *live_childrels,
                                         List *all_child_pathkeys,
                                         List *partitioned_rels);
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
                                                   RelOptInfo *rel,
                                                   Relids required_outer);
static void accumulate_append_subpath(Path *path,
                                      List **subpaths, List **special_subpaths);
static Path *get_singleton_append_subpath(Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                  Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                  RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                RangeTblEntry *rte);
static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                   RangeTblEntry *rte);
static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
                             RangeTblEntry *rte);
static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                         RangeTblEntry *rte);
static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                   RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
                                      pushdown_safety_info *safetyInfo);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
                                  pushdown_safety_info *safetyInfo);
static void check_output_expressions(Query *subquery,
                                     pushdown_safety_info *safetyInfo);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
                                    pushdown_safety_info *safetyInfo);
static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                                  pushdown_safety_info *safetyInfo);
static void subquery_push_qual(Query *subquery,
                               RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
                              RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);


/*
 * make_one_rel
 *    Finds all possible access paths for executing a query, returning a
 *    single rel that represents the join of all base rels in the query.
 */
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
    RelOptInfo *rel;
    Index       rti;
    double      total_pages;

    /*
     * Construct the all_baserels Relids set.
     */
    root->all_baserels = NULL;
    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *brel = root->simple_rel_array[rti];

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (brel == NULL)
            continue;

        Assert(brel->relid == rti); /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (brel->reloptkind != RELOPT_BASEREL)
            continue;

        root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
    }

    /* Mark base rels as to whether we care about fast-start plans */
    set_base_rel_consider_startup(root);

    /*
     * Compute size estimates and consider_parallel flags for each base rel.
     */
    set_base_rel_sizes(root);

    /*
     * We should now have size estimates for every actual table involved in
     * the query, and we also know which if any have been deleted from the
     * query by join removal, pruned by partition pruning, or eliminated by
     * constraint exclusion.  So we can now compute total_table_pages.
     *
     * Note that appendrels are not double-counted here, even though we don't
     * bother to distinguish RelOptInfos for appendrel parents, because the
     * parents will have pages = 0.
     *
     * XXX if a table is self-joined, we will count it once per appearance,
     * which perhaps is the wrong thing ... but that's not completely clear,
     * and detecting self-joins here is difficult, so ignore it for now.
     */
    total_pages = 0;
    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *brel = root->simple_rel_array[rti];

        if (brel == NULL)
            continue;

        Assert(brel->relid == rti); /* sanity check on array */

        if (IS_DUMMY_REL(brel))
            continue;

        if (IS_SIMPLE_REL(brel))
            total_pages += (double) brel->pages;
    }
    root->total_table_pages = total_pages;

    /*
     * Generate access paths for each base rel.
     */
    set_base_rel_pathlists(root);

    /*
     * Generate access paths for the entire join tree.
     */
    rel = make_rel_from_joinlist(root, joinlist);

    /*
     * The result should join all and only the query's base rels.
     */
    Assert(bms_equal(rel->relids, root->all_baserels));

    return rel;
}

/*
 * set_base_rel_consider_startup
 *    Set the consider_[param_]startup flags for each base-relation entry.
 *
 * For the moment, we only deal with consider_param_startup here; because the
 * logic for consider_startup is pretty trivial and is the same for every base
 * relation, we just let build_simple_rel() initialize that flag correctly to
 * start with.  If that logic ever gets more complicated it would probably
 * be better to move it here.
 */
static void
set_base_rel_consider_startup(PlannerInfo *root)
{
    /*
     * Since parameterized paths can only be used on the inside of a nestloop
     * join plan, there is usually little value in considering fast-start
     * plans for them.  However, for relations that are on the RHS of a SEMI
     * or ANTI join, a fast-start plan can be useful because we're only going
     * to care about fetching one tuple anyway.
     *
     * To minimize growth of planning time, we currently restrict this to
     * cases where the RHS is a single base relation, not a join; there is no
     * provision for consider_param_startup to get set at all on joinrels.
     * Also we don't worry about appendrels.  costsize.c's costing rules for
     * nestloop semi/antijoins don't consider such cases either.
     */
    ListCell   *lc;

    foreach(lc, root->join_info_list)
    {
        SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
        int         varno;

        if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
            bms_get_singleton_member(sjinfo->syn_righthand, &varno))
        {
            RelOptInfo *rel = find_base_rel(root, varno);

            rel->consider_param_startup = true;
        }
    }
}

/*
 * set_base_rel_sizes
 *    Set the size estimates (rows and widths) for each base-relation entry.
 *    Also determine whether to consider parallel paths for base relations.
 *
 * We do this in a separate pass over the base rels so that rowcount
 * estimates are available for parameterized path generation, and also so
 * that each rel's consider_parallel flag is set correctly before we begin to
 * generate paths.
 */
static void
set_base_rel_sizes(PlannerInfo *root)
{
    Index       rti;

    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *rel = root->simple_rel_array[rti];
        RangeTblEntry *rte;

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (rel == NULL)
            continue;

        Assert(rel->relid == rti);  /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (rel->reloptkind != RELOPT_BASEREL)
            continue;

        rte = root->simple_rte_array[rti];

        /*
         * If parallelism is allowable for this query in general, see whether
         * it's allowable for this rel in particular.  We have to do this
         * before set_rel_size(), because (a) if this rel is an inheritance
         * parent, set_append_rel_size() will use and perhaps change the rel's
         * consider_parallel flag, and (b) for some RTE types, set_rel_size()
         * goes ahead and makes paths immediately.
         */
        if (root->glob->parallelModeOK)
            set_rel_consider_parallel(root, rel, rte);

        set_rel_size(root, rel, rti, rte);
    }
}

/*
 * set_base_rel_pathlists
 *    Finds all paths available for scanning each base-relation entry.
 *    Sequential scan and any available indices are considered.
 *    Each useful path is attached to its relation's 'pathlist' field.
 */
static void
set_base_rel_pathlists(PlannerInfo *root)
{
    Index       rti;

    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *rel = root->simple_rel_array[rti];

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (rel == NULL)
            continue;

        Assert(rel->relid == rti);  /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (rel->reloptkind != RELOPT_BASEREL)
            continue;

        set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
    }
}

/*
 * set_rel_size
 *    Set size estimates for a base relation
 */
static void
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
             Index rti, RangeTblEntry *rte)
{
    if (rel->reloptkind == RELOPT_BASEREL &&
        relation_excluded_by_constraints(root, rel, rte))
    {
        /*
         * We proved we don't need to scan the rel via constraint exclusion,
         * so set up a single dummy path for it.  Here we only check this for
         * regular baserels; if it's an otherrel, CE was already checked in
         * set_append_rel_size().
         *
         * In this case, we go ahead and set up the relation's path right away
         * instead of leaving it for set_rel_pathlist to do.  This is because
         * we don't have a convention for marking a rel as dummy except by
         * assigning a dummy path to it.
         */
        set_dummy_rel_pathlist(rel);
    }
    else if (rte->inh)
    {
        /* It's an "append relation", process accordingly */
        set_append_rel_size(root, rel, rti, rte);
    }
    else
    {
        switch (rel->rtekind)
        {
            case RTE_RELATION:
                if (rte->relkind == RELKIND_FOREIGN_TABLE)
                {
                    /* Foreign table */
                    set_foreign_size(root, rel, rte);
                }
                else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
                {
                    /*
                     * We could get here if asked to scan a partitioned table
                     * with ONLY.  In that case we shouldn't scan any of the
                     * partitions, so mark it as a dummy rel.
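                     *
                     * (For example, "SELECT ... FROM ONLY parted_tab" returns
                     * no rows: a partitioned parent stores no data itself,
                     * and ONLY excludes its partitions.)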
                     */
                    set_dummy_rel_pathlist(rel);
                }
                else if (rte->tablesample != NULL)
                {
                    /* Sampled relation */
                    set_tablesample_rel_size(root, rel, rte);
                }
                else
                {
                    /* Plain relation */
                    set_plain_rel_size(root, rel, rte);
                }
                break;
            case RTE_SUBQUERY:

                /*
                 * Subqueries don't support making a choice between
                 * parameterized and unparameterized paths, so just go ahead
                 * and build their paths immediately.
                 */
                set_subquery_pathlist(root, rel, rti, rte);
                break;
            case RTE_FUNCTION:
                set_function_size_estimates(root, rel);
                break;
            case RTE_TABLEFUNC:
                set_tablefunc_size_estimates(root, rel);
                break;
            case RTE_VALUES:
                set_values_size_estimates(root, rel);
                break;
            case RTE_CTE:

                /*
                 * CTEs don't support making a choice between parameterized
                 * and unparameterized paths, so just go ahead and build their
                 * paths immediately.
                 */
                if (rte->self_reference)
                    set_worktable_pathlist(root, rel, rte);
                else
                    set_cte_pathlist(root, rel, rte);
                break;
            case RTE_NAMEDTUPLESTORE:
                /* Might as well just build the path immediately */
                set_namedtuplestore_pathlist(root, rel, rte);
                break;
            case RTE_RESULT:
                /* Might as well just build the path immediately */
                set_result_pathlist(root, rel, rte);
                break;
            default:
                elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
                break;
        }
    }

    /*
     * We insist that all non-dummy rels have a nonzero rowcount estimate.
     */
    Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
}

/*
 * set_rel_pathlist
 *    Build access paths for a base relation
 */
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 Index rti, RangeTblEntry *rte)
{
    if (IS_DUMMY_REL(rel))
    {
        /* We already proved the relation empty, so nothing more to do */
    }
    else if (rte->inh)
    {
        /* It's an "append relation", process accordingly */
        set_append_rel_pathlist(root, rel, rti, rte);
    }
    else
    {
        switch (rel->rtekind)
        {
            case RTE_RELATION:
                if (rte->relkind == RELKIND_FOREIGN_TABLE)
                {
                    /* Foreign table */
                    set_foreign_pathlist(root, rel, rte);
                }
                else if (rte->tablesample != NULL)
                {
                    /* Sampled relation */
                    set_tablesample_rel_pathlist(root, rel, rte);
                }
                else
                {
                    /* Plain relation */
                    set_plain_rel_pathlist(root, rel, rte);
                }
                break;
            case RTE_SUBQUERY:
                /* Subquery --- fully handled during set_rel_size */
                break;
            case RTE_FUNCTION:
                /* RangeFunction */
                set_function_pathlist(root, rel, rte);
                break;
            case RTE_TABLEFUNC:
                /* Table Function */
                set_tablefunc_pathlist(root, rel, rte);
                break;
            case RTE_VALUES:
                /* Values list */
                set_values_pathlist(root, rel, rte);
                break;
            case RTE_CTE:
                /* CTE reference --- fully handled during set_rel_size */
                break;
            case RTE_NAMEDTUPLESTORE:
                /* tuplestore reference --- fully handled during set_rel_size */
                break;
            case RTE_RESULT:
                /* simple Result --- fully handled during set_rel_size */
                break;
            default:
                elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
                break;
        }
    }

    /*
     * Allow a plugin to editorialize on the set of Paths for this base
     * relation.  It could add new paths (such as CustomPaths) by calling
     * add_path(), or add_partial_path() if parallel aware.  It could also
     * delete or modify paths added by the core code.
     */
    if (set_rel_pathlist_hook)
        (*set_rel_pathlist_hook) (root, rel, rti, rte);

    /*
     * If this is a baserel, we should normally consider gathering any partial
     * paths we may have created for it.  We have to do this after calling the
     * set_rel_pathlist_hook, else it cannot add partial paths to be included
     * here.
     *
     * However, if this is an inheritance child, skip it.  Otherwise, we could
     * end up with a very large number of gather nodes, each trying to grab
     * its own pool of workers.  Instead, we'll consider gathering partial
     * paths for the parent appendrel.
     *
     * Also, if this is the topmost scan/join rel (that is, the only baserel),
     * we postpone gathering until the final scan/join targetlist is available
     * (see grouping_planner).
     */
    if (rel->reloptkind == RELOPT_BASEREL &&
        bms_membership(root->all_baserels) != BMS_SINGLETON)
        generate_useful_gather_paths(root, rel, false);

    /* Now find the cheapest of the paths for this rel */
    set_cheapest(rel);

#ifdef OPTIMIZER_DEBUG
    debug_print_rel(root, rel);
#endif
}

/*
 * set_plain_rel_size
 *    Set size estimates for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /*
     * Test any partial indexes of rel for applicability.  We must do this
     * first since partial unique indexes can affect size estimates.
     */
    check_index_predicates(root, rel);

    /* Mark rel with estimated output rows, width, etc */
    set_baserel_size_estimates(root, rel);
}

/*
 * If this relation could possibly be scanned from within a worker, then set
 * its consider_parallel flag.
 */
static void
set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
                          RangeTblEntry *rte)
{
    /*
     * The flag has previously been initialized to false, so we can just
     * return if it becomes clear that we can't safely set it.
     */
    Assert(!rel->consider_parallel);

    /* Don't call this if parallelism is disallowed for the entire query. */
    Assert(root->glob->parallelModeOK);

    /* This should only be called for baserels and appendrel children. */
    Assert(IS_SIMPLE_REL(rel));

    /* Assorted checks based on rtekind. */
    switch (rte->rtekind)
    {
        case RTE_RELATION:

            /*
             * Currently, parallel workers can't access the leader's temporary
             * tables.  We could possibly relax this if we wrote all of its
             * local buffers at the start of the query and made no changes
             * thereafter (maybe we could allow hint bit changes), and if we
             * taught the workers to read them.  Writing a large number of
             * temporary buffers could be expensive, though, and we don't have
             * the rest of the necessary infrastructure right now anyway.  So
             * for now, bail out if we see a temporary table.
             */
            if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
                return;

            /*
             * Table sampling can be pushed down to workers if the sample
             * function and its arguments are safe.
             */
            if (rte->tablesample != NULL)
            {
                char        proparallel = func_parallel(rte->tablesample->tsmhandler);

                if (proparallel != PROPARALLEL_SAFE)
                    return;
                if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
                    return;
            }

            /*
             * Ask FDWs whether they can support performing a ForeignScan
             * within a worker.  Most often, the answer will be no.  For
             * example, if the nature of the FDW is such that it opens a TCP
             * connection with a remote server, each parallel worker would end
             * up with a separate connection, and these connections might not
             * be appropriately coordinated between workers and the leader.
             */
            if (rte->relkind == RELKIND_FOREIGN_TABLE)
            {
                Assert(rel->fdwroutine);
                if (!rel->fdwroutine->IsForeignScanParallelSafe)
                    return;
                if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
                    return;
            }

            /*
             * There are additional considerations for appendrels, which we'll
             * deal with in set_append_rel_size and set_append_rel_pathlist.
             * For now, just set consider_parallel based on the rel's own
             * quals and targetlist.
             */
            break;

        case RTE_SUBQUERY:

            /*
             * There's no intrinsic problem with scanning a subquery-in-FROM
             * (as distinct from a SubPlan or InitPlan) in a parallel worker.
             * If the subquery doesn't happen to have any parallel-safe paths,
             * then flagging it as consider_parallel won't change anything,
             * but that's true for plain tables, too.  We must set
             * consider_parallel based on the rel's own quals and targetlist,
             * so that if a subquery path is parallel-safe but the quals and
             * projection we're sticking onto it are not, we correctly mark
             * the SubqueryScanPath as not parallel-safe.  (Note that
             * set_subquery_pathlist() might push some of these quals down
             * into the subquery itself, but that doesn't change anything.)
             *
             * We can't push a sub-select containing LIMIT/OFFSET to workers,
             * as there is no guarantee that the row order will be fully
             * deterministic, and applying LIMIT/OFFSET will lead to
             * inconsistent results at the top-level.  (In some cases, where
             * the result is ordered, we could relax this restriction.  But it
             * doesn't currently seem worth expending extra effort to do so.)
             */
            {
                Query      *subquery = castNode(Query, rte->subquery);

                if (limit_needed(subquery))
                    return;
            }
            break;

        case RTE_JOIN:
            /* Shouldn't happen; we're only considering baserels here. */
            Assert(false);
            return;

        case RTE_FUNCTION:
            /* Check for parallel-restricted functions. */
            if (!is_parallel_safe(root, (Node *) rte->functions))
                return;
            break;

        case RTE_TABLEFUNC:
            /* not parallel safe */
            return;

        case RTE_VALUES:
            /* Check for parallel-restricted functions. */
            if (!is_parallel_safe(root, (Node *) rte->values_lists))
                return;
            break;

        case RTE_CTE:

            /*
             * CTE tuplestores aren't shared among parallel workers, so we
             * force all CTE scans to happen in the leader.  Also, populating
             * the CTE would require executing a subplan that's not available
             * in the worker, might be parallel-restricted, and must get
             * executed only once.
             */
            return;

        case RTE_NAMEDTUPLESTORE:

            /*
             * tuplestore cannot be shared, at least without more
             * infrastructure to support that.
             */
            return;

        case RTE_RESULT:
            /* RESULT RTEs, in themselves, are no problem. */
            break;
    }

    /*
     * If there's anything in baserestrictinfo that's parallel-restricted, we
     * give up on parallelizing access to this relation.  We could consider
     * instead postponing application of the restricted quals until we're
     * above all the parallelism in the plan tree, but it's not clear that
     * that would be a win in very many cases, and it might be tricky to make
     * outer join clauses work correctly.  It would likely break equivalence
     * classes, too.
     */
    if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
        return;

    /*
     * Likewise, if the relation's outputs are not parallel-safe, give up.
     * (Usually, they're just Vars, but sometimes they're not.)
     */
    if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
        return;

    /* We have a winner. */
    rel->consider_parallel = true;
}

/*
 * set_plain_rel_pathlist
 *    Build access paths for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    Relids      required_outer;

    /*
     * We don't support pushing join clauses into the quals of a seqscan, but
     * it could still have required parameterization due to LATERAL refs in
     * its tlist.
     */
    required_outer = rel->lateral_relids;

    /* Consider sequential scan */
    add_path(rel, create_seqscan_path(root, rel, required_outer, 0));

    /* If appropriate, consider parallel sequential scan */
    if (rel->consider_parallel && required_outer == NULL)
        create_plain_partial_paths(root, rel);

    /* Consider index scans */
    create_index_paths(root, rel);

    /* Consider TID scans */
    create_tidscan_paths(root, rel);
}

/*
 * create_plain_partial_paths
 *    Build partial access paths for parallel scan of a plain relation
 */
static void
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
{
    int         parallel_workers;

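    /*
     * Let compute_parallel_worker() choose a worker count from the table's
     * heap size; passing -1 for index_pages indicates that quantity is
     * unknown/irrelevant, since a plain sequential scan reads no index pages.
     */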
    parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
                                               max_parallel_workers_per_gather);

    /* If any limit was set to zero, the user doesn't want a parallel scan. */
    if (parallel_workers <= 0)
        return;

    /* Add an unordered partial path based on a parallel sequential scan. */
    add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
}

/*
 * set_tablesample_rel_size
 *    Set size estimates for a sampled relation
 */
static void
set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    TableSampleClause *tsc = rte->tablesample;
    TsmRoutine *tsm;
    BlockNumber pages;
    double      tuples;

    /*
     * Test any partial indexes of rel for applicability.  We must do this
     * first since partial unique indexes can affect size estimates.
     */
    check_index_predicates(root, rel);

    /*
     * Call the sampling method's estimation function to estimate the number
     * of pages it will read and the number of tuples it will return.  (Note:
     * we assume the function returns sane values.)
     */
    tsm = GetTsmRoutine(tsc->tsmhandler);
    tsm->SampleScanGetSampleSize(root, rel, tsc->args,
                                 &pages, &tuples);

    /*
     * For the moment, because we will only consider a SampleScan path for the
     * rel, it's okay to just overwrite the pages and tuples estimates for the
     * whole relation.  If we ever consider multiple path types for sampled
     * rels, we'll need more complication.
     */
    rel->pages = pages;
    rel->tuples = tuples;

    /* Mark rel with estimated output rows, width, etc */
    set_baserel_size_estimates(root, rel);
}

/*
 * set_tablesample_rel_pathlist
 *    Build access paths for a sampled relation
 */
static void
set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    Relids      required_outer;
    Path       *path;

    /*
     * We don't support pushing join clauses into the quals of a samplescan,
     * but it could still have required parameterization due to LATERAL refs
     * in its tlist or TABLESAMPLE arguments.
     */
    required_outer = rel->lateral_relids;

    /* Consider sampled scan */
    path = create_samplescan_path(root, rel, required_outer);

    /*
     * If the sampling method does not support repeatable scans, we must avoid
     * plans that would scan the rel multiple times.  Ideally, we'd simply
     * avoid putting the rel on the inside of a nestloop join; but adding such
     * a consideration to the planner seems like a great deal of complication
     * to support an uncommon usage of second-rate sampling methods.  Instead,
     * if there is a risk that the query might perform an unsafe join, just
     * wrap the SampleScan in a Materialize node.  We can check for joins by
     * counting the membership of all_baserels (note that this correctly
     * counts inheritance trees as single rels).  If we're inside a subquery,
     * we can't easily check whether a join might occur in the outer query, so
     * just assume one is possible.
     *
     * GetTsmRoutine is relatively expensive compared to the other tests here,
     * so check repeatable_across_scans last, even though that's a bit odd.
     */
    if ((root->query_level > 1 ||
         bms_membership(root->all_baserels) != BMS_SINGLETON) &&
        !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
    {
        path = (Path *) create_material_path(rel, path);
    }

    add_path(rel, path);

    /* For the moment, at least, there are no other paths to consider */
}

/*
 * set_foreign_size
 *    Set size estimates for a foreign table RTE
 */
static void
set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /* Mark rel with estimated output rows, width, etc */
    set_foreign_size_estimates(root, rel);

    /* Let FDW adjust the size estimates, if it can */
    rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);

    /* ... but do not let it set the rows estimate to zero */
    rel->rows = clamp_row_est(rel->rows);

    /* also, make sure rel->tuples is not insane relative to rel->rows */
    rel->tuples = Max(rel->tuples, rel->rows);
}

/*
 * set_foreign_pathlist
 *    Build access paths for a foreign table RTE
 */
static void
set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /* Call the FDW's GetForeignPaths function to generate path(s) */
    rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
}

/*
 * set_append_rel_size
 *    Set size estimates for a simple "append relation"
 *
 * The passed-in rel and RTE represent the entire append relation.  The
 * relation's contents are computed by appending together the output of the
 * individual member relations.  Note that in the non-partitioned inheritance
 * case, the first member relation is actually the same table as is mentioned
 * in the parent RTE ... but it has a different RTE and RelOptInfo.  This is
 * a good thing because their outputs are not the same size.
 */
static void
set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                    Index rti, RangeTblEntry *rte)
{
    int         parentRTindex = rti;
    bool        has_live_children;
    double      parent_rows;
    double      parent_size;
    double     *parent_attrsizes;
    int         nattrs;
    ListCell   *l;

    /* Guard against stack overflow due to overly deep inheritance tree. */
    check_stack_depth();

    Assert(IS_SIMPLE_REL(rel));

    /*
     * Initialize partitioned_child_rels to contain this RT index.
     *
     * Note that during the set_append_rel_pathlist() phase, we will bubble up
     * the indexes of partitioned relations that appear down in the tree, so
     * that when we've created Paths for all the children, the root
     * partitioned table's list will contain all such indexes.
     */
    if (rte->relkind == RELKIND_PARTITIONED_TABLE)
        rel->partitioned_child_rels = list_make1_int(rti);

    /*
     * If this is a partitioned baserel, set the consider_partitionwise_join
     * flag; currently, we only consider partitionwise joins with the baserel
     * if its targetlist doesn't contain a whole-row Var.
     */
    if (enable_partitionwise_join &&
        rel->reloptkind == RELOPT_BASEREL &&
        rte->relkind == RELKIND_PARTITIONED_TABLE &&
        rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
        rel->consider_partitionwise_join = true;

    /*
     * Initialize to compute size estimates for whole append relation.
     *
     * We handle width estimates by weighting the widths of different child
     * rels proportionally to their number of rows.  This is sensible because
     * the use of width estimates is mainly to compute the total relation
     * "footprint" if we have to sort or hash it.  To do this, we sum the
     * total equivalent size (in "double" arithmetic) and then divide by the
     * total rowcount estimate.  This is done separately for the total rel
     * width and each attribute.
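     *
     * (Hypothetical numbers: two children with (rows, width) of (100, 40)
     * and (300, 8) give a parent width of (100*40 + 300*8) / 400 = 16.)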
     *
     * Note: if you consider changing this logic, beware that child rels could
     * have zero rows and/or width, if they were excluded by constraints.
     */
    has_live_children = false;
    parent_rows = 0;
    parent_size = 0;
    nattrs = rel->max_attr - rel->min_attr + 1;
    parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));

    foreach(l, root->append_rel_list)
    {
        AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
        int         childRTindex;
        RangeTblEntry *childRTE;
        RelOptInfo *childrel;
        ListCell   *parentvars;
        ListCell   *childvars;

        /* append_rel_list contains all append rels; ignore others */
        if (appinfo->parent_relid != parentRTindex)
            continue;

        childRTindex = appinfo->child_relid;
        childRTE = root->simple_rte_array[childRTindex];

        /*
         * The child rel's RelOptInfo was already created during
         * add_other_rels_to_query.
         */
        childrel = find_base_rel(root, childRTindex);
        Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);

        /* We may have already proven the child to be dummy. */
        if (IS_DUMMY_REL(childrel))
            continue;

        /*
         * We have to copy the parent's targetlist and quals to the child,
         * with appropriate substitution of variables.  However, the
         * baserestrictinfo quals were already copied/substituted when the
         * child RelOptInfo was built.  So we don't need any additional setup
         * before applying constraint exclusion.
         */
        if (relation_excluded_by_constraints(root, childrel, childRTE))
        {
            /*
             * This child need not be scanned, so we can omit it from the
             * appendrel.
             */
            set_dummy_rel_pathlist(childrel);
            continue;
        }

        /*
         * Constraint exclusion failed, so copy the parent's join quals and
         * targetlist to the child, with appropriate variable substitutions.
         *
         * NB: the resulting childrel->reltarget->exprs may contain arbitrary
         * expressions, which otherwise would not occur in a rel's targetlist.
         * Code that might be looking at an appendrel child must cope with
         * such.  (Normally, a rel's targetlist would only include Vars and
         * PlaceHolderVars.)  XXX we do not bother to update the cost or width
         * fields of childrel->reltarget; not clear if that would be useful.
         */
        childrel->joininfo = (List *)
            adjust_appendrel_attrs(root,
                                   (Node *) rel->joininfo,
                                   1, &appinfo);
        childrel->reltarget->exprs = (List *)
            adjust_appendrel_attrs(root,
                                   (Node *) rel->reltarget->exprs,
                                   1, &appinfo);

        /*
         * We have to make child entries in the EquivalenceClass data
         * structures as well.  This is needed either if the parent
         * participates in some eclass joins (because we will want to consider
         * inner-indexscan joins on the individual children) or if the parent
         * has useful pathkeys (because we should try to build MergeAppend
         * paths that produce those sort orderings).
         */
        if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
            add_child_rel_equivalences(root, appinfo, rel, childrel);
        childrel->has_eclass_joins = rel->has_eclass_joins;

        /*
         * Note: we could compute appropriate attr_needed data for the child's
         * variables, by transforming the parent's attr_needed through the
         * translated_vars mapping.  However, currently there's no need
         * because attr_needed is only examined for base relations not
         * otherrels.  So we just leave the child's attr_needed empty.
         */

        /*
         * If we consider partitionwise joins with the parent rel, do the same
         * for partitioned child rels.
         *
         * Note: here we abuse the consider_partitionwise_join flag by setting
         * it for child rels that are not themselves partitioned.  We do so to
         * tell try_partitionwise_join() that the child rel is sufficiently
         * valid to be used as a per-partition input, even if it later gets
         * proven to be dummy.  (It's not usable until we've set up the
         * reltarget and EC entries, which we just did.)
         */
        if (rel->consider_partitionwise_join)
            childrel->consider_partitionwise_join = true;

        /*
         * If parallelism is allowable for this query in general, see whether
         * it's allowable for this childrel in particular.  But if we've
         * already decided the appendrel is not parallel-safe as a whole,
         * there's no point in considering parallelism for this child.  For
         * consistency, do this before calling set_rel_size() for the child.
         */
        if (root->glob->parallelModeOK && rel->consider_parallel)
            set_rel_consider_parallel(root, childrel, childRTE);

        /*
         * Compute the child's size.
         */
        set_rel_size(root, childrel, childRTindex, childRTE);

        /*
         * It is possible that constraint exclusion detected a contradiction
         * within a child subquery, even though we didn't prove one above.  If
         * so, we can skip this child.
         */
        if (IS_DUMMY_REL(childrel))
            continue;

        /* We have at least one live child. */
        has_live_children = true;

        /*
         * If any live child is not parallel-safe, treat the whole appendrel
         * as not parallel-safe.  In future we might be able to generate plans
         * in which some children are farmed out to workers while others are
         * not; but we don't have that today, so it's a waste to consider
         * partial paths anywhere in the appendrel unless it's all safe.
         * (Child rels visited before this one will be unmarked in
         * set_append_rel_pathlist().)
         */
        if (!childrel->consider_parallel)
            rel->consider_parallel = false;

        /*
         * Accumulate size information from each live child.
         */
        Assert(childrel->rows > 0);

        parent_rows += childrel->rows;
        parent_size += childrel->reltarget->width * childrel->rows;

        /*
         * Accumulate per-column estimates too.  We need not do anything for
         * PlaceHolderVars in the parent list.  If child expression isn't a
         * Var, or we didn't record a width estimate for it, we have to fall
         * back on a datatype-based estimate.
         *
         * By construction, child's targetlist is 1-to-1 with parent's.
         */
        forboth(parentvars, rel->reltarget->exprs,
                childvars, childrel->reltarget->exprs)
        {
            Var        *parentvar = (Var *) lfirst(parentvars);
            Node       *childvar = (Node *) lfirst(childvars);

            if (IsA(parentvar, Var))
            {
                int         pndx = parentvar->varattno - rel->min_attr;
                int32       child_width = 0;

                if (IsA(childvar, Var) &&
                    ((Var *) childvar)->varno == childrel->relid)
                {
                    int         cndx = ((Var *) childvar)->varattno - childrel->min_attr;

                    child_width = childrel->attr_widths[cndx];
                }
                if (child_width <= 0)
                    child_width = get_typavgwidth(exprType(childvar),
                                                  exprTypmod(childvar));
                Assert(child_width > 0);
                parent_attrsizes[pndx] += child_width * childrel->rows;
            }
        }
    }

    if (has_live_children)
    {
        /*
         * Save the finished size estimates.
         */
        int         i;

        Assert(parent_rows > 0);
        rel->rows = parent_rows;
        rel->reltarget->width = rint(parent_size / parent_rows);
        for (i = 0; i < nattrs; i++)
            rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);

        /*
         * Set "raw tuples" count equal to "rows" for the appendrel; needed
         * because some places assume rel->tuples is valid for any baserel.
         */
        rel->tuples = parent_rows;

        /*
         * Note that we leave rel->pages as zero; this is important to avoid
         * double-counting the appendrel tree in total_table_pages.
         */
    }
    else
    {
        /*
         * All children were excluded by constraints, so mark the whole
         * appendrel dummy.  We must do this in this phase so that the rel's
         * dummy-ness is visible when we generate paths for other rels.
         */
        set_dummy_rel_pathlist(rel);
    }

    pfree(parent_attrsizes);
}

/*
 * set_append_rel_pathlist
 *    Build access paths for an "append relation"
 */
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        Index rti, RangeTblEntry *rte)
{
    int         parentRTindex = rti;
    List       *live_childrels = NIL;
    ListCell   *l;

    /*
     * Generate access paths for each member relation, and remember the
     * non-dummy children.
     */
    foreach(l, root->append_rel_list)
    {
        AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
        int         childRTindex;
        RangeTblEntry *childRTE;
        RelOptInfo *childrel;

        /* append_rel_list contains all append rels; ignore others */
        if (appinfo->parent_relid != parentRTindex)
            continue;

        /* Re-locate the child RTE and RelOptInfo */
        childRTindex = appinfo->child_relid;
        childRTE = root->simple_rte_array[childRTindex];
        childrel = root->simple_rel_array[childRTindex];

        /*
         * If set_append_rel_size() decided the parent appendrel was
         * parallel-unsafe at some point after visiting this child rel, we
         * need to propagate the unsafety marking down to the child, so that
         * we don't generate useless partial paths for it.
         */
        if (!rel->consider_parallel)
            childrel->consider_parallel = false;

        /*
         * Compute the child's access paths.
         */
        set_rel_pathlist(root, childrel, childRTindex, childRTE);

        /*
         * If child is dummy, ignore it.
         */
        if (IS_DUMMY_REL(childrel))
            continue;

        /* Bubble up childrel's partitioned children. */
        if (rel->part_scheme)
            rel->partitioned_child_rels =
                list_concat(rel->partitioned_child_rels,
                            childrel->partitioned_child_rels);

        /*
         * Child is live, so add it to the live_childrels list for use below.
         */
        live_childrels = lappend(live_childrels, childrel);
    }

    /* Add paths to the append relation. */
    add_paths_to_append_rel(root, rel, live_childrels);
}


/*
 * add_paths_to_append_rel
 *    Generate paths for the given append relation given the set of non-dummy
 *    child rels.
 *
 * The function collects all parameterizations and orderings supported by the
 * non-dummy children.  For every such parameterization or ordering, it creates
 * an append path collecting one path from each non-dummy child with given
 * parameterization or ordering.  Similarly it collects partial paths from
 * non-dummy children to create partial append paths.
 */
void
add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
                        List *live_childrels)
{
    List       *subpaths = NIL;
    bool        subpaths_valid = true;
    List       *partial_subpaths = NIL;
    List       *pa_partial_subpaths = NIL;
    List       *pa_nonpartial_subpaths = NIL;
    bool        partial_subpaths_valid = true;
    bool        pa_subpaths_valid;
    List       *all_child_pathkeys = NIL;
    List       *all_child_outers = NIL;
    ListCell   *l;
    List       *partitioned_rels = NIL;
    double      partial_rows = -1;

    /* If appropriate, consider parallel append */
    pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;

    /*
     * AppendPath generated for partitioned tables must record the RT indexes
     * of partitioned tables that are direct or indirect children of this
     * Append rel.
     *
     * AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
     * itself does not represent a partitioned relation, but the child sub-
     * queries may contain references to partitioned relations.  The loop
     * below will look for such children and collect them in a list to be
     * passed to the path creation function.  (This assumes that we don't need
     * to look through multiple levels of subquery RTEs; if we ever do, we
     * could consider stuffing the list we generate here into sub-query RTE's
     * RelOptInfo, just like we do for partitioned rels, which would be used
     * when populating our parent rel with paths.  For the present, that
     * appears to be unnecessary.)
     */
    if (rel->part_scheme != NULL)
    {
        if (IS_SIMPLE_REL(rel))
            partitioned_rels = list_make1(rel->partitioned_child_rels);
        else if (IS_JOIN_REL(rel))
        {
            int         relid = -1;
            List       *partrels = NIL;

            /*
             * For a partitioned joinrel, concatenate the component rels'
             * partitioned_child_rels lists.
             */
            while ((relid = bms_next_member(rel->relids, relid)) >= 0)
            {
                RelOptInfo *component;

                Assert(relid >= 1 && relid < root->simple_rel_array_size);
                component = root->simple_rel_array[relid];
                Assert(component->part_scheme != NULL);
                Assert(list_length(component->partitioned_child_rels) >= 1);
                partrels = list_concat(partrels,
                                       component->partitioned_child_rels);
            }

            partitioned_rels = list_make1(partrels);
        }

        Assert(list_length(partitioned_rels) >= 1);
    }

    /*
     * For every non-dummy child, remember the cheapest path.  Also, identify
     * all pathkeys (orderings) and parameterizations (required_outer sets)
     * available for the non-dummy member relations.
     */
    foreach(l, live_childrels)
    {
        RelOptInfo *childrel = lfirst(l);
        ListCell   *lcp;
        Path       *cheapest_partial_path = NULL;

        /*
         * For UNION ALLs with non-empty partitioned_child_rels, accumulate
         * the Lists of child relations.
         */
        if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
            partitioned_rels = lappend(partitioned_rels,
                                       childrel->partitioned_child_rels);

        /*
         * If child has an unparameterized cheapest-total path, add that to
         * the unparameterized Append path we are constructing for the parent.
         * If not, there's no workable unparameterized path.
         *
         * With partitionwise aggregates, the child rel's pathlist may be
         * empty, so don't assume that a path exists here.
         */
        if (childrel->pathlist != NIL &&
            childrel->cheapest_total_path->param_info == NULL)
            accumulate_append_subpath(childrel->cheapest_total_path,
                                      &subpaths, NULL);
        else
            subpaths_valid = false;

        /* Same idea, but for a partial plan. */
        if (childrel->partial_pathlist != NIL)
        {
            cheapest_partial_path = linitial(childrel->partial_pathlist);
            accumulate_append_subpath(cheapest_partial_path,
                                      &partial_subpaths, NULL);
        }
        else
            partial_subpaths_valid = false;

        /*
         * Same idea, but for a parallel append mixing partial and non-partial
         * paths.
         */
        if (pa_subpaths_valid)
        {
            Path       *nppath = NULL;

            nppath =
                get_cheapest_parallel_safe_total_inner(childrel->pathlist);

            if (cheapest_partial_path == NULL && nppath == NULL)
            {
                /* Neither a partial nor a parallel-safe path?  Forget it. */
                pa_subpaths_valid = false;
            }
            else if (nppath == NULL ||
                     (cheapest_partial_path != NULL &&
                      cheapest_partial_path->total_cost < nppath->total_cost))
            {
                /* Partial path is cheaper or the only option. */
                Assert(cheapest_partial_path != NULL);
                accumulate_append_subpath(cheapest_partial_path,
                                          &pa_partial_subpaths,
                                          &pa_nonpartial_subpaths);
            }
            else
            {
                /*
                 * Either we've got only a non-partial path, or we think that
                 * a single backend can execute the best non-partial path
                 * faster than all the parallel backends working together can
                 * execute the best partial path.
                 *
                 * It might make sense to be more aggressive here.  Even if
                 * the best non-partial path is more expensive than the best
                 * partial path, it could still be better to choose the
                 * non-partial path if there are several such paths that can
                 * be given to different workers.  For now, we don't try to
                 * figure that out.
                 */
                accumulate_append_subpath(nppath,
                                          &pa_nonpartial_subpaths,
                                          NULL);
            }
        }

        /*
         * Collect lists of all the available path orderings and
         * parameterizations for all the children.  We use these as a
         * heuristic to indicate which sort orderings and parameterizations we
         * should build Append and MergeAppend paths for.
         */
        foreach(lcp, childrel->pathlist)
        {
            Path       *childpath = (Path *) lfirst(lcp);
            List       *childkeys = childpath->pathkeys;
            Relids      childouter = PATH_REQ_OUTER(childpath);

            /* Unsorted paths don't contribute to pathkey list */
            if (childkeys != NIL)
            {
                ListCell   *lpk;
                bool        found = false;

                /* Have we already seen this ordering? */
                foreach(lpk, all_child_pathkeys)
                {
                    List       *existing_pathkeys = (List *) lfirst(lpk);

                    if (compare_pathkeys(existing_pathkeys,
                                         childkeys) == PATHKEYS_EQUAL)
                    {
                        found = true;
                        break;
                    }
                }
                if (!found)
                {
                    /* No, so add it to all_child_pathkeys */
                    all_child_pathkeys = lappend(all_child_pathkeys,
                                                 childkeys);
                }
            }

            /* Unparameterized paths don't contribute to param-set list */
            if (childouter)
            {
                ListCell   *lco;
                bool        found = false;

                /* Have we already seen this param set? */
                foreach(lco, all_child_outers)
                {
                    Relids      existing_outers = (Relids) lfirst(lco);

                    if (bms_equal(existing_outers, childouter))
                    {
                        found = true;
                        break;
                    }
                }
                if (!found)
                {
                    /* No, so add it to all_child_outers */
                    all_child_outers = lappend(all_child_outers,
                                               childouter);
                }
            }
        }
    }

    /*
     * If we found unparameterized paths for all children, build an unordered,
     * unparameterized Append path for the rel.  (Note: this is correct even
     * if we have zero or one live subpath due to constraint exclusion.)
     */
    if (subpaths_valid)
        add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
                                                  NIL, NULL, 0, false,
                                                  partitioned_rels, -1));

    /*
     * Consider an append of unordered, unparameterized partial paths.  Make
     * it parallel-aware if possible.
     */
    if (partial_subpaths_valid && partial_subpaths != NIL)
    {
        AppendPath *appendpath;
        ListCell   *lc;
        int         parallel_workers = 0;

        /* Find the highest number of workers requested for any subpath. */
        foreach(lc, partial_subpaths)
        {
            Path       *path = lfirst(lc);

            parallel_workers = Max(parallel_workers, path->parallel_workers);
        }
        Assert(parallel_workers > 0);

        /*
         * If the use of parallel append is permitted, always request at least
         * log2(# of children) workers.  We assume it can be useful to have
         * extra workers in this case because they will be spread out across
         * the children.  The precise formula is just a guess, but we don't
         * want to end up with a radically different answer for a table with N
         * partitions vs. an unpartitioned table with the same data, so the
         * use of some kind of log-scaling here seems to make some sense.
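         *
         * (For instance, with 100 live children fls(100) = 7, so at least
         * seven workers would be requested here, still capped by
         * max_parallel_workers_per_gather just below.)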
         */
        if (enable_parallel_append)
        {
            parallel_workers = Max(parallel_workers,
                                   fls(list_length(live_childrels)));
            parallel_workers = Min(parallel_workers,
                                   max_parallel_workers_per_gather);
        }
        Assert(parallel_workers > 0);

        /* Generate a partial append path. */
        appendpath = create_append_path(root, rel, NIL, partial_subpaths,
                                        NIL, NULL, parallel_workers,
                                        enable_parallel_append,
                                        partitioned_rels, -1);

        /*
         * Make sure any subsequent partial paths use the same row count
         * estimate.
         */
        partial_rows = appendpath->path.rows;

        /* Add the path. */
        add_partial_path(rel, (Path *) appendpath);
    }

    /*
     * Consider a parallel-aware append using a mix of partial and non-partial
     * paths.  (This only makes sense if there's at least one child which has
     * a non-partial path that is substantially cheaper than any partial path;
     * otherwise, we should use the append path added in the previous step.)
     */
    if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
    {
        AppendPath *appendpath;
        ListCell   *lc;
        int         parallel_workers = 0;

        /*
         * Find the highest number of workers requested for any partial
         * subpath.
         */
        foreach(lc, pa_partial_subpaths)
        {
            Path       *path = lfirst(lc);

            parallel_workers = Max(parallel_workers, path->parallel_workers);
        }

        /*
         * Same formula here as above.  It's even more important in this
         * instance because the non-partial paths won't contribute anything to
         * the planned number of parallel workers.
         */
        parallel_workers = Max(parallel_workers,
                               fls(list_length(live_childrels)));
        parallel_workers = Min(parallel_workers,
                               max_parallel_workers_per_gather);
        Assert(parallel_workers > 0);

        appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
                                        pa_partial_subpaths,
                                        NIL, NULL, parallel_workers, true,
                                        partitioned_rels, partial_rows);
        add_partial_path(rel, (Path *) appendpath);
    }

    /*
     * Also build unparameterized ordered append paths based on the collected
     * list of child pathkeys.
     */
    if (subpaths_valid)
        generate_orderedappend_paths(root, rel, live_childrels,
                                     all_child_pathkeys,
                                     partitioned_rels);

    /*
     * Build Append paths for each parameterization seen among the child rels.
     * (This may look pretty expensive, but in most cases of practical
     * interest, the child rels will expose mostly the same parameterizations,
     * so that not that many cases actually get considered here.)
     *
     * The Append node itself cannot enforce quals, so all qual checking must
     * be done in the child paths.  This means that to have a parameterized
     * Append path, we must have the exact same parameterization for each
     * child path; otherwise some children might be failing to check the
     * moved-down quals.  To make them match up, we can try to increase the
     * parameterization of lesser-parameterized paths.
     */
    foreach(l, all_child_outers)
    {
        Relids      required_outer = (Relids) lfirst(l);
        ListCell   *lcr;

        /* Select the child paths for an Append with this parameterization */
        subpaths = NIL;
        subpaths_valid = true;
        foreach(lcr, live_childrels)
        {
            RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
            Path       *subpath;

            if (childrel->pathlist == NIL)
            {
                /* failed to make a suitable path for this child */
                subpaths_valid = false;
                break;
            }

            subpath = get_cheapest_parameterized_child_path(root,
                                                            childrel,
                                                            required_outer);
            if (subpath == NULL)
            {
                /* failed to make a suitable path for this child */
                subpaths_valid = false;
                break;
            }
            accumulate_append_subpath(subpath, &subpaths, NULL);
        }

        if (subpaths_valid)
            add_path(rel, (Path *)
                     create_append_path(root, rel, subpaths, NIL,
                                        NIL, required_outer, 0, false,
                                        partitioned_rels, -1));
    }

    /*
     * When there is only a single child relation, the Append path can inherit
     * any ordering available for the child rel's path, so that it's useful to
     * consider ordered partial paths.  Above we only considered the cheapest
     * partial path for each child, but let's also make paths using any
     * partial paths that have pathkeys.
     */
    if (list_length(live_childrels) == 1)
    {
        RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);

        foreach(l, childrel->partial_pathlist)
        {
            Path       *path = (Path *) lfirst(l);
            AppendPath *appendpath;

            /*
             * Skip paths with no pathkeys.  Also skip the cheapest partial
             * path, since we already used that above.
             */
            if (path->pathkeys == NIL ||
                path == linitial(childrel->partial_pathlist))
                continue;

            appendpath = create_append_path(root, rel, NIL, list_make1(path),
                                            NIL, NULL,
                                            path->parallel_workers, true,
                                            partitioned_rels, partial_rows);
            add_partial_path(rel, (Path *) appendpath);
        }
    }
}

/*
 * generate_orderedappend_paths
 *    Generate ordered append paths for an append relation
 *
 * Usually we generate MergeAppend paths here, but there are some special
 * cases where we can generate simple Append paths, because the subpaths
 * can provide tuples in the required order already.
 *
 * We generate a path for each ordering (pathkey list) appearing in
 * all_child_pathkeys.
 *
 * We consider both cheapest-startup and cheapest-total cases, ie, for each
 * interesting ordering, collect all the cheapest startup subpaths and all the
 * cheapest total paths, and build a suitable path for each case.
 *
 * We don't currently generate any parameterized ordered paths here.  While
 * it would not take much more code here to do so, it's very unclear that it
 * is worth the planning cycles to investigate such paths: there's little
 * use for an ordered path on the inside of a nestloop.  In fact, it's likely
 * that the current coding of add_path would reject such paths out of hand,
 * because add_path gives no credit for sort ordering of parameterized paths,
 * and a parameterized MergeAppend is going to be more expensive than the
 * corresponding parameterized Append path.  If we ever try harder to support
 * parameterized mergejoin plans, it might be worth adding support for
1742 * parameterized paths here to feed such joins. (See notes in
1743 * optimizer/README for why that might not ever happen, though.)
1744 */
1745 static void
1746 generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
1747 List *live_childrels,
1748 List *all_child_pathkeys,
1749 List *partitioned_rels)
1750 {
1751 ListCell *lcp;
1752 List *partition_pathkeys = NIL;
1753 List *partition_pathkeys_desc = NIL;
1754 bool partition_pathkeys_partial = true;
1755 bool partition_pathkeys_desc_partial = true;
1756
1757 /*
1758 * Some partitioned table setups may allow us to use an Append node
1759 * instead of a MergeAppend. This is possible in cases such as RANGE
1760 * partitioned tables where it's guaranteed that an earlier partition must
1761 * contain rows which come earlier in the sort order. To detect whether
1762 * this is relevant, build pathkey descriptions of the partition ordering,
1763 * for both forward and reverse scans.
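*
* For example, given a table tbl partitioned by RANGE (a) with ordered,
* non-overlapping partitions, "SELECT * FROM tbl ORDER BY a" can simply
* concatenate the per-partition results, each already sorted by a.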
1764 */
1765 if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
1766 partitions_are_ordered(rel->boundinfo, rel->nparts))
1767 {
1768 partition_pathkeys = build_partition_pathkeys(root, rel,
1769 ForwardScanDirection,
1770 &partition_pathkeys_partial);
1771
1772 partition_pathkeys_desc = build_partition_pathkeys(root, rel,
1773 BackwardScanDirection,
1774 &partition_pathkeys_desc_partial);
1775
1776 /*
1777 * You might think we should truncate_useless_pathkeys here, but
1778 * allowing partition keys which are a subset of the query's pathkeys
1779 * can often be useful. For example, consider a table partitioned by
1780 * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
1781 * paths that can produce the a, b, c ordering (perhaps via indexes on
1782 * (a, b, c)) then it works to consider the appendrel output as
1783 * ordered by a, b, c.
1784 */
1785 }
1786
1787 /* Now consider each interesting sort ordering */
1788 foreach(lcp, all_child_pathkeys)
1789 {
1790 List *pathkeys = (List *) lfirst(lcp);
1791 List *startup_subpaths = NIL;
1792 List *total_subpaths = NIL;
1793 bool startup_neq_total = false;
1794 ListCell *lcr;
1795 bool match_partition_order;
1796 bool match_partition_order_desc;
1797
1798 /*
1799 * Determine if this sort ordering matches any partition pathkeys we
1800 * have, for both ascending and descending partition order. If the
1801 * partition pathkeys happen to be contained in pathkeys then it still
1802 * works, as described above, providing that the partition pathkeys
1803 * are complete and not just a prefix of the partition keys. (In such
1804 * cases we'll be relying on the child paths to have sorted the
1805 * lower-order columns of the required pathkeys.)
1806 */
1807 match_partition_order =
1808 pathkeys_contained_in(pathkeys, partition_pathkeys) ||
1809 (!partition_pathkeys_partial &&
1810 pathkeys_contained_in(partition_pathkeys, pathkeys));
1811
1812 match_partition_order_desc = !match_partition_order &&
1813 (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
1814 (!partition_pathkeys_desc_partial &&
1815 pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
1816
1817 /* Select the child paths for this ordering... */
1818 foreach(lcr, live_childrels)
1819 {
1820 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1821 Path *cheapest_startup,
1822 *cheapest_total;
1823
1824 /* Locate the right paths, if they are available. */
1825 cheapest_startup =
1826 get_cheapest_path_for_pathkeys(childrel->pathlist,
1827 pathkeys,
1828 NULL,
1829 STARTUP_COST,
1830 false);
1831 cheapest_total =
1832 get_cheapest_path_for_pathkeys(childrel->pathlist,
1833 pathkeys,
1834 NULL,
1835 TOTAL_COST,
1836 false);
1837
1838 /*
1839 * If we can't find any paths with the right order just use the
1840 * cheapest-total path; we'll have to sort it later.
1841 */
1842 if (cheapest_startup == NULL || cheapest_total == NULL)
1843 {
1844 cheapest_startup = cheapest_total =
1845 childrel->cheapest_total_path;
1846 /* Assert we do have an unparameterized path for this child */
1847 Assert(cheapest_total->param_info == NULL);
1848 }
1849
1850 /*
1851 * Notice whether we actually have different paths for the
1852 * "cheapest" and "total" cases; frequently there will be no point
1853 * in two create_merge_append_path() calls.
1854 */
1855 if (cheapest_startup != cheapest_total)
1856 startup_neq_total = true;
1857
1858 /*
1859 * Collect the appropriate child paths. The required logic varies
1860 * for the Append and MergeAppend cases.
1861 */
1862 if (match_partition_order)
1863 {
1864 /*
1865 * We're going to make a plain Append path. We don't need
1866 * most of what accumulate_append_subpath would do, but we do
1867 * want to cut out child Appends or MergeAppends if they have
1868 * just a single subpath (and hence aren't doing anything
1869 * useful).
1870 */
1871 cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1872 cheapest_total = get_singleton_append_subpath(cheapest_total);
1873
1874 startup_subpaths = lappend(startup_subpaths, cheapest_startup);
1875 total_subpaths = lappend(total_subpaths, cheapest_total);
1876 }
1877 else if (match_partition_order_desc)
1878 {
1879 /*
1880 * As above, but we need to reverse the order of the children,
1881 * because nodeAppend.c doesn't know anything about reverse
1882 * ordering and will scan the children in the order presented.
1883 */
1884 cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1885 cheapest_total = get_singleton_append_subpath(cheapest_total);
1886
1887 startup_subpaths = lcons(cheapest_startup, startup_subpaths);
1888 total_subpaths = lcons(cheapest_total, total_subpaths);
1889 }
1890 else
1891 {
1892 /*
1893 * Otherwise, rely on accumulate_append_subpath to collect the
1894 * child paths for the MergeAppend.
1895 */
1896 accumulate_append_subpath(cheapest_startup,
1897 &startup_subpaths, NULL);
1898 accumulate_append_subpath(cheapest_total,
1899 &total_subpaths, NULL);
1900 }
1901 }
1902
1903 /* ... and build the Append or MergeAppend paths */
1904 if (match_partition_order || match_partition_order_desc)
1905 {
1906 /* We only need Append */
1907 add_path(rel, (Path *) create_append_path(root,
1908 rel,
1909 startup_subpaths,
1910 NIL,
1911 pathkeys,
1912 NULL,
1913 0,
1914 false,
1915 partitioned_rels,
1916 -1));
1917 if (startup_neq_total)
1918 add_path(rel, (Path *) create_append_path(root,
1919 rel,
1920 total_subpaths,
1921 NIL,
1922 pathkeys,
1923 NULL,
1924 0,
1925 false,
1926 partitioned_rels,
1927 -1));
1928 }
1929 else
1930 {
1931 /* We need MergeAppend */
1932 add_path(rel, (Path *) create_merge_append_path(root,
1933 rel,
1934 startup_subpaths,
1935 pathkeys,
1936 NULL,
1937 partitioned_rels));
1938 if (startup_neq_total)
1939 add_path(rel, (Path *) create_merge_append_path(root,
1940 rel,
1941 total_subpaths,
1942 pathkeys,
1943 NULL,
1944 partitioned_rels));
1945 }
1946 }
1947 }
1948
1949 /*
1950 * get_cheapest_parameterized_child_path
1951 * Get cheapest path for this relation that has exactly the requested
1952 * parameterization.
1953 *
1954 * Returns NULL if unable to create such a path.
1955 */
1956 static Path *
1957 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1958 Relids required_outer)
1959 {
1960 Path *cheapest;
1961 ListCell *lc;
1962
1963 /*
1964 * Look up the cheapest existing path with no more than the needed
1965 * parameterization. If it has exactly the needed parameterization, we're
1966 * done.
1967 */
1968 cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1969 NIL,
1970 required_outer,
1971 TOTAL_COST,
1972 false);
1973 Assert(cheapest != NULL);
1974 if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1975 return cheapest;
1976
1977 /*
1978 * Otherwise, we can "reparameterize" an existing path to match the given
1979 * parameterization, which effectively means pushing down additional
1980 * joinquals to be checked within the path's scan. However, some existing
1981 * paths might check the available joinquals already while others don't;
1982 * therefore, it's not clear which existing path will be cheapest after
1983 * reparameterization. We have to go through them all and find out.
1984 */
1985 cheapest = NULL;
1986 foreach(lc, rel->pathlist)
1987 {
1988 Path *path = (Path *) lfirst(lc);
1989
1990 /* Can't use it if it needs more than requested parameterization */
1991 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1992 continue;
1993
1994 /*
1995 * Reparameterization can only increase the path's cost, so if it's
1996 * already more expensive than the current cheapest, forget it.
1997 */
1998 if (cheapest != NULL &&
1999 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2000 continue;
2001
2002 /* Reparameterize if needed, then recheck cost */
2003 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
2004 {
2005 path = reparameterize_path(root, path, required_outer, 1.0);
2006 if (path == NULL)
2007 continue; /* failed to reparameterize this one */
2008 Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
2009
2010 if (cheapest != NULL &&
2011 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2012 continue;
2013 }
2014
2015 /* We have a new best path */
2016 cheapest = path;
2017 }
2018
2019 /* Return the best path, or NULL if we found no suitable candidate */
2020 return cheapest;
2021 }
2022
2023 /*
2024 * accumulate_append_subpath
2025 * Add a subpath to the list being built for an Append or MergeAppend.
2026 *
2027 * It's possible that the child is itself an Append or MergeAppend path, in
2028 * which case we can "cut out the middleman" and just add its child paths to
2029 * our own list. (We don't try to do this earlier because we need to apply
2030 * both levels of transformation to the quals.)
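*
* For example, accumulating the child paths (A, Append(B, C)) produces the
* flat subpath list (A, B, C) rather than a nested Append.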
2031 *
2032 * Note that if we omit a child MergeAppend in this way, we are effectively
2033 * omitting a sort step, which seems fine: if the parent is to be an Append,
2034 * its result would be unsorted anyway, while if the parent is to be a
2035 * MergeAppend, there's no point in a separate sort on a child.
2036 *
2037 * Normally, either path is a partial path and subpaths is a list of partial
2038 * paths, or else path is a non-partial path and subpaths is a list of those.
2039 * However, if path is a parallel-aware Append, then we add its partial path
2040 * children to subpaths and the rest to special_subpaths. If the latter is
2041 * NULL, we don't flatten the path at all (unless it contains only partial
2042 * paths).
2043 */
2044 static void
2045 accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
2046 {
2047 if (IsA(path, AppendPath))
2048 {
2049 AppendPath *apath = (AppendPath *) path;
2050
2051 if (!apath->path.parallel_aware || apath->first_partial_path == 0)
2052 {
2053 *subpaths = list_concat(*subpaths, apath->subpaths);
2054 return;
2055 }
2056 else if (special_subpaths != NULL)
2057 {
2058 List *new_special_subpaths;
2059
2060 /* Split Parallel Append into partial and non-partial subpaths */
2061 *subpaths = list_concat(*subpaths,
2062 list_copy_tail(apath->subpaths,
2063 apath->first_partial_path));
2064 new_special_subpaths =
2065 list_truncate(list_copy(apath->subpaths),
2066 apath->first_partial_path);
2067 *special_subpaths = list_concat(*special_subpaths,
2068 new_special_subpaths);
2069 return;
2070 }
2071 }
2072 else if (IsA(path, MergeAppendPath))
2073 {
2074 MergeAppendPath *mpath = (MergeAppendPath *) path;
2075
2076 *subpaths = list_concat(*subpaths, mpath->subpaths);
2077 return;
2078 }
2079
2080 *subpaths = lappend(*subpaths, path);
2081 }
2082
2083 /*
2084 * get_singleton_append_subpath
2085 * Returns the single subpath of an Append/MergeAppend, or just
2086 * returns 'path' if it's not a single-subpath Append/MergeAppend.
2087 *
2088 * Note: 'path' must not be a parallel-aware path.
2089 */
2090 static Path *
2091 get_singleton_append_subpath(Path *path)
2092 {
2093 Assert(!path->parallel_aware);
2094
2095 if (IsA(path, AppendPath))
2096 {
2097 AppendPath *apath = (AppendPath *) path;
2098
2099 if (list_length(apath->subpaths) == 1)
2100 return (Path *) linitial(apath->subpaths);
2101 }
2102 else if (IsA(path, MergeAppendPath))
2103 {
2104 MergeAppendPath *mpath = (MergeAppendPath *) path;
2105
2106 if (list_length(mpath->subpaths) == 1)
2107 return (Path *) linitial(mpath->subpaths);
2108 }
2109
2110 return path;
2111 }
2112
2113 /*
2114 * set_dummy_rel_pathlist
2115 * Build a dummy path for a relation that's been excluded by constraints
2116 *
2117 * Rather than inventing a special "dummy" path type, we represent this as an
2118 * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2119 *
2120 * (See also mark_dummy_rel, which does basically the same thing, but is
2121 * typically used to change a rel into dummy state after we already made
2122 * paths for it.)
2123 */
2124 static void
2125 set_dummy_rel_pathlist(RelOptInfo *rel)
2126 {
2127 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2128 rel->rows = 0;
2129 rel->reltarget->width = 0;
2130
2131 /* Discard any pre-existing paths; no further need for them */
2132 rel->pathlist = NIL;
2133 rel->partial_pathlist = NIL;
2134
2135 /* Set up the dummy path */
2136 add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
2137 NIL, rel->lateral_relids,
2138 0, false, NIL, -1));
2139
2140 /*
2141 * We set the cheapest-path fields immediately, just in case they were
2142 * pointing at some discarded path. This is redundant when we're called
2143 * from set_rel_size(), but not when called from elsewhere, and doing it
2144 * twice is harmless anyway.
2145 */
2146 set_cheapest(rel);
2147 }
2148
2149 /* quick-and-dirty test to see if any joining is needed */
2150 static bool
2151 has_multiple_baserels(PlannerInfo *root)
2152 {
2153 int num_base_rels = 0;
2154 Index rti;
2155
2156 for (rti = 1; rti < root->simple_rel_array_size; rti++)
2157 {
2158 RelOptInfo *brel = root->simple_rel_array[rti];
2159
2160 if (brel == NULL)
2161 continue;
2162
2163 /* ignore RTEs that are "other rels" */
2164 if (brel->reloptkind == RELOPT_BASEREL)
2165 if (++num_base_rels > 1)
2166 return true;
2167 }
2168 return false;
2169 }
2170
2171 /*
2172 * set_subquery_pathlist
2173 * Generate SubqueryScan access paths for a subquery RTE
2174 *
2175 * We don't currently support generating parameterized paths for subqueries
2176 * by pushing join clauses down into them; it seems too expensive to re-plan
2177 * the subquery multiple times to consider different alternatives.
2178 * (XXX that could stand to be reconsidered, now that we use Paths.)
2179 * So the paths made here will be parameterized if the subquery contains
2180 * LATERAL references, otherwise not. As long as that's true, there's no need
2181 * for a separate set_subquery_size phase: just make the paths right away.
2182 */
2183 static void
2184 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
2185 Index rti, RangeTblEntry *rte)
2186 {
2187 Query *parse = root->parse;
2188 Query *subquery = rte->subquery;
2189 Relids required_outer;
2190 pushdown_safety_info safetyInfo;
2191 double tuple_fraction;
2192 RelOptInfo *sub_final_rel;
2193 ListCell *lc;
2194
2195 /*
2196 * Must copy the Query so that planning doesn't mess up the RTE contents
2197 * (really really need to fix the planner to not scribble on its input,
2198 * someday ... but see remove_unused_subquery_outputs to start with).
2199 */
2200 subquery = copyObject(subquery);
2201
2202 /*
2203 * If it's a LATERAL subquery, it might contain some Vars of the current
2204 * query level, requiring it to be treated as parameterized, even though
2205 * we don't support pushing down join quals into subqueries.
2206 */
2207 required_outer = rel->lateral_relids;
2208
2209 /*
2210 * Zero out result area for subquery_is_pushdown_safe, so that it can set
2211 * flags as needed while recursing. In particular, we need a workspace
2212 * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
2213 * will be set true if we find that output column i of the subquery is
2214 * unsafe to use in a pushed-down qual.
2215 */
2216 memset(&safetyInfo, 0, sizeof(safetyInfo));
2217 safetyInfo.unsafeColumns = (bool *)
2218 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
2219
2220 /*
2221 * If the subquery has the "security_barrier" flag, it means the subquery
2222 * originated from a view that must enforce row level security. Then we
2223 * must not push down quals that contain leaky functions. (Ideally this
2224 * would be checked inside subquery_is_pushdown_safe, but since we don't
2225 * currently pass the RTE to that function, we must do it here.)
2226 */
2227 safetyInfo.unsafeLeaky = rte->security_barrier;
2228
2229 /*
2230 * If there are any restriction clauses that have been attached to the
2231 * subquery relation, consider pushing them down to become WHERE or HAVING
2232 * quals of the subquery itself. This transformation is useful because it
2233 * may allow us to generate a better plan for the subquery than evaluating
2234 * all the subquery output rows and then filtering them.
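*
* For example, given "SELECT * FROM (SELECT a, b FROM t1 UNION ALL SELECT
* a, b FROM t2) ss WHERE a = 42", pushing "a = 42" into both union arms
* can let each arm use an index on a, rather than emitting every row for
* the upper query to filter.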
2235 *
2236 * There are several cases where we cannot push down clauses. Restrictions
2237 * involving the subquery are checked by subquery_is_pushdown_safe().
2238 * Restrictions on individual clauses are checked by
2239 * qual_is_pushdown_safe(). Also, we don't want to push down
2240 * pseudoconstant clauses; better to have the gating node above the
2241 * subquery.
2242 *
2243 * Non-pushed-down clauses will get evaluated as qpquals of the
2244 * SubqueryScan node.
2245 *
2246 * XXX Are there any cases where we want to make a policy decision not to
2247 * push down a pushable qual, because it'd result in a worse plan?
2248 */
2249 if (rel->baserestrictinfo != NIL &&
2250 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
2251 {
2252 /* OK to consider pushing down individual quals */
2253 List *upperrestrictlist = NIL;
2254 ListCell *l;
2255
2256 foreach(l, rel->baserestrictinfo)
2257 {
2258 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
2259 Node *clause = (Node *) rinfo->clause;
2260
2261 if (!rinfo->pseudoconstant &&
2262 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
2263 {
2264 /* Push it down */
2265 subquery_push_qual(subquery, rte, rti, clause);
2266 }
2267 else
2268 {
2269 /* Keep it in the upper query */
2270 upperrestrictlist = lappend(upperrestrictlist, rinfo);
2271 }
2272 }
2273 rel->baserestrictinfo = upperrestrictlist;
2274 /* We don't bother recomputing baserestrict_min_security */
2275 }
2276
2277 pfree(safetyInfo.unsafeColumns);
2278
2279 /*
2280 * The upper query might not use all the subquery's output columns; if
2281 * not, we can simplify.
2282 */
2283 remove_unused_subquery_outputs(subquery, rel);
2284
2285 /*
2286 * We can safely pass the outer tuple_fraction down to the subquery if the
2287 * outer level has no joining, aggregation, or sorting to do. Otherwise
2288 * we'd better tell the subquery to plan for full retrieval. (XXX This
2289 * could probably be made more intelligent ...)
2290 */
2291 if (parse->hasAggs ||
2292 parse->groupClause ||
2293 parse->groupingSets ||
2294 parse->havingQual ||
2295 parse->distinctClause ||
2296 parse->sortClause ||
2297 has_multiple_baserels(root))
2298 tuple_fraction = 0.0; /* default case */
2299 else
2300 tuple_fraction = root->tuple_fraction;
2301
2302 /* plan_params should not be in use in current query level */
2303 Assert(root->plan_params == NIL);
2304
2305 /* Generate a subroot and Paths for the subquery */
2306 rel->subroot = subquery_planner(root->glob, subquery,
2307 root,
2308 false, tuple_fraction);
2309
2310 /* Isolate the params needed by this specific subplan */
2311 rel->subplan_params = root->plan_params;
2312 root->plan_params = NIL;
2313
2314 /*
2315 * It's possible that constraint exclusion proved the subquery empty. If
2316 * so, it's desirable to produce an unadorned dummy path so that we will
2317 * recognize appropriate optimizations at this query level.
2318 */
2319 sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
2320
2321 if (IS_DUMMY_REL(sub_final_rel))
2322 {
2323 set_dummy_rel_pathlist(rel);
2324 return;
2325 }
2326
2327 /*
2328 * Mark rel with estimated output rows, width, etc. Note that we have to
2329 * do this before generating outer-query paths, else cost_subqueryscan is
2330 * not happy.
2331 */
2332 set_subquery_size_estimates(root, rel);
2333
2334 /*
2335 * For each Path that subquery_planner produced, make a SubqueryScanPath
2336 * in the outer query.
2337 */
2338 foreach(lc, sub_final_rel->pathlist)
2339 {
2340 Path *subpath = (Path *) lfirst(lc);
2341 List *pathkeys;
2342
2343 /* Convert subpath's pathkeys to outer representation */
2344 pathkeys = convert_subquery_pathkeys(root,
2345 rel,
2346 subpath->pathkeys,
2347 make_tlist_from_pathtarget(subpath->pathtarget));
2348
2349 /* Generate outer path using this subpath */
2350 add_path(rel, (Path *)
2351 create_subqueryscan_path(root, rel, subpath,
2352 pathkeys, required_outer));
2353 }
2354
2355 /* If outer rel allows parallelism, do same for partial paths. */
2356 if (rel->consider_parallel && bms_is_empty(required_outer))
2357 {
2358 /* If consider_parallel is false, there should be no partial paths. */
2359 Assert(sub_final_rel->consider_parallel ||
2360 sub_final_rel->partial_pathlist == NIL);
2361
2362 /* Same for partial paths. */
2363 foreach(lc, sub_final_rel->partial_pathlist)
2364 {
2365 Path *subpath = (Path *) lfirst(lc);
2366 List *pathkeys;
2367
2368 /* Convert subpath's pathkeys to outer representation */
2369 pathkeys = convert_subquery_pathkeys(root,
2370 rel,
2371 subpath->pathkeys,
2372 make_tlist_from_pathtarget(subpath->pathtarget));
2373
2374 /* Generate outer path using this subpath */
2375 add_partial_path(rel, (Path *)
2376 create_subqueryscan_path(root, rel, subpath,
2377 pathkeys,
2378 required_outer));
2379 }
2380 }
2381 }
2382
2383 /*
2384 * set_function_pathlist
2385 * Build the (single) access path for a function RTE
2386 */
2387 static void
2388 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2389 {
2390 Relids required_outer;
2391 List *pathkeys = NIL;
2392
2393 /*
2394 * We don't support pushing join clauses into the quals of a function
2395 * scan, but it could still have required parameterization due to LATERAL
2396 * refs in the function expression.
2397 */
2398 required_outer = rel->lateral_relids;
2399
2400 /*
2401 * The result is considered unordered unless ORDINALITY was used, in which
2402 * case it is ordered by the ordinal column (the last one). See if we
2403 * care, by checking for uses of that Var in equivalence classes.
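*
* For example, in "SELECT * FROM generate_series(1, 3) WITH ORDINALITY AS
* g(v, n) ORDER BY n", the ordinality column n already delivers the
* requested order, so no explicit sort is needed above the scan.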
2404 */
2405 if (rte->funcordinality)
2406 {
2407 AttrNumber ordattno = rel->max_attr;
2408 Var *var = NULL;
2409 ListCell *lc;
2410
2411 /*
2412 * Is there a Var for it in rel's targetlist? If not, the query did
2413 * not reference the ordinality column, or at least not in any way
2414 * that would be interesting for sorting.
2415 */
2416 foreach(lc, rel->reltarget->exprs)
2417 {
2418 Var *node = (Var *) lfirst(lc);
2419
2420 /* checking varno/varlevelsup is just paranoia */
2421 if (IsA(node, Var) &&
2422 node->varattno == ordattno &&
2423 node->varno == rel->relid &&
2424 node->varlevelsup == 0)
2425 {
2426 var = node;
2427 break;
2428 }
2429 }
2430
2431 /*
2432 * Try to build pathkeys for this Var with int8 sorting. We tell
2433 * build_expression_pathkey not to build any new equivalence class; if
2434 * the Var isn't already mentioned in some EC, it means that nothing
2435 * cares about the ordering.
2436 */
2437 if (var)
2438 pathkeys = build_expression_pathkey(root,
2439 (Expr *) var,
2440 NULL, /* below outer joins */
2441 Int8LessOperator,
2442 rel->relids,
2443 false);
2444 }
2445
2446 /* Generate appropriate path */
2447 add_path(rel, create_functionscan_path(root, rel,
2448 pathkeys, required_outer));
2449 }
2450
2451 /*
2452 * set_values_pathlist
2453 * Build the (single) access path for a VALUES RTE
2454 */
2455 static void
2456 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2457 {
2458 Relids required_outer;
2459
2460 /*
2461 * We don't support pushing join clauses into the quals of a values scan,
2462 * but it could still have required parameterization due to LATERAL refs
2463 * in the values expressions.
2464 */
2465 required_outer = rel->lateral_relids;
2466
2467 /* Generate appropriate path */
2468 add_path(rel, create_valuesscan_path(root, rel, required_outer));
2469 }
2470
2471 /*
2472 * set_tablefunc_pathlist
2473 * Build the (single) access path for a table func RTE
2474 */
2475 static void
2476 set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2477 {
2478 Relids required_outer;
2479
2480 /*
2481 * We don't support pushing join clauses into the quals of a tablefunc
2482 * scan, but it could still have required parameterization due to LATERAL
2483 * refs in the function expression.
2484 */
2485 required_outer = rel->lateral_relids;
2486
2487 /* Generate appropriate path */
2488 add_path(rel, create_tablefuncscan_path(root, rel,
2489 required_outer));
2490 }
2491
2492 /*
2493 * set_cte_pathlist
2494 * Build the (single) access path for a non-self-reference CTE RTE
2495 *
2496 * There's no need for a separate set_cte_size phase, since we don't
2497 * support join-qual-parameterized paths for CTEs.
2498 */
2499 static void
2500 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2501 {
2502 Plan *cteplan;
2503 PlannerInfo *cteroot;
2504 Index levelsup;
2505 int ndx;
2506 ListCell *lc;
2507 int plan_id;
2508 Relids required_outer;
2509
2510 /*
2511 * Find the referenced CTE, and locate the plan previously made for it.
2512 */
2513 levelsup = rte->ctelevelsup;
2514 cteroot = root;
2515 while (levelsup-- > 0)
2516 {
2517 cteroot = cteroot->parent_root;
2518 if (!cteroot) /* shouldn't happen */
2519 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2520 }
2521
2522 /*
2523 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2524 * on planning the CTEs (ie, this is a side-reference from another CTE).
2525 * So we mustn't use forboth here.
2526 */
2527 ndx = 0;
2528 foreach(lc, cteroot->parse->cteList)
2529 {
2530 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
2531
2532 if (strcmp(cte->ctename, rte->ctename) == 0)
2533 break;
2534 ndx++;
2535 }
2536 if (lc == NULL) /* shouldn't happen */
2537 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
2538 if (ndx >= list_length(cteroot->cte_plan_ids))
2539 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
2540 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
2541 Assert(plan_id > 0);
2542 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2543
2544 /* Mark rel with estimated output rows, width, etc */
2545 set_cte_size_estimates(root, rel, cteplan->plan_rows);
2546
2547 /*
2548 * We don't support pushing join clauses into the quals of a CTE scan, but
2549 * it could still have required parameterization due to LATERAL refs in
2550 * its tlist.
2551 */
2552 required_outer = rel->lateral_relids;
2553
2554 /* Generate appropriate path */
2555 add_path(rel, create_ctescan_path(root, rel, required_outer));
2556 }
2557
2558 /*
2559 * set_namedtuplestore_pathlist
2560 * Build the (single) access path for a named tuplestore RTE
2561 *
2562 * There's no need for a separate set_namedtuplestore_size phase, since we
2563 * don't support join-qual-parameterized paths for tuplestores.
2564 */
2565 static void
2566 set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
2567 RangeTblEntry *rte)
2568 {
2569 Relids required_outer;
2570
2571 /* Mark rel with estimated output rows, width, etc */
2572 set_namedtuplestore_size_estimates(root, rel);
2573
2574 /*
2575 * We don't support pushing join clauses into the quals of a tuplestore
2576 * scan, but it could still have required parameterization due to LATERAL
2577 * refs in its tlist.
2578 */
2579 required_outer = rel->lateral_relids;
2580
2581 /* Generate appropriate path */
2582 add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
2583
2584 /* Select cheapest path (pretty easy in this case...) */
2585 set_cheapest(rel);
2586 }
2587
2588 /*
2589 * set_result_pathlist
2590 * Build the (single) access path for an RTE_RESULT RTE
2591 *
2592 * There's no need for a separate set_result_size phase, since we
2593 * don't support join-qual-parameterized paths for these RTEs.
2594 */
2595 static void
2596 set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
2597 RangeTblEntry *rte)
2598 {
2599 Relids required_outer;
2600
2601 /* Mark rel with estimated output rows, width, etc */
2602 set_result_size_estimates(root, rel);
2603
2604 /*
2605 * We don't support pushing join clauses into the quals of a Result scan,
2606 * but it could still have required parameterization due to LATERAL refs
2607 * in its tlist.
2608 */
2609 required_outer = rel->lateral_relids;
2610
2611 /* Generate appropriate path */
2612 add_path(rel, create_resultscan_path(root, rel, required_outer));
2613
2614 /* Select cheapest path (pretty easy in this case...) */
2615 set_cheapest(rel);
2616 }
2617
2618 /*
2619 * set_worktable_pathlist
2620 * Build the (single) access path for a self-reference CTE RTE
2621 *
2622 * There's no need for a separate set_worktable_size phase, since we don't
2623 * support join-qual-parameterized paths for CTEs.
2624 */
2625 static void
2626 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2627 {
2628 Path *ctepath;
2629 PlannerInfo *cteroot;
2630 Index levelsup;
2631 Relids required_outer;
2632
2633 /*
2634 * We need to find the non-recursive term's path, which is in the plan
2635 * level that's processing the recursive UNION, which is one level *below*
2636 * where the CTE comes from.
2637 */
2638 levelsup = rte->ctelevelsup;
2639 if (levelsup == 0) /* shouldn't happen */
2640 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2641 levelsup--;
2642 cteroot = root;
2643 while (levelsup-- > 0)
2644 {
2645 cteroot = cteroot->parent_root;
2646 if (!cteroot) /* shouldn't happen */
2647 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2648 }
2649 ctepath = cteroot->non_recursive_path;
2650 if (!ctepath) /* shouldn't happen */
2651 elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
2652
2653 /* Mark rel with estimated output rows, width, etc */
2654 set_cte_size_estimates(root, rel, ctepath->rows);
2655
2656 /*
2657 * We don't support pushing join clauses into the quals of a worktable
2658 * scan, but it could still have required parameterization due to LATERAL
2659 * refs in its tlist. (I'm not sure this is actually possible given the
2660 * restrictions on recursive references, but it's easy enough to support.)
2661 */
2662 required_outer = rel->lateral_relids;
2663
2664 /* Generate appropriate path */
2665 add_path(rel, create_worktablescan_path(root, rel, required_outer));
2666 }
2667
2668 /*
2669 * generate_gather_paths
2670 * Generate parallel access paths for a relation by pushing a Gather or
2671 * Gather Merge on top of a partial path.
2672 *
2673 * This must not be called until after we're done creating all partial paths
2674 * for the specified relation. (Otherwise, add_partial_path might delete a
2675 * path that some GatherPath or GatherMergePath has a reference to.)
2676 *
2677 * If we're generating paths for a scan or join relation, override_rows will
2678 * be false, and we'll just use the relation's size estimate. When we're
2679 * being called for a partially-grouped path, though, we need to override
2680 * the rowcount estimate. (It's not clear that the particular value we're
2681 * using here is actually best, but the underlying rel has no estimate so
2682 * we must do something.)
2683 */
2684 void
2685 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2686 {
2687 Path *cheapest_partial_path;
2688 Path *simple_gather_path;
2689 ListCell *lc;
2690 double rows;
2691 double *rowsp = NULL;
2692
2693 /* If there are no partial paths, there's nothing to do here. */
2694 if (rel->partial_pathlist == NIL)
2695 return;
2696
2697 /* Should we override the rel's rowcount estimate? */
2698 if (override_rows)
2699 rowsp = &rows;
2700
2701 /*
2702 * The output of Gather is always unsorted, so there's only one partial
2703 * path of interest: the cheapest one. That will be the one at the front
2704 * of partial_pathlist because of the way add_partial_path works.
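*
* When we must override the rowcount, we take the per-worker estimate
* scaled back up by the worker count: e.g. 1000 rows per worker across 4
* workers gives a rough estimate of 4000 rows of gathered output.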
2705 */
2706 cheapest_partial_path = linitial(rel->partial_pathlist);
2707 rows =
2708 cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
2709 simple_gather_path = (Path *)
2710 create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
2711 NULL, rowsp);
2712 add_path(rel, simple_gather_path);
2713
2714 /*
2715 * For each useful ordering, we can consider an order-preserving Gather
2716 * Merge.
2717 */
2718 foreach(lc, rel->partial_pathlist)
2719 {
2720 Path *subpath = (Path *) lfirst(lc);
2721 GatherMergePath *path;
2722
2723 if (subpath->pathkeys == NIL)
2724 continue;
2725
2726 rows = subpath->rows * subpath->parallel_workers;
2727 path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
2728 subpath->pathkeys, NULL, rowsp);
2729 add_path(rel, &path->path);
2730 }
2731 }
2732
2733 /*
2734 * get_useful_pathkeys_for_relation
2735 * Determine which orderings of a relation might be useful.
2736 *
2737 * Getting data in sorted order can be useful either because the requested
2738 * order matches the final output ordering for the overall query we're
2739 * planning, or because it enables an efficient merge join. Here, we try
2740 * to figure out which pathkeys to consider.
2741 *
2742 * This allows us to do incremental sort on top of an index scan under a gather
2743 * merge node, i.e. parallelized.
2744 *
2745 * If require_parallel_safe is true, we also require the expressions to
2746 * be parallel safe (which allows pushing the sort below Gather Merge).
2747 *
2748 * XXX At the moment this can only ever return a list with a single element,
2749 * because it looks at query_pathkeys only. So we might return the pathkeys
2750 * directly, but it seems plausible we'll want to consider other orderings
2751 * in the future. For example, we might want to consider pathkeys useful for
2752 * merge joins.
2753 */
2754 static List *
2755 get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
2756 bool require_parallel_safe)
2757 {
2758 List *useful_pathkeys_list = NIL;
2759
2760 /*
2761 * Considering query_pathkeys is always worth it, because it might allow
2762 * us to avoid a total sort when we have a partially presorted path
2763 * available or to push the total sort into the parallel portion of the
2764 * query.
2765 */
2766 if (root->query_pathkeys)
2767 {
2768 ListCell *lc;
2769 int npathkeys = 0; /* useful pathkeys */
2770
2771 foreach(lc, root->query_pathkeys)
2772 {
2773 PathKey *pathkey = (PathKey *) lfirst(lc);
2774 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
2775
2776 /*
2777 * We can only build a sort for pathkeys which contain an EC
2778 * member in the current relation's target, so ignore any suffix
2779 * of the list as soon as we find a pathkey without an EC member
2780 * in the relation.
2781 *
2782 * By still returning the prefix of the pathkeys list that does
2783 * meet criteria of EC membership in the current relation, we
2784 * enable not just an incremental sort on the entirety of
2785 * query_pathkeys but also incremental sort below a JOIN.
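*
* For example, if query_pathkeys is (a, b, c) but no expression for c is
* available at this relation, we keep the (a, b) prefix; a path sorted by
* (a, b) can later feed an incremental sort that adds c higher up.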
2786 *
2787 * If requested, ensure the expression is parallel safe too.
2788 */
2789 if (!find_em_expr_usable_for_sorting_rel(root, pathkey_ec, rel,
2790 require_parallel_safe))
2791 break;
2792
2793 npathkeys++;
2794 }
2795
2796 /*
2797 * The whole query_pathkeys list matches, so append it directly, to
2798 * allow comparing pathkeys easily by comparing list pointer. If we
2799 * have to truncate the pathkeys, we gotta do a copy though.
2800 */
2801 if (npathkeys == list_length(root->query_pathkeys))
2802 useful_pathkeys_list = lappend(useful_pathkeys_list,
2803 root->query_pathkeys);
2804 else if (npathkeys > 0)
2805 useful_pathkeys_list = lappend(useful_pathkeys_list,
2806 list_truncate(list_copy(root->query_pathkeys),
2807 npathkeys));
2808 }
2809
2810 return useful_pathkeys_list;
2811 }
2812
2813 /*
2814 * generate_useful_gather_paths
2815 * Generate parallel access paths for a relation by pushing a Gather or
2816 * Gather Merge on top of a partial path.
2817 *
2818 * Unlike plain generate_gather_paths, this looks both at pathkeys of input
2819 * paths (aiming to preserve the ordering), but also considers ordering that
2820 * might be useful for nodes above the gather merge node, and tries to add
2821 * a sort (regular or incremental) to provide that.
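*
* For example, for "SELECT * FROM t ORDER BY a" with only unsorted partial
* paths available, this can produce Gather Merge -> Sort -> Parallel Seq
* Scan, pushing the sort work down into the parallel workers.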
2822 */
2823 void
2824 generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2825 {
2826 ListCell *lc;
2827 double rows;
2828 double *rowsp = NULL;
2829 List *useful_pathkeys_list = NIL;
2830 Path *cheapest_partial_path = NULL;
2831
2832 /* If there are no partial paths, there's nothing to do here. */
2833 if (rel->partial_pathlist == NIL)
2834 return;
2835
2836 /* Should we override the rel's rowcount estimate? */
2837 if (override_rows)
2838 rowsp = &rows;
2839
2840 /* generate the regular gather (merge) paths */
2841 generate_gather_paths(root, rel, override_rows);
2842
2843 /* consider incremental sort for interesting orderings */
2844 useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
2845
2846 /* used for explicit (full) sort paths */
2847 cheapest_partial_path = linitial(rel->partial_pathlist);
2848
2849 /*
2850 * Consider sorted paths for each interesting ordering. We generate both
2851 * incremental and full sort.
2852 */
2853 foreach(lc, useful_pathkeys_list)
2854 {
2855 List *useful_pathkeys = lfirst(lc);
2856 ListCell *lc2;
2857 bool is_sorted;
2858 int presorted_keys;
2859
2860 foreach(lc2, rel->partial_pathlist)
2861 {
2862 Path *subpath = (Path *) lfirst(lc2);
2863 GatherMergePath *path;
2864
2865 is_sorted = pathkeys_count_contained_in(useful_pathkeys,
2866 subpath->pathkeys,
2867 &presorted_keys);
2868
2869 /*
2870 * We don't need to consider the case where a subpath is already
2871 * fully sorted because generate_gather_paths already creates a
2872 * gather merge path for every subpath that has pathkeys present.
2873 *
2874 * But since the subpath is already sorted, we know we don't need
2875 * to consider adding a sort (of either kind) on top of it, so
2876 * we can continue here.
2877 */
2878 if (is_sorted)
2879 continue;
2880
2881 /*
2882 * Consider regular sort for the cheapest partial path (for each
2883 * useful pathkeys). We know the path is not sorted, because we'd
2884 * not get here otherwise.
2885 *
2886 * This is not redundant with the gather paths created in
2887 * generate_gather_paths, because that doesn't generate ordered
2888 * output. Here we add an explicit sort to match the useful
2889 * ordering.
2890 */
2891 if (cheapest_partial_path == subpath)
2892 {
2893 Path *tmp;
2894
2895 tmp = (Path *) create_sort_path(root,
2896 rel,
2897 subpath,
2898 useful_pathkeys,
2899 -1.0);
2900
2901 rows = tmp->rows * tmp->parallel_workers;
2902
2903 path = create_gather_merge_path(root, rel,
2904 tmp,
2905 rel->reltarget,
2906 tmp->pathkeys,
2907 NULL,
2908 rowsp);
2909
2910 add_path(rel, &path->path);
2911
2912 /* Fall through */
2913 }
2914
2915 /*
2916 * Consider incremental sort, but only when the subpath is already
2917 * partially sorted on a pathkey prefix.
2918 */
2919 if (enable_incremental_sort && presorted_keys > 0)
2920 {
2921 Path *tmp;
2922
2923 /*
2924 * We should have already excluded pathkeys of length 1
2925 * because then presorted_keys > 0 would imply is_sorted was
2926 * true.
2927 */
2928 Assert(list_length(useful_pathkeys) != 1);
2929
2930 tmp = (Path *) create_incremental_sort_path(root,
2931 rel,
2932 subpath,
2933 useful_pathkeys,
2934 presorted_keys,
2935 -1);
2936
2937 path = create_gather_merge_path(root, rel,
2938 tmp,
2939 rel->reltarget,
2940 tmp->pathkeys,
2941 NULL,
2942 rowsp);
2943
2944 add_path(rel, &path->path);
2945 }
2946 }
2947 }
2948 }
2949
2950 /*
2951 * make_rel_from_joinlist
2952 * Build access paths using a "joinlist" to guide the join path search.
2953 *
2954 * See comments for deconstruct_jointree() for definition of the joinlist
2955 * data structure.
2956 */
2957 static RelOptInfo *
2958 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
2959 {
2960 int levels_needed;
2961 List *initial_rels;
2962 ListCell *jl;
2963
2964 /*
2965 * Count the number of child joinlist nodes. This is the depth of the
2966 * dynamic-programming algorithm we must employ to consider all ways of
2967 * joining the child nodes.
2968 */
2969 levels_needed = list_length(joinlist);
2970
2971 if (levels_needed <= 0)
2972 return NULL; /* nothing to do? */
2973
2974 /*
2975 * Construct a list of rels corresponding to the child joinlist nodes.
2976 * This may contain both base rels and rels constructed according to
2977 * sub-joinlists.
2978 */
2979 initial_rels = NIL;
2980 foreach(jl, joinlist)
2981 {
2982 Node *jlnode = (Node *) lfirst(jl);
2983 RelOptInfo *thisrel;
2984
2985 if (IsA(jlnode, RangeTblRef))
2986 {
2987 int varno = ((RangeTblRef *) jlnode)->rtindex;
2988
2989 thisrel = find_base_rel(root, varno);
2990 }
2991 else if (IsA(jlnode, List))
2992 {
2993 /* Recurse to handle subproblem */
2994 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
2995 }
2996 else
2997 {
2998 elog(ERROR, "unrecognized joinlist node type: %d",
2999 (int) nodeTag(jlnode));
3000 thisrel = NULL; /* keep compiler quiet */
3001 }
3002
3003 initial_rels = lappend(initial_rels, thisrel);
3004 }
3005
3006 if (levels_needed == 1)
3007 {
3008 /*
3009 * Single joinlist node, so we're done.
3010 */
3011 return (RelOptInfo *) linitial(initial_rels);
3012 }
3013 else
3014 {
3015 /*
3016 * Consider the different orders in which we could join the rels,
3017 * using a plugin, GEQO, or the regular join search code.
3018 *
3019 * We put the initial_rels list into a PlannerInfo field because
3020 * has_legal_joinclause() needs to look at it (ugly :-().
3021 */
3022 root->initial_rels = initial_rels;
3023
3024 if (join_search_hook)
3025 return (*join_search_hook) (root, levels_needed, initial_rels);
3026 else if (enable_geqo && levels_needed >= geqo_threshold)
3027 return geqo(root, levels_needed, initial_rels);
3028 else
3029 return standard_join_search(root, levels_needed, initial_rels);
3030 }
3031 }
3032
3033 /*
3034 * standard_join_search
3035 * Find possible joinpaths for a query by successively finding ways
3036 * to join component relations into join relations.
3037 *
3038 * 'levels_needed' is the number of iterations needed, ie, the number of
3039 * independent jointree items in the query. This is > 1.
3040 *
3041 * 'initial_rels' is a list of RelOptInfo nodes for each independent
3042 * jointree item. These are the components to be joined together.
3043 * Note that levels_needed == list_length(initial_rels).
3044 *
3045 * Returns the final level of join relations, i.e., the relation that is
3046 * the result of joining all the original relations together.
3047 * At least one implementation path must be provided for this relation and
3048 * all required sub-relations.
3049 *
3050 * To support loadable plugins that modify planner behavior by changing the
3051 * join searching algorithm, we provide a hook variable that lets a plugin
3052 * replace or supplement this function. Any such hook must return the same
3053 * final join relation as the standard code would, but it might have a
3054 * different set of implementation paths attached, and only the sub-joinrels
3055 * needed for these paths need have been instantiated.
3056 *
3057 * Note to plugin authors: the functions invoked during standard_join_search()
3058 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
3059 * than one join-order search, you'll probably need to save and restore the
3060 * original states of those data structures. See geqo_eval() for an example.
3061 */
3062 RelOptInfo *
3063 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
3064 {
3065 int lev;
3066 RelOptInfo *rel;
3067
3068 /*
3069 * This function cannot be invoked recursively within any one planning
3070 * problem, so join_rel_level[] can't be in use already.
3071 */
3072 Assert(root->join_rel_level == NULL);
3073
3074 /*
3075 * We employ a simple "dynamic programming" algorithm: we first find all
3076 * ways to build joins of two jointree items, then all ways to build joins
3077 * of three items (from two-item joins and single items), then four-item
3078 * joins, and so on until we have considered all ways to join all the
3079 * items into one rel.
3080 *
3081 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
3082 * set root->join_rel_level[1] to represent all the single-jointree-item
3083 * relations.
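*
* For example, with base rels A, B, C, and D, level 2 holds the valid
* two-way joins (AB, AC, ...), level 3 joins those with the remaining
* single rels, and level 4 (combining 3+1 and 2+2 where legal) produces
* the single four-way joinrel.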
3084 */
3085 root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
3086
3087 root->join_rel_level[1] = initial_rels;
3088
3089 for (lev = 2; lev <= levels_needed; lev++)
3090 {
3091 ListCell *lc;
3092
3093 /*
3094 * Determine all possible pairs of relations to be joined at this
3095 * level, and build paths for making each one from every available
3096 * pair of lower-level relations.
3097 */
3098 join_search_one_level(root, lev);
3099
3100 /*
3101 * Run generate_partitionwise_join_paths() and generate_gather_paths()
3102 * for each just-processed joinrel. We could not do this earlier
3103 * because both regular and partial paths can get added to a
3104 * particular joinrel at multiple times within join_search_one_level.
3105 *
3106 * After that, we're done creating paths for the joinrel, so run
3107 * set_cheapest().
3108 */
3109 foreach(lc, root->join_rel_level[lev])
3110 {
3111 rel = (RelOptInfo *) lfirst(lc);
3112
3113 /* Create paths for partitionwise joins. */
3114 generate_partitionwise_join_paths(root, rel);
3115
3116 /*
3117 * Except for the topmost scan/join rel, consider gathering
3118 * partial paths. We'll do the same for the topmost scan/join rel
3119 * once we know the final targetlist (see grouping_planner).
3120 */
3121 if (lev < levels_needed)
3122 generate_useful_gather_paths(root, rel, false);
3123
3124 /* Find and save the cheapest paths for this rel */
3125 set_cheapest(rel);
3126
3127 #ifdef OPTIMIZER_DEBUG
3128 debug_print_rel(root, rel);
3129 #endif
3130 }
3131 }
3132
3133 /*
3134 * We should have a single rel at the final level.
3135 */
3136 if (root->join_rel_level[levels_needed] == NIL)
3137 elog(ERROR, "failed to build any %d-way joins", levels_needed);
3138 Assert(list_length(root->join_rel_level[levels_needed]) == 1);
3139
3140 rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
3141
3142 root->join_rel_level = NULL;
3143
3144 return rel;
3145 }
3146
3147 /*****************************************************************************
3148 * PUSHING QUALS DOWN INTO SUBQUERIES
3149 *****************************************************************************/
3150
3151 /*
3152 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
3153 *
3154 * subquery is the particular component query being checked. topquery
3155 * is the top component of a set-operations tree (the same Query if no
3156 * set-op is involved).
3157 *
3158 * Conditions checked here:
3159 *
3160 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
3161 * since that could change the set of rows returned.
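* For example, in "SELECT * FROM (SELECT * FROM t LIMIT 10) ss WHERE
* x > 0", filtering before the LIMIT could select a different ten rows
* than filtering the LIMIT's output would.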
3162 *
3163 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
3164 * quals into it, because that could change the results.
3165 *
3166 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
3167 * This is because upper-level quals should semantically be evaluated only
3168 * once per distinct row, not once per original row, and if the qual is
3169 * volatile then extra evaluations could change the results. (This issue
3170 * does not apply to other forms of aggregation such as GROUP BY, because
3171 * when those are present we push into HAVING not WHERE, so that the quals
3172 * are still applied after aggregation.)
3173 *
3174 * 4. If the subquery contains window functions, we cannot push volatile quals
3175 * into it. The issue here is a bit different from DISTINCT: a volatile qual
3176 * might succeed for some rows of a window partition and fail for others,
3177 * thereby changing the partition contents and thus the window functions'
3178 * results for rows that remain.
3179 *
3180 * 5. If the subquery contains any set-returning functions in its targetlist,
3181 * we cannot push volatile quals into it. That would push them below the SRFs
3182 * and thereby change the number of times they are evaluated. Also, a
3183 * volatile qual could succeed for some SRF output rows and fail for others,
3184 * a behavior that cannot occur if it's evaluated before SRF expansion.
3185 *
3186 * 6. If the subquery has nonempty grouping sets, we cannot push down any
3187 * quals. The concern here is that a qual referencing a "constant" grouping
3188 * column could get constant-folded, which would be improper because the value
3189 * is potentially nullable by grouping-set expansion. This restriction could
3190 * be removed if we had a parsetree representation that shows that such
3191 * grouping columns are not really constant. (There are other ideas that
3192 * could be used to relax this restriction, but that's the approach most
3193 * likely to get taken in the future. Note that there's not much to be gained
3194 * so long as subquery_planner can't move HAVING clauses to WHERE within such
3195 * a subquery.)
3196 *
3197 * In addition, we make several checks on the subquery's output columns to see
3198 * if it is safe to reference them in pushed-down quals. If output column k
3199 * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
3200 * to true, but we don't reject the subquery overall since column k might not
3201 * be referenced by some/all quals. The unsafeColumns[] array will be
3202 * consulted later by qual_is_pushdown_safe(). It's better to do it this way
3203 * than to make the checks directly in qual_is_pushdown_safe(), because when
3204 * the subquery involves set operations we have to check the output
3205 * expressions in each arm of the set op.
3206 *
3207 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
3208 * we're effectively assuming that the quals cannot distinguish values that
3209 * the DISTINCT's equality operator sees as equal, yet there are many
3210 * counterexamples to that assumption. However use of such a qual with a
3211 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
3212 * "equal" value will be chosen as the output value by the DISTINCT operation.
3213 * So we don't worry too much about that. Another objection is that if the
3214 * qual is expensive to evaluate, running it for each original row might cost
3215 * more than we save by eliminating rows before the DISTINCT step. But it
3216 * would be very hard to estimate that at this stage, and in practice pushdown
3217 * seldom seems to make things worse, so we ignore that problem too.
3218 *
3219 * Note: likewise, pushing quals into a subquery with window functions is a
3220 * bit dubious: the quals might remove some rows of a window partition while
3221 * leaving others, causing changes in the window functions' results for the
3222 * surviving rows. We insist that such a qual reference only partitioning
3223 * columns, but again that only protects us if the qual does not distinguish
3224 * values that the partitioning equality operator sees as equal. The risks
3225 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3226 * occurs and thus there is no theoretical problem with such a qual. But
3227 * we'll do this anyway because the potential performance benefits are very
3228 * large, and we've seen no field complaints about the longstanding comparable
3229 * behavior with DISTINCT.
3230 */
3231 static bool
3232 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
3233 pushdown_safety_info *safetyInfo)
3234 {
3235 SetOperationStmt *topop;
3236
3237 /* Check point 1 */
3238 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
3239 return false;
3240
3241 /* Check point 6 */
3242 if (subquery->groupClause && subquery->groupingSets)
3243 return false;
3244
3245 /* Check points 3, 4, and 5 */
3246 if (subquery->distinctClause ||
3247 subquery->hasWindowFuncs ||
3248 subquery->hasTargetSRFs)
3249 safetyInfo->unsafeVolatile = true;
3250
3251 /*
3252 * If we're at a leaf query, check for unsafe expressions in its target
3253 * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
3254 * setop trees have only simple Vars in their tlists, so no need to check
3255 * them.)
3256 */
3257 if (subquery->setOperations == NULL)
3258 check_output_expressions(subquery, safetyInfo);
3259
3260 /* Are we at top level, or looking at a setop component? */
3261 if (subquery == topquery)
3262 {
3263 /* Top level, so check any component queries */
3264 if (subquery->setOperations != NULL)
3265 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
3266 safetyInfo))
3267 return false;
3268 }
3269 else
3270 {
3271 /* Setop component must not have more components (too weird) */
3272 if (subquery->setOperations != NULL)
3273 return false;
3274 /* Check whether setop component output types match top level */
3275 topop = castNode(SetOperationStmt, topquery->setOperations);
3276 Assert(topop);
3277 compare_tlist_datatypes(subquery->targetList,
3278 topop->colTypes,
3279 safetyInfo);
3280 }
3281 return true;
3282 }
3283
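/*
 * Illustrative sketch (not part of this file's logic): roughly how a caller
 * such as set_subquery_pathlist wires these routines together.  Details are
 * approximate and declarations are omitted; in particular the loop over
 * baserestrictinfo is condensed here.
 *
 *		pushdown_safety_info safetyInfo;
 *
 *		memset(&safetyInfo, 0, sizeof(safetyInfo));
 *		safetyInfo.unsafeColumns = (bool *)
 *			palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
 *		safetyInfo.unsafeLeaky = rte->security_barrier;
 *
 *		if (rel->baserestrictinfo != NIL &&
 *			subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
 *		{
 *			foreach(l, rel->baserestrictinfo)
 *			{
 *				RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
 *				Node	   *clause = (Node *) rinfo->clause;
 *
 *				if (!rinfo->pseudoconstant &&
 *					qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
 *					subquery_push_qual(subquery, rte, rti, clause);
 *				else
 *					upperrestrictlist = lappend(upperrestrictlist, rinfo);
 *			}
 *			rel->baserestrictinfo = upperrestrictlist;
 *		}
 *
 * Note that unsafeColumns[] is indexed by resno, hence the "+ 1" above.
 */
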
3284 /*
3285 * Helper routine to recurse through setOperations tree
3286 */
3287 static bool
3288 recurse_pushdown_safe(Node *setOp, Query *topquery,
3289 pushdown_safety_info *safetyInfo)
3290 {
3291 if (IsA(setOp, RangeTblRef))
3292 {
3293 RangeTblRef *rtr = (RangeTblRef *) setOp;
3294 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
3295 Query *subquery = rte->subquery;
3296
3297 Assert(subquery != NULL);
3298 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
3299 }
3300 else if (IsA(setOp, SetOperationStmt))
3301 {
3302 SetOperationStmt *op = (SetOperationStmt *) setOp;
3303
3304 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3305 if (op->op == SETOP_EXCEPT)
3306 return false;
3307 /* Else recurse */
3308 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
3309 return false;
3310 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
3311 return false;
3312 }
3313 else
3314 {
3315 elog(ERROR, "unrecognized node type: %d",
3316 (int) nodeTag(setOp));
3317 }
3318 return true;
3319 }
3320
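/*
 * For illustration: given a subquery of the form
 *		(SELECT a FROM t1 UNION SELECT a FROM t2) INTERSECT
 *		(SELECT a FROM t3 EXCEPT SELECT a FROM t4)
 * the recursion reaches the EXCEPT node and returns false, so no quals are
 * pushed into any arm of the setop tree.
 */
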
3321 /*
3322 * check_output_expressions - check subquery's output expressions for safety
3323 *
3324 * There are several cases in which it's unsafe to push down an upper-level
3325 * qual if it references a particular output column of a subquery. We check
3326 * each output column of the subquery and set unsafeColumns[k] to true if
3327 * that column is unsafe for a pushed-down qual to reference. The conditions
3328 * checked here are:
3329 *
3330 * 1. We must not push down any quals that refer to subselect outputs that
3331 * return sets, else we'd introduce functions-returning-sets into the
3332 * subquery's WHERE/HAVING quals.
3333 *
3334 * 2. We must not push down any quals that refer to subselect outputs that
3335 * contain volatile functions, for fear of introducing strange results due
3336 * to multiple evaluation of a volatile function.
3337 *
3338 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3339 * refer to non-DISTINCT output columns, because that could change the set
3340 * of rows returned. (This condition is vacuous for DISTINCT, because then
3341 * there are no non-DISTINCT output columns, so we needn't check. Note that
3342 * subquery_is_pushdown_safe already reported that we can't use volatile
3343 * quals if there's DISTINCT or DISTINCT ON.)
3344 *
3345 * 4. If the subquery has any window functions, we must not push down quals
3346 * that reference any output columns that are not listed in all the subquery's
3347 * window PARTITION BY clauses. We can push down quals that use only
3348 * partitioning columns because they should succeed or fail identically for
3349 * every row of any one window partition, and totally excluding some
3350 * partitions will not change a window function's results for remaining
3351 * partitions. (Again, this also requires nonvolatile quals, but
3352 * subquery_is_pushdown_safe handles that.)
3353 */
3354 static void
3355 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
3356 {
3357 ListCell *lc;
3358
3359 foreach(lc, subquery->targetList)
3360 {
3361 TargetEntry *tle = (TargetEntry *) lfirst(lc);
3362
3363 if (tle->resjunk)
3364 continue; /* ignore resjunk columns */
3365
3366 /* We need not check further if output col is already known unsafe */
3367 if (safetyInfo->unsafeColumns[tle->resno])
3368 continue;
3369
3370 /* Functions returning sets are unsafe (point 1) */
3371 if (subquery->hasTargetSRFs &&
3372 expression_returns_set((Node *) tle->expr))
3373 {
3374 safetyInfo->unsafeColumns[tle->resno] = true;
3375 continue;
3376 }
3377
3378 /* Volatile functions are unsafe (point 2) */
3379 if (contain_volatile_functions((Node *) tle->expr))
3380 {
3381 safetyInfo->unsafeColumns[tle->resno] = true;
3382 continue;
3383 }
3384
3385 /* If subquery uses DISTINCT ON, check point 3 */
3386 if (subquery->hasDistinctOn &&
3387 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
3388 {
3389 /* non-DISTINCT column, so mark it unsafe */
3390 safetyInfo->unsafeColumns[tle->resno] = true;
3391 continue;
3392 }
3393
3394 /* If subquery uses window functions, check point 4 */
3395 if (subquery->hasWindowFuncs &&
3396 !targetIsInAllPartitionLists(tle, subquery))
3397 {
3398 /* not present in all PARTITION BY clauses, so mark it unsafe */
3399 safetyInfo->unsafeColumns[tle->resno] = true;
3400 continue;
3401 }
3402 }
3403 }
3404
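/*
 * As an illustration of the checks above: given the subquery
 *		SELECT DISTINCT ON (a) a, b, random() AS r FROM t
 * column "r" is marked unsafe by point 2 (volatile) and "b" by point 3
 * (not a DISTINCT ON column), so a pushed-down qual may reference only "a".
 */
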
3405 /*
3406 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3407 * push quals into each component query, but the quals can only reference
3408 * subquery columns that suffer no type coercions in the set operation.
3409 * Otherwise there are possible semantic gotchas. So, we check the
3410 * component queries to see if any of them have output types different from
3411 * the top-level setop outputs. unsafeColumns[k] is set true if column k
3412 * has different type in any component.
3413 *
3414 * We don't have to care about typmods here: the only allowed difference
3415 * between set-op input and output typmods is that the input has a specific
3416 * typmod while the output is -1, and that does not require a coercion.
3417 *
3418 * tlist is a subquery tlist.
3419 * colTypes is an OID list of the top-level setop's output column types.
3420 * safetyInfo->unsafeColumns[] is the result array.
3421 */
3422 static void
3423 compare_tlist_datatypes(List *tlist, List *colTypes,
3424 pushdown_safety_info *safetyInfo)
3425 {
3426 ListCell *l;
3427 ListCell *colType = list_head(colTypes);
3428
3429 foreach(l, tlist)
3430 {
3431 TargetEntry *tle = (TargetEntry *) lfirst(l);
3432
3433 if (tle->resjunk)
3434 continue; /* ignore resjunk columns */
3435 if (colType == NULL)
3436 elog(ERROR, "wrong number of tlist entries");
3437 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
3438 safetyInfo->unsafeColumns[tle->resno] = true;
3439 colType = lnext(colTypes, colType);
3440 }
3441 if (colType != NULL)
3442 elog(ERROR, "wrong number of tlist entries");
3443 }
3444
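/*
 * For example: in
 *		SELECT int_col FROM t1 UNION ALL SELECT numeric_col FROM t2
 * the setop's output column has type numeric, so the first arm's tlist
 * entry (type integer) fails the comparison and unsafeColumns[1] is set:
 * a qual pushed below the UNION ALL would see the value before it is
 * coerced.
 */
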
3445 /*
3446 * targetIsInAllPartitionLists
3447 * True if the TargetEntry is listed in the PARTITION BY clause
3448 * of every window defined in the query.
3449 *
3450 * It would be safe to ignore windows not actually used by any window
3451 * function, but it's not easy to get that info at this stage; and it's
3452 * unlikely to be useful to spend any extra cycles getting it, since
3453 * unreferenced window definitions are probably infrequent in practice.
3454 */
3455 static bool
3456 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
3457 {
3458 ListCell *lc;
3459
3460 foreach(lc, query->windowClause)
3461 {
3462 WindowClause *wc = (WindowClause *) lfirst(lc);
3463
3464 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
3465 return false;
3466 }
3467 return true;
3468 }
3469
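/*
 * For example: in a query computing both
 *		rank() OVER (PARTITION BY a)  and  sum(x) OVER (PARTITION BY a, b)
 * only "a" appears in every PARTITION BY list, so only "a" passes this
 * test; a qual referencing "b" could remove some rows of the first
 * window's partitions while leaving others.
 */
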
3470 /*
3471 * qual_is_pushdown_safe - is a particular qual safe to push down?
3472 *
3473 * qual is a restriction clause applying to the given subquery (whose RTE
3474 * has index rti in the parent query).
3475 *
3476 * Conditions checked here:
3477 *
3478 * 1. The qual must not contain any SubPlans (mainly because I'm not sure
3479 * it will work correctly: SubLinks will already have been transformed into
3480 * SubPlans in the qual, but not in the subquery). Note that SubLinks that
3481 * transform to initplans are safe, and will be accepted here because what
3482 * we'll see in the qual is just a Param referencing the initplan output.
3483 *
3484 * 2. If unsafeVolatile is set, the qual must not contain any volatile
3485 * functions.
3486 *
3487 * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
3488 * that are passed Var nodes, and therefore might reveal values from the
3489 * subquery as side effects.
3490 *
3491 * 4. The qual must not refer to the whole-row output of the subquery
3492 * (since there is no easy way to name that within the subquery itself).
3493 *
3494 * 5. The qual must not refer to any subquery output columns that were
3495 * found to be unsafe to reference by subquery_is_pushdown_safe().
3496 */
3497 static bool
3498 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
3499 pushdown_safety_info *safetyInfo)
3500 {
3501 bool safe = true;
3502 List *vars;
3503 ListCell *vl;
3504
3505 /* Refuse subselects (point 1) */
3506 if (contain_subplans(qual))
3507 return false;
3508
3509 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
3510 if (safetyInfo->unsafeVolatile &&
3511 contain_volatile_functions(qual))
3512 return false;
3513
3514 /* Refuse leaky quals if told to (point 3) */
3515 if (safetyInfo->unsafeLeaky &&
3516 contain_leaked_vars(qual))
3517 return false;
3518
3519 /*
3520 * It would be unsafe to push down window function calls, but at least for
3521 * the moment we could never see any in a qual anyhow. (The same applies
3522 * to aggregates, which we check for in pull_var_clause below.)
3523 */
3524 Assert(!contain_window_function(qual));
3525
3526 /*
3527 * Examine all Vars used in clause. Since it's a restriction clause, all
3528 * such Vars must refer to subselect output columns ... unless this is
3529 * part of a LATERAL subquery, in which case there could be lateral
3530 * references.
3531 */
3532 vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
3533 foreach(vl, vars)
3534 {
3535 Var *var = (Var *) lfirst(vl);
3536
3537 /*
3538 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3539 * It's not clear whether a PHV could safely be pushed down, and even
3540 * less clear whether such a situation could arise in any cases of
3541 * practical interest anyway. So for the moment, just refuse to push
3542 * down.
3543 */
3544 if (!IsA(var, Var))
3545 {
3546 safe = false;
3547 break;
3548 }
3549
3550 /*
3551 * Punt if we find any lateral references. It would be safe to push
3552 * these down, but we'd have to convert them into outer references,
3553 * which subquery_push_qual lacks the infrastructure to do. The case
3554 * arises so seldom that it doesn't seem worth working hard on.
3555 */
3556 if (var->varno != rti)
3557 {
3558 safe = false;
3559 break;
3560 }
3561
3562 /* Subqueries have no system columns */
3563 Assert(var->varattno >= 0);
3564
3565 /* Check point 4 */
3566 if (var->varattno == 0)
3567 {
3568 safe = false;
3569 break;
3570 }
3571
3572 /* Check point 5 */
3573 if (safetyInfo->unsafeColumns[var->varattno])
3574 {
3575 safe = false;
3576 break;
3577 }
3578 }
3579
3580 list_free(vars);
3581
3582 return safe;
3583 }
3584
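/*
 * For illustration: with "SELECT * FROM (SELECT a, b FROM t) ss", the qual
 * "ss.a = 42" passes all the checks above, while "ss.a IN (SELECT ...)" is
 * rejected by point 1 (it contains a SubPlan), and a qual referencing the
 * whole-row value "ss" is rejected by point 4.
 */
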
3585 /*
3586 * subquery_push_qual - push down a qual that we have determined is safe
3587 */
3588 static void
3589 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
3590 {
3591 if (subquery->setOperations != NULL)
3592 {
3593 /* Recurse to push it separately to each component query */
3594 recurse_push_qual(subquery->setOperations, subquery,
3595 rte, rti, qual);
3596 }
3597 else
3598 {
3599 /*
3600 * We need to replace Vars in the qual (which must refer to outputs of
3601 * the subquery) with copies of the subquery's targetlist expressions.
3602 * Note that at this point, any uplevel Vars in the qual should have
3603 * been replaced with Params, so they need no work.
3604 *
3605 * This step also ensures that when we are pushing into a setop tree,
3606 * each component query gets its own copy of the qual.
3607 */
3608 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
3609 subquery->targetList,
3610 REPLACEVARS_REPORT_ERROR, 0,
3611 &subquery->hasSubLinks);
3612
3613 /*
3614 * Now attach the qual to the proper place: normally WHERE, but if the
3615 * subquery uses grouping or aggregation, put it in HAVING (since the
3616 * qual really refers to the group-result rows).
3617 */
3618 if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
3619 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
3620 else
3621 subquery->jointree->quals =
3622 make_and_qual(subquery->jointree->quals, qual);
3623
3624 /*
3625 * We need not change the subquery's hasAggs or hasSubLinks flags,
3626 * since we can't be pushing down any aggregates that weren't there
3627 * before, and we don't push down subselects at all.
3628 */
3629 }
3630 }
3631
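/*
 * For example: pushing the qual "cnt > 10" into
 *		SELECT g, count(*) AS cnt FROM t GROUP BY g
 * first rewrites the qual's Var into the matching tlist expression, giving
 * "count(*) > 10", and then, because the subquery uses grouping, attaches
 * the result to HAVING rather than WHERE.
 */
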
3632 /*
3633 * Helper routine to recurse through setOperations tree
3634 */
3635 static void
3636 recurse_push_qual(Node *setOp, Query *topquery,
3637 RangeTblEntry *rte, Index rti, Node *qual)
3638 {
3639 if (IsA(setOp, RangeTblRef))
3640 {
3641 RangeTblRef *rtr = (RangeTblRef *) setOp;
3642 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
3643 Query *subquery = subrte->subquery;
3644
3645 Assert(subquery != NULL);
3646 subquery_push_qual(subquery, rte, rti, qual);
3647 }
3648 else if (IsA(setOp, SetOperationStmt))
3649 {
3650 SetOperationStmt *op = (SetOperationStmt *) setOp;
3651
3652 recurse_push_qual(op->larg, topquery, rte, rti, qual);
3653 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
3654 }
3655 else
3656 {
3657 elog(ERROR, "unrecognized node type: %d",
3658 (int) nodeTag(setOp));
3659 }
3660 }
3661
3662 /*****************************************************************************
3663 * SIMPLIFYING SUBQUERY TARGETLISTS
3664 *****************************************************************************/
3665
3666 /*
3667 * remove_unused_subquery_outputs
3668 * Remove subquery targetlist items we don't need
3669 *
3670 * It's possible, even likely, that the upper query does not read all the
3671 * output columns of the subquery. We can remove any such outputs that are
3672 * not needed by the subquery itself (e.g., as sort/group columns) and do not
3673 * affect semantics otherwise (e.g., volatile functions can't be removed).
3674 * This is useful not only because we might be able to remove expensive-to-
3675 * compute expressions, but because deletion of output columns might allow
3676 * optimizations such as join removal to occur within the subquery.
3677 *
3678 * To avoid affecting column numbering in the targetlist, we don't physically
3679 * remove unused tlist entries, but rather replace their expressions with NULL
3680 * constants. This is implemented by modifying subquery->targetList.
3681 */
3682 static void
3683 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
3684 {
3685 Bitmapset *attrs_used = NULL;
3686 ListCell *lc;
3687
3688 /*
3689 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
3690 * could update all the child SELECTs' tlists, but it seems not worth the
3691 * trouble presently.
3692 */
3693 if (subquery->setOperations)
3694 return;
3695
3696 /*
3697 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
3698 * time: all its output columns must be used in the distinctClause.
3699 */
3700 if (subquery->distinctClause && !subquery->hasDistinctOn)
3701 return;
3702
3703 /*
3704 * Collect a bitmap of all the output column numbers used by the upper
3705 * query.
3706 *
3707 * Add all the attributes needed for joins or final output. Note: we must
3708 * look at rel's targetlist, not the attr_needed data, because attr_needed
3709 * isn't computed for inheritance child rels, cf set_append_rel_size().
3710 * (XXX might be worth changing that sometime.)
3711 */
3712 pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
3713
3714 /* Add all the attributes used by un-pushed-down restriction clauses. */
3715 foreach(lc, rel->baserestrictinfo)
3716 {
3717 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
3718
3719 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
3720 }
3721
3722 /*
3723 * If there's a whole-row reference to the subquery, we can't remove
3724 * anything.
3725 */
3726 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
3727 return;
3728
3729 /*
3730 * Run through the tlist and zap entries we don't need. It's okay to
3731 * modify the tlist items in-place because set_subquery_pathlist made a
3732 * copy of the subquery.
3733 */
3734 foreach(lc, subquery->targetList)
3735 {
3736 TargetEntry *tle = (TargetEntry *) lfirst(lc);
3737 Node *texpr = (Node *) tle->expr;
3738
3739 /*
3740 * If it has a sortgroupref number, it's used in some sort/group
3741 * clause so we'd better not remove it. Also, don't remove any
3742 * resjunk columns, since their reason for being has nothing to do
3743 * with anybody reading the subquery's output. (It's likely that
3744 * resjunk columns in a sub-SELECT would always have ressortgroupref
3745 * set, but even if they don't, it seems imprudent to remove them.)
3746 */
3747 if (tle->ressortgroupref || tle->resjunk)
3748 continue;
3749
3750 /*
3751 * If it's used by the upper query, we can't remove it.
3752 */
3753 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
3754 attrs_used))
3755 continue;
3756
3757 /*
3758 * If it contains a set-returning function, we can't remove it since
3759 * that could change the number of rows returned by the subquery.
3760 */
3761 if (subquery->hasTargetSRFs &&
3762 expression_returns_set(texpr))
3763 continue;
3764
3765 /*
3766 * If it contains volatile functions, we daren't remove it for fear
3767 * that the user is expecting their side-effects to happen.
3768 */
3769 if (contain_volatile_functions(texpr))
3770 continue;
3771
3772 /*
3773 * OK, we don't need it. Replace the expression with a NULL constant.
3774 * Preserve the exposed type of the expression, in case something
3775 * looks at the rowtype of the subquery's result.
3776 */
3777 tle->expr = (Expr *) makeNullConst(exprType(texpr),
3778 exprTypmod(texpr),
3779 exprCollation(texpr));
3780 }
3781 }
3782
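/*
 * For example: in
 *		SELECT x FROM (SELECT x, expensive_func(y) AS e FROM t) ss
 * the unreferenced output "e" is replaced by a NULL constant of the same
 * type (assuming expensive_func is neither volatile nor set-returning),
 * so it is never evaluated.
 */
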
3783 /*
3784 * create_partial_bitmap_paths
3785 * Build partial bitmap heap path for the relation
3786 */
3787 void
3788 create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
3789 Path *bitmapqual)
3790 {
3791 int parallel_workers;
3792 double pages_fetched;
3793
3794 /* Compute heap pages for bitmap heap scan */
3795 pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
3796 NULL, NULL);
3797
3798 parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
3799 max_parallel_workers_per_gather);
3800
3801 if (parallel_workers <= 0)
3802 return;
3803
3804 add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
3805 bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
3806 }
3807
3808 /*
3809 * Compute the number of parallel workers that should be used to scan a
3810 * relation.  We compute a worker count based on the size of the heap to be
3811 * scanned and another based on the size of the index to be scanned, then
3812 * choose the minimum of the two.
3813 *
3814 * "heap_pages" is the number of pages from the table that we expect to scan, or
3815 * -1 if we don't expect to scan any.
3816 *
3817 * "index_pages" is the number of pages from the index that we expect to scan, or
3818 * -1 if we don't expect to scan any.
3819 *
3820 * "max_workers" is caller's limit on the number of workers. This typically
3821 * comes from a GUC.
3822 */
3823 int
3824 compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
3825 int max_workers)
3826 {
3827 int parallel_workers = 0;
3828
3829 /*
3830 * If the user has set the parallel_workers reloption, use that; otherwise
3831 * select a default number of workers.
3832 */
3833 if (rel->rel_parallel_workers != -1)
3834 parallel_workers = rel->rel_parallel_workers;
3835 else
3836 {
3837 /*
3838 * If the number of pages being scanned is insufficient to justify a
3839 * parallel scan, just return zero ... unless it's an inheritance
3840 * child. In that case, we want to generate a parallel path here
3841 * anyway. It might not be worthwhile just for this relation, but
3842 * when combined with all of its inheritance siblings it may well pay
3843 * off.
3844 */
3845 if (rel->reloptkind == RELOPT_BASEREL &&
3846 ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
3847 (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
3848 return 0;
3849
3850 if (heap_pages >= 0)
3851 {
3852 int heap_parallel_threshold;
3853 int heap_parallel_workers = 1;
3854
3855 /*
3856 * Select the number of workers based on the log of the size of
3857 * the relation. This probably needs to be a good deal more
3858 * sophisticated, but we need something here for now. Note that
3859 * the upper limit of the min_parallel_table_scan_size GUC is
3860 * chosen to prevent overflow here.
3861 */
3862 heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
3863 while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
3864 {
3865 heap_parallel_workers++;
3866 heap_parallel_threshold *= 3;
3867 if (heap_parallel_threshold > INT_MAX / 3)
3868 break; /* avoid overflow */
3869 }
3870
3871 parallel_workers = heap_parallel_workers;
3872 }
3873
3874 if (index_pages >= 0)
3875 {
3876 int index_parallel_workers = 1;
3877 int index_parallel_threshold;
3878
3879 /* same calculation as for heap_pages above */
3880 index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
3881 while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
3882 {
3883 index_parallel_workers++;
3884 index_parallel_threshold *= 3;
3885 if (index_parallel_threshold > INT_MAX / 3)
3886 break; /* avoid overflow */
3887 }
3888
3889 if (parallel_workers > 0)
3890 parallel_workers = Min(parallel_workers, index_parallel_workers);
3891 else
3892 parallel_workers = index_parallel_workers;
3893 }
3894 }
3895
3896 /* In no case use more than caller supplied maximum number of workers */
3897 parallel_workers = Min(parallel_workers, max_workers);
3898
3899 return parallel_workers;
3900 }
3901
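/*
 * Worked example (assuming the default min_parallel_table_scan_size of
 * 1024 pages and no parallel_workers reloption): for a 10000-page heap
 * with index_pages = -1, the loop grants 1 worker at 1024 pages, 2 at
 * 3072, and 3 at 9216; a fourth would require 27648 pages.  The result
 * is then clamped to Min(3, max_workers).
 */
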
3902 /*
3903 * generate_partitionwise_join_paths
3904 * Create paths representing partitionwise join for given partitioned
3905 * join relation.
3906 *
3907 * This must not be called until after we are done adding paths for all
3908 * child-joins. Otherwise, add_path might delete a path to which some path
3909 * generated here has a reference.
3910 */
3911 void
3912 generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
3913 {
3914 List *live_children = NIL;
3915 int cnt_parts;
3916 int num_parts;
3917 RelOptInfo **part_rels;
3918
3919 /* Handle only join relations here. */
3920 if (!IS_JOIN_REL(rel))
3921 return;
3922
3923 /* We've nothing to do if the relation is not partitioned. */
3924 if (!IS_PARTITIONED_REL(rel))
3925 return;
3926
3927 /* The relation should have consider_partitionwise_join set. */
3928 Assert(rel->consider_partitionwise_join);
3929
3930 /* Guard against stack overflow due to overly deep partition hierarchy. */
3931 check_stack_depth();
3932
3933 num_parts = rel->nparts;
3934 part_rels = rel->part_rels;
3935
3936 /* Collect non-dummy child-joins. */
3937 for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
3938 {
3939 RelOptInfo *child_rel = part_rels[cnt_parts];
3940
3941 /* If it's been pruned entirely, it's certainly dummy. */
3942 if (child_rel == NULL)
3943 continue;
3944
3945 /* Add partitionwise join paths for partitioned child-joins. */
3946 generate_partitionwise_join_paths(root, child_rel);
3947
3948 set_cheapest(child_rel);
3949
3950 /* Dummy children will not be scanned, so ignore those. */
3951 if (IS_DUMMY_REL(child_rel))
3952 continue;
3953
3954 #ifdef OPTIMIZER_DEBUG
3955 debug_print_rel(root, child_rel);
3956 #endif
3957
3958 live_children = lappend(live_children, child_rel);
3959 }
3960
3961 /* If all child-joins are dummy, parent join is also dummy. */
3962 if (!live_children)
3963 {
3964 mark_dummy_rel(rel);
3965 return;
3966 }
3967
3968 /* Build additional paths for this rel from child-join paths. */
3969 add_paths_to_append_rel(root, rel, live_children);
3970 list_free(live_children);
3971 }
3972
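/*
 * Usage sketch (approximate): the join search invokes this only after all
 * child-join paths have been built, then finalizes the parent rel, e.g.
 *
 *		generate_partitionwise_join_paths(root, rel);
 *		...
 *		set_cheapest(rel);
 *
 * as standard_join_search does for each relation at each join level.
 */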
3973
3974 /*****************************************************************************
3975 * DEBUG SUPPORT
3976 *****************************************************************************/
3977
3978 #ifdef OPTIMIZER_DEBUG
3979
3980 static void
3981 print_relids(PlannerInfo *root, Relids relids)
3982 {
3983 int x;
3984 bool first = true;
3985
3986 x = -1;
3987 while ((x = bms_next_member(relids, x)) >= 0)
3988 {
3989 if (!first)
3990 printf(" ");
3991 if (x < root->simple_rel_array_size &&
3992 root->simple_rte_array[x])
3993 printf("%s", root->simple_rte_array[x]->eref->aliasname);
3994 else
3995 printf("%d", x);
3996 first = false;
3997 }
3998 }
3999
4000 static void
4001 print_restrictclauses(PlannerInfo *root, List *clauses)
4002 {
4003 ListCell *l;
4004
4005 foreach(l, clauses)
4006 {
4007 RestrictInfo *c = lfirst(l);
4008
4009 print_expr((Node *) c->clause, root->parse->rtable);
4010 if (lnext(clauses, l))
4011 printf(", ");
4012 }
4013 }
4014
4015 static void
4016 print_path(PlannerInfo *root, Path *path, int indent)
4017 {
4018 const char *ptype;
4019 bool join = false;
4020 Path *subpath = NULL;
4021 int i;
4022
4023 switch (nodeTag(path))
4024 {
4025 case T_Path:
4026 switch (path->pathtype)
4027 {
4028 case T_SeqScan:
4029 ptype = "SeqScan";
4030 break;
4031 case T_SampleScan:
4032 ptype = "SampleScan";
4033 break;
4034 case T_FunctionScan:
4035 ptype = "FunctionScan";
4036 break;
4037 case T_TableFuncScan:
4038 ptype = "TableFuncScan";
4039 break;
4040 case T_ValuesScan:
4041 ptype = "ValuesScan";
4042 break;
4043 case T_CteScan:
4044 ptype = "CteScan";
4045 break;
4046 case T_NamedTuplestoreScan:
4047 ptype = "NamedTuplestoreScan";
4048 break;
4049 case T_Result:
4050 ptype = "Result";
4051 break;
4052 case T_WorkTableScan:
4053 ptype = "WorkTableScan";
4054 break;
4055 default:
4056 ptype = "???Path";
4057 break;
4058 }
4059 break;
4060 case T_IndexPath:
4061 ptype = "IdxScan";
4062 break;
4063 case T_BitmapHeapPath:
4064 ptype = "BitmapHeapScan";
4065 break;
4066 case T_BitmapAndPath:
4067 ptype = "BitmapAndPath";
4068 break;
4069 case T_BitmapOrPath:
4070 ptype = "BitmapOrPath";
4071 break;
4072 case T_TidPath:
4073 ptype = "TidScan";
4074 break;
4075 case T_SubqueryScanPath:
4076 ptype = "SubqueryScan";
4077 break;
4078 case T_ForeignPath:
4079 ptype = "ForeignScan";
4080 break;
4081 case T_CustomPath:
4082 ptype = "CustomScan";
4083 break;
4084 case T_NestPath:
4085 ptype = "NestLoop";
4086 join = true;
4087 break;
4088 case T_MergePath:
4089 ptype = "MergeJoin";
4090 join = true;
4091 break;
4092 case T_HashPath:
4093 ptype = "HashJoin";
4094 join = true;
4095 break;
4096 case T_AppendPath:
4097 ptype = "Append";
4098 break;
4099 case T_MergeAppendPath:
4100 ptype = "MergeAppend";
4101 break;
4102 case T_GroupResultPath:
4103 ptype = "GroupResult";
4104 break;
4105 case T_MaterialPath:
4106 ptype = "Material";
4107 subpath = ((MaterialPath *) path)->subpath;
4108 break;
4109 case T_UniquePath:
4110 ptype = "Unique";
4111 subpath = ((UniquePath *) path)->subpath;
4112 break;
4113 case T_GatherPath:
4114 ptype = "Gather";
4115 subpath = ((GatherPath *) path)->subpath;
4116 break;
4117 case T_GatherMergePath:
4118 ptype = "GatherMerge";
4119 subpath = ((GatherMergePath *) path)->subpath;
4120 break;
4121 case T_ProjectionPath:
4122 ptype = "Projection";
4123 subpath = ((ProjectionPath *) path)->subpath;
4124 break;
4125 case T_ProjectSetPath:
4126 ptype = "ProjectSet";
4127 subpath = ((ProjectSetPath *) path)->subpath;
4128 break;
4129 case T_SortPath:
4130 ptype = "Sort";
4131 subpath = ((SortPath *) path)->subpath;
4132 break;
4133 case T_IncrementalSortPath:
4134 ptype = "IncrementalSort";
4135 subpath = ((SortPath *) path)->subpath;
4136 break;
4137 case T_GroupPath:
4138 ptype = "Group";
4139 subpath = ((GroupPath *) path)->subpath;
4140 break;
4141 case T_UpperUniquePath:
4142 ptype = "UpperUnique";
4143 subpath = ((UpperUniquePath *) path)->subpath;
4144 break;
4145 case T_AggPath:
4146 ptype = "Agg";
4147 subpath = ((AggPath *) path)->subpath;
4148 break;
4149 case T_GroupingSetsPath:
4150 ptype = "GroupingSets";
4151 subpath = ((GroupingSetsPath *) path)->subpath;
4152 break;
4153 case T_MinMaxAggPath:
4154 ptype = "MinMaxAgg";
4155 break;
4156 case T_WindowAggPath:
4157 ptype = "WindowAgg";
4158 subpath = ((WindowAggPath *) path)->subpath;
4159 break;
4160 case T_SetOpPath:
4161 ptype = "SetOp";
4162 subpath = ((SetOpPath *) path)->subpath;
4163 break;
4164 case T_RecursiveUnionPath:
4165 ptype = "RecursiveUnion";
4166 break;
4167 case T_LockRowsPath:
4168 ptype = "LockRows";
4169 subpath = ((LockRowsPath *) path)->subpath;
4170 break;
4171 case T_ModifyTablePath:
4172 ptype = "ModifyTable";
4173 break;
4174 case T_LimitPath:
4175 ptype = "Limit";
4176 subpath = ((LimitPath *) path)->subpath;
4177 break;
4178 default:
4179 ptype = "???Path";
4180 break;
4181 }
4182
4183 for (i = 0; i < indent; i++)
4184 printf("\t");
4185 printf("%s", ptype);
4186
4187 if (path->parent)
4188 {
4189 printf("(");
4190 print_relids(root, path->parent->relids);
4191 printf(")");
4192 }
4193 if (path->param_info)
4194 {
4195 printf(" required_outer (");
4196 print_relids(root, path->param_info->ppi_req_outer);
4197 printf(")");
4198 }
4199 printf(" rows=%.0f cost=%.2f..%.2f\n",
4200 path->rows, path->startup_cost, path->total_cost);
4201
4202 if (path->pathkeys)
4203 {
4204 for (i = 0; i < indent; i++)
4205 printf("\t");
4206 printf(" pathkeys: ");
4207 print_pathkeys(path->pathkeys, root->parse->rtable);
4208 }
4209
4210 if (join)
4211 {
4212 JoinPath *jp = (JoinPath *) path;
4213
4214 for (i = 0; i < indent; i++)
4215 printf("\t");
4216 printf(" clauses: ");
4217 print_restrictclauses(root, jp->joinrestrictinfo);
4218 printf("\n");
4219
4220 if (IsA(path, MergePath))
4221 {
4222 MergePath *mp = (MergePath *) path;
4223
4224 for (i = 0; i < indent; i++)
4225 printf("\t");
4226 printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
4227 ((mp->outersortkeys) ? 1 : 0),
4228 ((mp->innersortkeys) ? 1 : 0),
4229 ((mp->materialize_inner) ? 1 : 0));
4230 }
4231
4232 print_path(root, jp->outerjoinpath, indent + 1);
4233 print_path(root, jp->innerjoinpath, indent + 1);
4234 }
4235
4236 if (subpath)
4237 print_path(root, subpath, indent + 1);
4238 }
4239
4240 void
4241 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
4242 {
4243 ListCell *l;
4244
4245 printf("RELOPTINFO (");
4246 print_relids(root, rel->relids);
4247 printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
4248
4249 if (rel->baserestrictinfo)
4250 {
4251 printf("\tbaserestrictinfo: ");
4252 print_restrictclauses(root, rel->baserestrictinfo);
4253 printf("\n");
4254 }
4255
4256 if (rel->joininfo)
4257 {
4258 printf("\tjoininfo: ");
4259 print_restrictclauses(root, rel->joininfo);
4260 printf("\n");
4261 }
4262
4263 printf("\tpath list:\n");
4264 foreach(l, rel->pathlist)
4265 print_path(root, lfirst(l), 1);
4266 if (rel->cheapest_parameterized_paths)
4267 {
4268 printf("\n\tcheapest parameterized paths:\n");
4269 foreach(l, rel->cheapest_parameterized_paths)
4270 print_path(root, lfirst(l), 1);
4271 }
4272 if (rel->cheapest_startup_path)
4273 {
4274 printf("\n\tcheapest startup path:\n");
4275 print_path(root, rel->cheapest_startup_path, 1);
4276 }
4277 if (rel->cheapest_total_path)
4278 {
4279 printf("\n\tcheapest total path:\n");
4280 print_path(root, rel->cheapest_total_path, 1);
4281 }
4282 printf("\n");
4283 fflush(stdout);
4284 }
4285
4286 #endif /* OPTIMIZER_DEBUG */
4287