1 /*-------------------------------------------------------------------------
2 *
3 * allpaths.c
4 * Routines to find possible search paths for processing a query
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/allpaths.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16 #include "postgres.h"
17
18 #include <limits.h>
19 #include <math.h>
20
21 #include "access/sysattr.h"
22 #include "access/tsmapi.h"
23 #include "catalog/pg_class.h"
24 #include "catalog/pg_operator.h"
25 #include "catalog/pg_proc.h"
26 #include "foreign/fdwapi.h"
27 #include "miscadmin.h"
28 #include "nodes/makefuncs.h"
29 #include "nodes/nodeFuncs.h"
30 #ifdef OPTIMIZER_DEBUG
31 #include "nodes/print.h"
32 #endif
33 #include "optimizer/appendinfo.h"
34 #include "optimizer/clauses.h"
35 #include "optimizer/cost.h"
36 #include "optimizer/geqo.h"
37 #include "optimizer/inherit.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/plancat.h"
42 #include "optimizer/planner.h"
43 #include "optimizer/restrictinfo.h"
44 #include "optimizer/tlist.h"
45 #include "parser/parse_clause.h"
46 #include "parser/parsetree.h"
47 #include "partitioning/partbounds.h"
48 #include "partitioning/partprune.h"
49 #include "rewrite/rewriteManip.h"
50 #include "utils/lsyscache.h"
51
52
53 /* results of subquery_is_pushdown_safe */
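/*
 * Note: unsafeColumns[] is indexed by the subquery output column's resno,
 * so element zero is unused; the per-column flags are filled in by
 * check_output_expressions() below.
 */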
54 typedef struct pushdown_safety_info
55 {
56 bool *unsafeColumns; /* which output columns are unsafe to use */
57 bool unsafeVolatile; /* don't push down volatile quals */
58 bool unsafeLeaky; /* don't push down leaky quals */
59 } pushdown_safety_info;
60
61 /* These parameters are set by GUC */
62 bool enable_geqo = false; /* just in case GUC doesn't set it */
63 int geqo_threshold;
64 int min_parallel_table_scan_size;
65 int min_parallel_index_scan_size;
66
67 /* Hook for plugins to get control in set_rel_pathlist() */
68 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
69
70 /* Hook for plugins to replace standard_join_search() */
71 join_search_hook_type join_search_hook = NULL;
72
73
74 static void set_base_rel_consider_startup(PlannerInfo *root);
75 static void set_base_rel_sizes(PlannerInfo *root);
76 static void set_base_rel_pathlists(PlannerInfo *root);
77 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
78 Index rti, RangeTblEntry *rte);
79 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
80 Index rti, RangeTblEntry *rte);
81 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
82 RangeTblEntry *rte);
83 static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
84 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
85 RangeTblEntry *rte);
86 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
87 RangeTblEntry *rte);
88 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
89 RangeTblEntry *rte);
90 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
91 RangeTblEntry *rte);
92 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
93 RangeTblEntry *rte);
94 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
95 RangeTblEntry *rte);
96 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
97 Index rti, RangeTblEntry *rte);
98 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
99 Index rti, RangeTblEntry *rte);
100 static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
101 List *live_childrels,
102 List *all_child_pathkeys,
103 List *partitioned_rels);
104 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
105 RelOptInfo *rel,
106 Relids required_outer);
107 static void accumulate_append_subpath(Path *path,
108 List **subpaths, List **special_subpaths);
109 static Path *get_singleton_append_subpath(Path *path);
110 static void set_dummy_rel_pathlist(RelOptInfo *rel);
111 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
112 Index rti, RangeTblEntry *rte);
113 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
114 RangeTblEntry *rte);
115 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
116 RangeTblEntry *rte);
117 static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
118 RangeTblEntry *rte);
119 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
120 RangeTblEntry *rte);
121 static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
122 RangeTblEntry *rte);
123 static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
124 RangeTblEntry *rte);
125 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
126 RangeTblEntry *rte);
127 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
128 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
129 pushdown_safety_info *safetyInfo);
130 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
131 pushdown_safety_info *safetyInfo);
132 static void check_output_expressions(Query *subquery,
133 pushdown_safety_info *safetyInfo);
134 static void compare_tlist_datatypes(List *tlist, List *colTypes,
135 pushdown_safety_info *safetyInfo);
136 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
137 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
138 pushdown_safety_info *safetyInfo);
139 static void subquery_push_qual(Query *subquery,
140 RangeTblEntry *rte, Index rti, Node *qual);
141 static void recurse_push_qual(Node *setOp, Query *topquery,
142 RangeTblEntry *rte, Index rti, Node *qual);
143 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
144
145
146 /*
147 * make_one_rel
148 * Finds all possible access paths for executing a query, returning a
149 * single rel that represents the join of all base rels in the query.
150 */
151 RelOptInfo *
152 make_one_rel(PlannerInfo *root, List *joinlist)
153 {
154 RelOptInfo *rel;
155 Index rti;
156 double total_pages;
157
158 /*
159 * Construct the all_baserels Relids set.
160 */
161 root->all_baserels = NULL;
162 for (rti = 1; rti < root->simple_rel_array_size; rti++)
163 {
164 RelOptInfo *brel = root->simple_rel_array[rti];
165
166 /* there may be empty slots corresponding to non-baserel RTEs */
167 if (brel == NULL)
168 continue;
169
170 Assert(brel->relid == rti); /* sanity check on array */
171
172 /* ignore RTEs that are "other rels" */
173 if (brel->reloptkind != RELOPT_BASEREL)
174 continue;
175
176 root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
177 }
178
179 /* Mark base rels as to whether we care about fast-start plans */
180 set_base_rel_consider_startup(root);
181
182 /*
183 * Compute size estimates and consider_parallel flags for each base rel.
184 */
185 set_base_rel_sizes(root);
186
187 /*
188 * We should now have size estimates for every actual table involved in
189 * the query, and we also know which if any have been deleted from the
190 * query by join removal, pruned by partition pruning, or eliminated by
191 * constraint exclusion. So we can now compute total_table_pages.
192 *
193 * Note that appendrels are not double-counted here, even though we don't
194 * bother to distinguish RelOptInfos for appendrel parents, because the
195 * parents will have pages = 0.
196 *
197 * XXX if a table is self-joined, we will count it once per appearance,
198 * which perhaps is the wrong thing ... but that's not completely clear,
199 * and detecting self-joins here is difficult, so ignore it for now.
200 */
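/*
 * (total_table_pages is later used by index_pages_fetched() in costsize.c
 * to apportion effective_cache_size among all the tables in the query.)
 */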
201 total_pages = 0;
202 for (rti = 1; rti < root->simple_rel_array_size; rti++)
203 {
204 RelOptInfo *brel = root->simple_rel_array[rti];
205
206 if (brel == NULL)
207 continue;
208
209 Assert(brel->relid == rti); /* sanity check on array */
210
211 if (IS_DUMMY_REL(brel))
212 continue;
213
214 if (IS_SIMPLE_REL(brel))
215 total_pages += (double) brel->pages;
216 }
217 root->total_table_pages = total_pages;
218
219 /*
220 * Generate access paths for each base rel.
221 */
222 set_base_rel_pathlists(root);
223
224 /*
225 * Generate access paths for the entire join tree.
226 */
227 rel = make_rel_from_joinlist(root, joinlist);
228
229 /*
230 * The result should join all and only the query's base rels.
231 */
232 Assert(bms_equal(rel->relids, root->all_baserels));
233
234 return rel;
235 }
236
237 /*
238 * set_base_rel_consider_startup
239 * Set the consider_[param_]startup flags for each base-relation entry.
240 *
241 * For the moment, we only deal with consider_param_startup here; because the
242 * logic for consider_startup is pretty trivial and is the same for every base
243 * relation, we just let build_simple_rel() initialize that flag correctly to
244 * start with. If that logic ever gets more complicated it would probably
245 * be better to move it here.
246 */
247 static void
248 set_base_rel_consider_startup(PlannerInfo *root)
249 {
250 /*
251 * Since parameterized paths can only be used on the inside of a nestloop
252 * join plan, there is usually little value in considering fast-start
253 * plans for them. However, for relations that are on the RHS of a SEMI
254 * or ANTI join, a fast-start plan can be useful because we're only going
255 * to care about fetching one tuple anyway.
256 *
257 * To minimize growth of planning time, we currently restrict this to
258 * cases where the RHS is a single base relation, not a join; there is no
259 * provision for consider_param_startup to get set at all on joinrels.
260 * Also we don't worry about appendrels. costsize.c's costing rules for
261 * nestloop semi/antijoins don't consider such cases either.
262 */
263 ListCell *lc;
264
265 foreach(lc, root->join_info_list)
266 {
267 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
268 int varno;
269
270 if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
271 bms_get_singleton_member(sjinfo->syn_righthand, &varno))
272 {
273 RelOptInfo *rel = find_base_rel(root, varno);
274
275 rel->consider_param_startup = true;
276 }
277 }
278 }
279
280 /*
281 * set_base_rel_sizes
282 * Set the size estimates (rows and widths) for each base-relation entry.
283 * Also determine whether to consider parallel paths for base relations.
284 *
285 * We do this in a separate pass over the base rels so that rowcount
286 * estimates are available for parameterized path generation, and also so
287 * that each rel's consider_parallel flag is set correctly before we begin to
288 * generate paths.
289 */
290 static void
291 set_base_rel_sizes(PlannerInfo *root)
292 {
293 Index rti;
294
295 for (rti = 1; rti < root->simple_rel_array_size; rti++)
296 {
297 RelOptInfo *rel = root->simple_rel_array[rti];
298 RangeTblEntry *rte;
299
300 /* there may be empty slots corresponding to non-baserel RTEs */
301 if (rel == NULL)
302 continue;
303
304 Assert(rel->relid == rti); /* sanity check on array */
305
306 /* ignore RTEs that are "other rels" */
307 if (rel->reloptkind != RELOPT_BASEREL)
308 continue;
309
310 rte = root->simple_rte_array[rti];
311
312 /*
313 * If parallelism is allowable for this query in general, see whether
314 * it's allowable for this rel in particular. We have to do this
315 * before set_rel_size(), because (a) if this rel is an inheritance
316 * parent, set_append_rel_size() will use and perhaps change the rel's
317 * consider_parallel flag, and (b) for some RTE types, set_rel_size()
318 * goes ahead and makes paths immediately.
319 */
320 if (root->glob->parallelModeOK)
321 set_rel_consider_parallel(root, rel, rte);
322
323 set_rel_size(root, rel, rti, rte);
324 }
325 }
326
327 /*
328 * set_base_rel_pathlists
329 * Finds all paths available for scanning each base-relation entry.
330 * Sequential scan and any available indices are considered.
331 * Each useful path is attached to its relation's 'pathlist' field.
332 */
333 static void
334 set_base_rel_pathlists(PlannerInfo *root)
335 {
336 Index rti;
337
338 for (rti = 1; rti < root->simple_rel_array_size; rti++)
339 {
340 RelOptInfo *rel = root->simple_rel_array[rti];
341
342 /* there may be empty slots corresponding to non-baserel RTEs */
343 if (rel == NULL)
344 continue;
345
346 Assert(rel->relid == rti); /* sanity check on array */
347
348 /* ignore RTEs that are "other rels" */
349 if (rel->reloptkind != RELOPT_BASEREL)
350 continue;
351
352 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
353 }
354 }
355
356 /*
357 * set_rel_size
358 * Set size estimates for a base relation
359 */
360 static void
361 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
362 Index rti, RangeTblEntry *rte)
363 {
364 if (rel->reloptkind == RELOPT_BASEREL &&
365 relation_excluded_by_constraints(root, rel, rte))
366 {
367 /*
368 * We proved we don't need to scan the rel via constraint exclusion,
369 * so set up a single dummy path for it. Here we only check this for
370 * regular baserels; if it's an otherrel, CE was already checked in
371 * set_append_rel_size().
372 *
373 * In this case, we go ahead and set up the relation's path right away
374 * instead of leaving it for set_rel_pathlist to do. This is because
375 * we don't have a convention for marking a rel as dummy except by
376 * assigning a dummy path to it.
377 */
378 set_dummy_rel_pathlist(rel);
379 }
380 else if (rte->inh)
381 {
382 /* It's an "append relation", process accordingly */
383 set_append_rel_size(root, rel, rti, rte);
384 }
385 else
386 {
387 switch (rel->rtekind)
388 {
389 case RTE_RELATION:
390 if (rte->relkind == RELKIND_FOREIGN_TABLE)
391 {
392 /* Foreign table */
393 set_foreign_size(root, rel, rte);
394 }
395 else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
396 {
397 /*
398 * We could get here if asked to scan a partitioned table
399 * with ONLY. In that case we shouldn't scan any of the
400 * partitions, so mark it as a dummy rel.
401 */
402 set_dummy_rel_pathlist(rel);
403 }
404 else if (rte->tablesample != NULL)
405 {
406 /* Sampled relation */
407 set_tablesample_rel_size(root, rel, rte);
408 }
409 else
410 {
411 /* Plain relation */
412 set_plain_rel_size(root, rel, rte);
413 }
414 break;
415 case RTE_SUBQUERY:
416
417 /*
418 * Subqueries don't support making a choice between
419 * parameterized and unparameterized paths, so just go ahead
420 * and build their paths immediately.
421 */
422 set_subquery_pathlist(root, rel, rti, rte);
423 break;
424 case RTE_FUNCTION:
425 set_function_size_estimates(root, rel);
426 break;
427 case RTE_TABLEFUNC:
428 set_tablefunc_size_estimates(root, rel);
429 break;
430 case RTE_VALUES:
431 set_values_size_estimates(root, rel);
432 break;
433 case RTE_CTE:
434
435 /*
436 * CTEs don't support making a choice between parameterized
437 * and unparameterized paths, so just go ahead and build their
438 * paths immediately.
439 */
440 if (rte->self_reference)
441 set_worktable_pathlist(root, rel, rte);
442 else
443 set_cte_pathlist(root, rel, rte);
444 break;
445 case RTE_NAMEDTUPLESTORE:
446 /* Might as well just build the path immediately */
447 set_namedtuplestore_pathlist(root, rel, rte);
448 break;
449 case RTE_RESULT:
450 /* Might as well just build the path immediately */
451 set_result_pathlist(root, rel, rte);
452 break;
453 default:
454 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
455 break;
456 }
457 }
458
459 /*
460 * We insist that all non-dummy rels have a nonzero rowcount estimate.
461 */
462 Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
463 }
464
465 /*
466 * set_rel_pathlist
467 * Build access paths for a base relation
468 */
469 static void
470 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
471 Index rti, RangeTblEntry *rte)
472 {
473 if (IS_DUMMY_REL(rel))
474 {
475 /* We already proved the relation empty, so nothing more to do */
476 }
477 else if (rte->inh)
478 {
479 /* It's an "append relation", process accordingly */
480 set_append_rel_pathlist(root, rel, rti, rte);
481 }
482 else
483 {
484 switch (rel->rtekind)
485 {
486 case RTE_RELATION:
487 if (rte->relkind == RELKIND_FOREIGN_TABLE)
488 {
489 /* Foreign table */
490 set_foreign_pathlist(root, rel, rte);
491 }
492 else if (rte->tablesample != NULL)
493 {
494 /* Sampled relation */
495 set_tablesample_rel_pathlist(root, rel, rte);
496 }
497 else
498 {
499 /* Plain relation */
500 set_plain_rel_pathlist(root, rel, rte);
501 }
502 break;
503 case RTE_SUBQUERY:
504 /* Subquery --- fully handled during set_rel_size */
505 break;
506 case RTE_FUNCTION:
507 /* RangeFunction */
508 set_function_pathlist(root, rel, rte);
509 break;
510 case RTE_TABLEFUNC:
511 /* Table Function */
512 set_tablefunc_pathlist(root, rel, rte);
513 break;
514 case RTE_VALUES:
515 /* Values list */
516 set_values_pathlist(root, rel, rte);
517 break;
518 case RTE_CTE:
519 /* CTE reference --- fully handled during set_rel_size */
520 break;
521 case RTE_NAMEDTUPLESTORE:
522 /* tuplestore reference --- fully handled during set_rel_size */
523 break;
524 case RTE_RESULT:
525 /* simple Result --- fully handled during set_rel_size */
526 break;
527 default:
528 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
529 break;
530 }
531 }
532
533 /*
534 * Allow a plugin to editorialize on the set of Paths for this base
535 * relation. It could add new paths (such as CustomPaths) by calling
536 * add_path(), or add_partial_path() if parallel aware. It could also
537 * delete or modify paths added by the core code.
538 */
539 if (set_rel_pathlist_hook)
540 (*set_rel_pathlist_hook) (root, rel, rti, rte);
541
542 /*
543 * If this is a baserel, we should normally consider gathering any partial
544 * paths we may have created for it. We have to do this after calling the
545 * set_rel_pathlist_hook, else it cannot add partial paths to be included
546 * here.
547 *
548 * However, if this is an inheritance child, skip it. Otherwise, we could
549 * end up with a very large number of gather nodes, each trying to grab
550 * its own pool of workers. Instead, we'll consider gathering partial
551 * paths for the parent appendrel.
552 *
553 * Also, if this is the topmost scan/join rel (that is, the only baserel),
554 * we postpone gathering until the final scan/join targetlist is available
555 * (see grouping_planner).
556 */
557 if (rel->reloptkind == RELOPT_BASEREL &&
558 bms_membership(root->all_baserels) != BMS_SINGLETON)
559 generate_gather_paths(root, rel, false);
560
561 /* Now find the cheapest of the paths for this rel */
562 set_cheapest(rel);
563
564 #ifdef OPTIMIZER_DEBUG
565 debug_print_rel(root, rel);
566 #endif
567 }
568
569 /*
570 * set_plain_rel_size
571 * Set size estimates for a plain relation (no subquery, no inheritance)
572 */
573 static void
574 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
575 {
576 /*
577 * Test any partial indexes of rel for applicability. We must do this
578 * first since partial unique indexes can affect size estimates.
579 */
580 check_index_predicates(root, rel);
581
582 /* Mark rel with estimated output rows, width, etc */
583 set_baserel_size_estimates(root, rel);
584 }
585
586 /*
587 * If this relation could possibly be scanned from within a worker, then set
588 * its consider_parallel flag.
589 */
590 static void
591 set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
592 RangeTblEntry *rte)
593 {
594 /*
595 * The flag has previously been initialized to false, so we can just
596 * return if it becomes clear that we can't safely set it.
597 */
598 Assert(!rel->consider_parallel);
599
600 /* Don't call this if parallelism is disallowed for the entire query. */
601 Assert(root->glob->parallelModeOK);
602
603 /* This should only be called for baserels and appendrel children. */
604 Assert(IS_SIMPLE_REL(rel));
605
606 /* Assorted checks based on rtekind. */
607 switch (rte->rtekind)
608 {
609 case RTE_RELATION:
610
611 /*
612 * Currently, parallel workers can't access the leader's temporary
613 * tables. We could possibly relax this if we wrote all of its
614 * local buffers at the start of the query and made no changes
615 * thereafter (maybe we could allow hint bit changes), and if we
616 * taught the workers to read them. Writing a large number of
617 * temporary buffers could be expensive, though, and we don't have
618 * the rest of the necessary infrastructure right now anyway. So
619 * for now, bail out if we see a temporary table.
620 */
621 if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
622 return;
623
624 /*
625 * Table sampling can be pushed down to workers if the sample
626 * function and its arguments are safe.
627 /*
628 if (rte->tablesample != NULL)
629 {
630 char proparallel = func_parallel(rte->tablesample->tsmhandler);
631
632 if (proparallel != PROPARALLEL_SAFE)
633 return;
634 if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
635 return;
636 }
637
638 /*
639 * Ask FDWs whether they can support performing a ForeignScan
640 * within a worker. Most often, the answer will be no. For
641 * example, if the nature of the FDW is such that it opens a TCP
642 * connection with a remote server, each parallel worker would end
643 * up with a separate connection, and these connections might not
644 * be appropriately coordinated between workers and the leader.
645 */
646 if (rte->relkind == RELKIND_FOREIGN_TABLE)
647 {
648 Assert(rel->fdwroutine);
649 if (!rel->fdwroutine->IsForeignScanParallelSafe)
650 return;
651 if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
652 return;
653 }
654
655 /*
656 * There are additional considerations for appendrels, which we'll
657 * deal with in set_append_rel_size and set_append_rel_pathlist.
658 * For now, just set consider_parallel based on the rel's own
659 * quals and targetlist.
660 */
661 break;
662
663 case RTE_SUBQUERY:
664
665 /*
666 * There's no intrinsic problem with scanning a subquery-in-FROM
667 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
668 * If the subquery doesn't happen to have any parallel-safe paths,
669 * then flagging it as consider_parallel won't change anything,
670 * but that's true for plain tables, too. We must set
671 * consider_parallel based on the rel's own quals and targetlist,
672 * so that if a subquery path is parallel-safe but the quals and
673 * projection we're sticking onto it are not, we correctly mark
674 * the SubqueryScanPath as not parallel-safe. (Note that
675 * set_subquery_pathlist() might push some of these quals down
676 * into the subquery itself, but that doesn't change anything.)
677 *
678 * We can't push sub-select containing LIMIT/OFFSET to workers as
679 * there is no guarantee that the row order will be fully
680 * deterministic, and applying LIMIT/OFFSET will lead to
681 * inconsistent results at the top-level. (In some cases, where
682 * the result is ordered, we could relax this restriction. But it
683 * doesn't currently seem worth expending extra effort to do so.)
684 */
685 {
686 Query *subquery = castNode(Query, rte->subquery);
687
688 if (limit_needed(subquery))
689 return;
690 }
691 break;
692
693 case RTE_JOIN:
694 /* Shouldn't happen; we're only considering baserels here. */
695 Assert(false);
696 return;
697
698 case RTE_FUNCTION:
699 /* Check for parallel-restricted functions. */
700 if (!is_parallel_safe(root, (Node *) rte->functions))
701 return;
702 break;
703
704 case RTE_TABLEFUNC:
705 /* not parallel safe */
706 return;
707
708 case RTE_VALUES:
709 /* Check for parallel-restricted functions. */
710 if (!is_parallel_safe(root, (Node *) rte->values_lists))
711 return;
712 break;
713
714 case RTE_CTE:
715
716 /*
717 * CTE tuplestores aren't shared among parallel workers, so we
718 * force all CTE scans to happen in the leader. Also, populating
719 * the CTE would require executing a subplan that's not available
720 * in the worker, might be parallel-restricted, and must get
721 * executed only once.
722 */
723 return;
724
725 case RTE_NAMEDTUPLESTORE:
726
727 /*
728 * tuplestore cannot be shared, at least without more
729 * infrastructure to support that.
730 */
731 return;
732
733 case RTE_RESULT:
734 /* RESULT RTEs, in themselves, are no problem. */
735 break;
736 }
737
738 /*
739 * If there's anything in baserestrictinfo that's parallel-restricted, we
740 * give up on parallelizing access to this relation. We could consider
741 * instead postponing application of the restricted quals until we're
742 * above all the parallelism in the plan tree, but it's not clear that
743 * that would be a win in very many cases, and it might be tricky to make
744 * outer join clauses work correctly. It would likely break equivalence
745 * classes, too.
746 */
747 if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
748 return;
749
750 /*
751 * Likewise, if the relation's outputs are not parallel-safe, give up.
752 * (Usually, they're just Vars, but sometimes they're not.)
753 */
754 if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
755 return;
756
757 /* We have a winner. */
758 rel->consider_parallel = true;
759 }
760
761 /*
762 * set_plain_rel_pathlist
763 * Build access paths for a plain relation (no subquery, no inheritance)
764 */
765 static void
766 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
767 {
768 Relids required_outer;
769
770 /*
771 * We don't support pushing join clauses into the quals of a seqscan, but
772 * it could still have required parameterization due to LATERAL refs in
773 * its tlist.
774 */
775 required_outer = rel->lateral_relids;
776
777 /* Consider sequential scan */
778 add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
779
780 /* If appropriate, consider parallel sequential scan */
781 if (rel->consider_parallel && required_outer == NULL)
782 create_plain_partial_paths(root, rel);
783
784 /* Consider index scans */
785 create_index_paths(root, rel);
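/* (this also considers bitmap heap scans built from those indexes) */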
786
787 /* Consider TID scans */
788 create_tidscan_paths(root, rel);
789 }
790
791 /*
792 * create_plain_partial_paths
793 * Build partial access paths for parallel scan of a plain relation
794 */
795 static void
796 create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
797 {
798 int parallel_workers;
799
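/*
 * Pass -1 for index_pages since no index is involved in a plain seqscan;
 * the worker count is then driven by the heap size as compared against
 * min_parallel_table_scan_size, unless the relation's parallel_workers
 * storage option overrides it.
 */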
800 parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
801 max_parallel_workers_per_gather);
802
803 /* If any limit was set to zero, the user doesn't want a parallel scan. */
804 if (parallel_workers <= 0)
805 return;
806
807 /* Add an unordered partial path based on a parallel sequential scan. */
808 add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
809 }
810
811 /*
812 * set_tablesample_rel_size
813 * Set size estimates for a sampled relation
814 */
815 static void
816 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
817 {
818 TableSampleClause *tsc = rte->tablesample;
819 TsmRoutine *tsm;
820 BlockNumber pages;
821 double tuples;
822
823 /*
824 * Test any partial indexes of rel for applicability. We must do this
825 * first since partial unique indexes can affect size estimates.
826 */
827 check_index_predicates(root, rel);
828
829 /*
830 * Call the sampling method's estimation function to estimate the number
831 * of pages it will read and the number of tuples it will return. (Note:
832 * we assume the function returns sane values.)
833 */
834 tsm = GetTsmRoutine(tsc->tsmhandler);
835 tsm->SampleScanGetSampleSize(root, rel, tsc->args,
836 &pages, &tuples);
837
838 /*
839 * For the moment, because we will only consider a SampleScan path for the
840 * rel, it's okay to just overwrite the pages and tuples estimates for the
841 * whole relation. If we ever consider multiple path types for sampled
842 * rels, we'll need more complication.
843 */
844 rel->pages = pages;
845 rel->tuples = tuples;
846
847 /* Mark rel with estimated output rows, width, etc */
848 set_baserel_size_estimates(root, rel);
849 }
850
851 /*
852 * set_tablesample_rel_pathlist
853 * Build access paths for a sampled relation
854 */
855 static void
856 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
857 {
858 Relids required_outer;
859 Path *path;
860
861 /*
862 * We don't support pushing join clauses into the quals of a samplescan,
863 * but it could still have required parameterization due to LATERAL refs
864 * in its tlist or TABLESAMPLE arguments.
865 */
866 required_outer = rel->lateral_relids;
867
868 /* Consider sampled scan */
869 path = create_samplescan_path(root, rel, required_outer);
870
871 /*
872 * If the sampling method does not support repeatable scans, we must avoid
873 * plans that would scan the rel multiple times. Ideally, we'd simply
874 * avoid putting the rel on the inside of a nestloop join; but adding such
875 * a consideration to the planner seems like a great deal of complication
876 * to support an uncommon usage of second-rate sampling methods. Instead,
877 * if there is a risk that the query might perform an unsafe join, just
878 * wrap the SampleScan in a Materialize node. We can check for joins by
879 * counting the membership of all_baserels (note that this correctly
880 * counts inheritance trees as single rels). If we're inside a subquery,
881 * we can't easily check whether a join might occur in the outer query, so
882 * just assume one is possible.
883 *
884 * GetTsmRoutine is relatively expensive compared to the other tests here,
885 * so check repeatable_across_scans last, even though that's a bit odd.
886 */
887 if ((root->query_level > 1 ||
888 bms_membership(root->all_baserels) != BMS_SINGLETON) &&
889 !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
890 {
891 path = (Path *) create_material_path(rel, path);
892 }
893
894 add_path(rel, path);
895
896 /* For the moment, at least, there are no other paths to consider */
897 }
898
899 /*
900 * set_foreign_size
901 * Set size estimates for a foreign table RTE
902 */
903 static void
904 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
905 {
906 /* Mark rel with estimated output rows, width, etc */
907 set_foreign_size_estimates(root, rel);
908
909 /* Let FDW adjust the size estimates, if it can */
910 rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
911
912 /* ... but do not let it set the rows estimate to zero */
913 rel->rows = clamp_row_est(rel->rows);
914
915 /* also, make sure rel->tuples is not insane relative to rel->rows */
916 rel->tuples = Max(rel->tuples, rel->rows);
917 }
918
919 /*
920 * set_foreign_pathlist
921 * Build access paths for a foreign table RTE
922 */
923 static void
924 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
925 {
926 /* Call the FDW's GetForeignPaths function to generate path(s) */
927 rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
928 }
929
930 /*
931 * set_append_rel_size
932 * Set size estimates for a simple "append relation"
933 *
934 * The passed-in rel and RTE represent the entire append relation. The
935 * relation's contents are computed by appending together the output of the
936 * individual member relations. Note that in the non-partitioned inheritance
937 * case, the first member relation is actually the same table as is mentioned
938 * in the parent RTE ... but it has a different RTE and RelOptInfo. This is
939 * a good thing because their outputs are not the same size.
940 */
941 static void
942 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
943 Index rti, RangeTblEntry *rte)
944 {
945 int parentRTindex = rti;
946 bool has_live_children;
947 double parent_rows;
948 double parent_size;
949 double *parent_attrsizes;
950 int nattrs;
951 ListCell *l;
952
953 /* Guard against stack overflow due to overly deep inheritance tree. */
954 check_stack_depth();
955
956 Assert(IS_SIMPLE_REL(rel));
957
958 /*
959 * Initialize partitioned_child_rels to contain this RT index.
960 *
961 * Note that during the set_append_rel_pathlist() phase, we will bubble up
962 * the indexes of partitioned relations that appear down in the tree, so
963 * that when we've created Paths for all the children, the root
964 * partitioned table's list will contain all such indexes.
965 */
966 if (rte->relkind == RELKIND_PARTITIONED_TABLE)
967 rel->partitioned_child_rels = list_make1_int(rti);
968
969 /*
970 * If this is a partitioned baserel, set the consider_partitionwise_join
971 * flag; currently, we only consider partitionwise joins with the baserel
972 * if its targetlist doesn't contain a whole-row Var.
973 */
974 if (enable_partitionwise_join &&
975 rel->reloptkind == RELOPT_BASEREL &&
976 rte->relkind == RELKIND_PARTITIONED_TABLE &&
977 rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
978 rel->consider_partitionwise_join = true;
979
980 /*
981 * Initialize to compute size estimates for whole append relation.
982 *
983 * We handle width estimates by weighting the widths of different child
984 * rels proportionally to their number of rows. This is sensible because
985 * the use of width estimates is mainly to compute the total relation
986 * "footprint" if we have to sort or hash it. To do this, we sum the
987 * total equivalent size (in "double" arithmetic) and then divide by the
988 * total rowcount estimate. This is done separately for the total rel
989 * width and each attribute.
990 *
991 * Note: if you consider changing this logic, beware that child rels could
992 * have zero rows and/or width, if they were excluded by constraints.
993 */
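/*
 * For example, two live children with (rows=100, width=40) and
 * (rows=300, width=80) yield a parent width of (100*40 + 300*80) / 400 = 70.
 */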
994 has_live_children = false;
995 parent_rows = 0;
996 parent_size = 0;
997 nattrs = rel->max_attr - rel->min_attr + 1;
998 parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
999
1000 foreach(l, root->append_rel_list)
1001 {
1002 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1003 int childRTindex;
1004 RangeTblEntry *childRTE;
1005 RelOptInfo *childrel;
1006 ListCell *parentvars;
1007 ListCell *childvars;
1008
1009 /* append_rel_list contains all append rels; ignore others */
1010 if (appinfo->parent_relid != parentRTindex)
1011 continue;
1012
1013 childRTindex = appinfo->child_relid;
1014 childRTE = root->simple_rte_array[childRTindex];
1015
1016 /*
1017 * The child rel's RelOptInfo was already created during
1018 * add_other_rels_to_query.
1019 */
1020 childrel = find_base_rel(root, childRTindex);
1021 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1022
1023 /* We may have already proven the child to be dummy. */
1024 if (IS_DUMMY_REL(childrel))
1025 continue;
1026
1027 /*
1028 * We have to copy the parent's targetlist and quals to the child,
1029 * with appropriate substitution of variables. However, the
1030 * baserestrictinfo quals were already copied/substituted when the
1031 * child RelOptInfo was built. So we don't need any additional setup
1032 * before applying constraint exclusion.
1033 */
1034 if (relation_excluded_by_constraints(root, childrel, childRTE))
1035 {
1036 /*
1037 * This child need not be scanned, so we can omit it from the
1038 * appendrel.
1039 */
1040 set_dummy_rel_pathlist(childrel);
1041 continue;
1042 }
1043
1044 /*
1045 * Constraint exclusion failed, so copy the parent's join quals and
1046 * targetlist to the child, with appropriate variable substitutions.
1047 *
1048 * NB: the resulting childrel->reltarget->exprs may contain arbitrary
1049 * expressions, which otherwise would not occur in a rel's targetlist.
1050 * Code that might be looking at an appendrel child must cope with
1051 * such. (Normally, a rel's targetlist would only include Vars and
1052 * PlaceHolderVars.) XXX we do not bother to update the cost or width
1053 * fields of childrel->reltarget; not clear if that would be useful.
1054 */
1055 childrel->joininfo = (List *)
1056 adjust_appendrel_attrs(root,
1057 (Node *) rel->joininfo,
1058 1, &appinfo);
1059 childrel->reltarget->exprs = (List *)
1060 adjust_appendrel_attrs(root,
1061 (Node *) rel->reltarget->exprs,
1062 1, &appinfo);
1063
1064 /*
1065 * We have to make child entries in the EquivalenceClass data
1066 * structures as well. This is needed either if the parent
1067 * participates in some eclass joins (because we will want to consider
1068 * inner-indexscan joins on the individual children) or if the parent
1069 * has useful pathkeys (because we should try to build MergeAppend
1070 * paths that produce those sort orderings).
1071 */
1072 if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
1073 add_child_rel_equivalences(root, appinfo, rel, childrel);
1074 childrel->has_eclass_joins = rel->has_eclass_joins;
1075
1076 /*
1077 * Note: we could compute appropriate attr_needed data for the child's
1078 * variables, by transforming the parent's attr_needed through the
1079 * translated_vars mapping. However, currently there's no need
1080 * because attr_needed is only examined for base relations not
1081 * otherrels. So we just leave the child's attr_needed empty.
1082 */
1083
1084 /*
1085 * If we consider partitionwise joins with the parent rel, do the same
1086 * for partitioned child rels.
1087 *
1088 * Note: here we abuse the consider_partitionwise_join flag by setting
1089 * it for child rels that are not themselves partitioned. We do so to
1090 * tell try_partitionwise_join() that the child rel is sufficiently
1091 * valid to be used as a per-partition input, even if it later gets
1092 * proven to be dummy. (It's not usable until we've set up the
1093 * reltarget and EC entries, which we just did.)
1094 */
1095 if (rel->consider_partitionwise_join)
1096 childrel->consider_partitionwise_join = true;
1097
1098 /*
1099 * If parallelism is allowable for this query in general, see whether
1100 * it's allowable for this childrel in particular. But if we've
1101 * already decided the appendrel is not parallel-safe as a whole,
1102 * there's no point in considering parallelism for this child. For
1103 * consistency, do this before calling set_rel_size() for the child.
1104 */
1105 if (root->glob->parallelModeOK && rel->consider_parallel)
1106 set_rel_consider_parallel(root, childrel, childRTE);
1107
1108 /*
1109 * Compute the child's size.
1110 */
1111 set_rel_size(root, childrel, childRTindex, childRTE);
1112
1113 /*
1114 * It is possible that constraint exclusion detected a contradiction
1115 * within a child subquery, even though we didn't prove one above. If
1116 * so, we can skip this child.
1117 */
1118 if (IS_DUMMY_REL(childrel))
1119 continue;
1120
1121 /* We have at least one live child. */
1122 has_live_children = true;
1123
1124 /*
1125 * If any live child is not parallel-safe, treat the whole appendrel
1126 * as not parallel-safe. In future we might be able to generate plans
1127 * in which some children are farmed out to workers while others are
1128 * not; but we don't have that today, so it's a waste to consider
1129 * partial paths anywhere in the appendrel unless it's all safe.
1130 * (Child rels visited before this one will be unmarked in
1131 * set_append_rel_pathlist().)
1132 */
1133 if (!childrel->consider_parallel)
1134 rel->consider_parallel = false;
1135
1136 /*
1137 * Accumulate size information from each live child.
1138 */
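/* set_rel_size() guarantees a nonzero rowcount for any non-dummy rel */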
1139 Assert(childrel->rows > 0);
1140
1141 parent_rows += childrel->rows;
1142 parent_size += childrel->reltarget->width * childrel->rows;
1143
1144 /*
1145 * Accumulate per-column estimates too. We need not do anything for
1146 * PlaceHolderVars in the parent list. If child expression isn't a
1147 * Var, or we didn't record a width estimate for it, we have to fall
1148 * back on a datatype-based estimate.
1149 *
1150 * By construction, child's targetlist is 1-to-1 with parent's.
1151 */
1152 forboth(parentvars, rel->reltarget->exprs,
1153 childvars, childrel->reltarget->exprs)
1154 {
1155 Var *parentvar = (Var *) lfirst(parentvars);
1156 Node *childvar = (Node *) lfirst(childvars);
1157
1158 if (IsA(parentvar, Var))
1159 {
1160 int pndx = parentvar->varattno - rel->min_attr;
1161 int32 child_width = 0;
1162
1163 if (IsA(childvar, Var) &&
1164 ((Var *) childvar)->varno == childrel->relid)
1165 {
1166 int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
1167
1168 child_width = childrel->attr_widths[cndx];
1169 }
1170 if (child_width <= 0)
1171 child_width = get_typavgwidth(exprType(childvar),
1172 exprTypmod(childvar));
1173 Assert(child_width > 0);
1174 parent_attrsizes[pndx] += child_width * childrel->rows;
1175 }
1176 }
1177 }
1178
1179 if (has_live_children)
1180 {
1181 /*
1182 * Save the finished size estimates.
1183 */
1184 int i;
1185
1186 Assert(parent_rows > 0);
1187 rel->rows = parent_rows;
1188 rel->reltarget->width = rint(parent_size / parent_rows);
1189 for (i = 0; i < nattrs; i++)
1190 rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
1191
1192 /*
1193 * Set "raw tuples" count equal to "rows" for the appendrel; needed
1194 * because some places assume rel->tuples is valid for any baserel.
1195 */
1196 rel->tuples = parent_rows;
1197
1198 /*
1199 * Note that we leave rel->pages as zero; this is important to avoid
1200 * double-counting the appendrel tree in total_table_pages.
1201 */
1202 }
1203 else
1204 {
1205 /*
1206 * All children were excluded by constraints, so mark the whole
1207 * appendrel dummy. We must do this in this phase so that the rel's
1208 * dummy-ness is visible when we generate paths for other rels.
1209 */
1210 set_dummy_rel_pathlist(rel);
1211 }
1212
1213 pfree(parent_attrsizes);
1214 }
1215
1216 /*
1217 * set_append_rel_pathlist
1218 * Build access paths for an "append relation"
1219 */
1220 static void
1221 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
1222 Index rti, RangeTblEntry *rte)
1223 {
1224 int parentRTindex = rti;
1225 List *live_childrels = NIL;
1226 ListCell *l;
1227
1228 /*
1229 * Generate access paths for each member relation, and remember the
1230 * non-dummy children.
1231 */
1232 foreach(l, root->append_rel_list)
1233 {
1234 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1235 int childRTindex;
1236 RangeTblEntry *childRTE;
1237 RelOptInfo *childrel;
1238
1239 /* append_rel_list contains all append rels; ignore others */
1240 if (appinfo->parent_relid != parentRTindex)
1241 continue;
1242
1243 /* Re-locate the child RTE and RelOptInfo */
1244 childRTindex = appinfo->child_relid;
1245 childRTE = root->simple_rte_array[childRTindex];
1246 childrel = root->simple_rel_array[childRTindex];
1247
1248 /*
1249 * If set_append_rel_size() decided the parent appendrel was
1250 * parallel-unsafe at some point after visiting this child rel, we
1251 * need to propagate the unsafety marking down to the child, so that
1252 * we don't generate useless partial paths for it.
1253 */
1254 if (!rel->consider_parallel)
1255 childrel->consider_parallel = false;
1256
1257 /*
1258 * Compute the child's access paths.
1259 */
1260 set_rel_pathlist(root, childrel, childRTindex, childRTE);
1261
1262 /*
1263 * If child is dummy, ignore it.
1264 */
1265 if (IS_DUMMY_REL(childrel))
1266 continue;
1267
1268 /* Bubble up childrel's partitioned children. */
1269 if (rel->part_scheme)
1270 rel->partitioned_child_rels =
1271 list_concat(rel->partitioned_child_rels,
1272 list_copy(childrel->partitioned_child_rels));
1273
1274 /*
1275 * Child is live, so add it to the live_childrels list for use below.
1276 */
1277 live_childrels = lappend(live_childrels, childrel);
1278 }
1279
1280 /* Add paths to the append relation. */
1281 add_paths_to_append_rel(root, rel, live_childrels);
1282 }
1283
1284
1285 /*
1286 * add_paths_to_append_rel
1287 * Generate paths for the given append relation given the set of non-dummy
1288 * child rels.
1289 *
1290 * The function collects all parameterizations and orderings supported by the
1291 * non-dummy children. For every such parameterization or ordering, it creates
1292 * an append path collecting one path from each non-dummy child with given
1293 * parameterization or ordering. Similarly it collects partial paths from
1294 * non-dummy children to create partial append paths.
1295 */
1296 void
1297 add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
1298 List *live_childrels)
1299 {
1300 List *subpaths = NIL;
1301 bool subpaths_valid = true;
1302 List *partial_subpaths = NIL;
1303 List *pa_partial_subpaths = NIL;
1304 List *pa_nonpartial_subpaths = NIL;
1305 bool partial_subpaths_valid = true;
1306 bool pa_subpaths_valid;
1307 List *all_child_pathkeys = NIL;
1308 List *all_child_outers = NIL;
1309 ListCell *l;
1310 List *partitioned_rels = NIL;
1311 double partial_rows = -1;
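/*
 * partial_rows == -1 means no partial Append has been built yet; once one
 * is, its row estimate is saved here so that any later parallel-aware
 * append built below uses the same figure.
 */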
1312
1313 /* If appropriate, consider parallel append */
1314 pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
1315
1316 /*
1317 * AppendPath generated for partitioned tables must record the RT indexes
1318 * of partitioned tables that are direct or indirect children of this
1319 * Append rel.
1320 *
1321 * AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
1322 * itself does not represent a partitioned relation, but the child sub-
1323 * queries may contain references to partitioned relations. The loop
1324 * below will look for such children and collect them in a list to be
1325 * passed to the path creation function. (This assumes that we don't need
1326 * to look through multiple levels of subquery RTEs; if we ever do, we
1327 * could consider stuffing the list we generate here into sub-query RTE's
1328 * RelOptInfo, just like we do for partitioned rels, which would be used
1329 * when populating our parent rel with paths. For the present, that
1330 * appears to be unnecessary.)
1331 */
1332 if (rel->part_scheme != NULL)
1333 {
1334 if (IS_SIMPLE_REL(rel))
1335 partitioned_rels = list_make1(rel->partitioned_child_rels);
1336 else if (IS_JOIN_REL(rel))
1337 {
1338 int relid = -1;
1339 List *partrels = NIL;
1340
1341 /*
1342 * For a partitioned joinrel, concatenate the component rels'
1343 * partitioned_child_rels lists.
1344 */
1345 while ((relid = bms_next_member(rel->relids, relid)) >= 0)
1346 {
1347 RelOptInfo *component;
1348
1349 Assert(relid >= 1 && relid < root->simple_rel_array_size);
1350 component = root->simple_rel_array[relid];
1351 Assert(component->part_scheme != NULL);
1352 Assert(list_length(component->partitioned_child_rels) >= 1);
1353 partrels =
1354 list_concat(partrels,
1355 list_copy(component->partitioned_child_rels));
1356 }
1357
1358 partitioned_rels = list_make1(partrels);
1359 }
1360
1361 Assert(list_length(partitioned_rels) >= 1);
1362 }
1363
1364 /*
1365 * For every non-dummy child, remember the cheapest path. Also, identify
1366 * all pathkeys (orderings) and parameterizations (required_outer sets)
1367 * available for the non-dummy member relations.
1368 */
1369 foreach(l, live_childrels)
1370 {
1371 RelOptInfo *childrel = lfirst(l);
1372 ListCell *lcp;
1373 Path *cheapest_partial_path = NULL;
1374
1375 /*
1376 * For UNION ALLs with non-empty partitioned_child_rels, accumulate
1377 * the Lists of child relations.
1378 */
1379 if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
1380 partitioned_rels = lappend(partitioned_rels,
1381 childrel->partitioned_child_rels);
1382
1383 /*
1384 * If child has an unparameterized cheapest-total path, add that to
1385 * the unparameterized Append path we are constructing for the parent.
1386 * If not, there's no workable unparameterized path.
1387 *
1388 * With partitionwise aggregates, the child rel's pathlist may be
1389 * empty, so don't assume that a path exists here.
1390 */
1391 if (childrel->pathlist != NIL &&
1392 childrel->cheapest_total_path->param_info == NULL)
1393 accumulate_append_subpath(childrel->cheapest_total_path,
1394 &subpaths, NULL);
1395 else
1396 subpaths_valid = false;
1397
1398 /* Same idea, but for a partial plan. */
1399 if (childrel->partial_pathlist != NIL)
1400 {
1401 cheapest_partial_path = linitial(childrel->partial_pathlist);
1402 accumulate_append_subpath(cheapest_partial_path,
1403 &partial_subpaths, NULL);
1404 }
1405 else
1406 partial_subpaths_valid = false;
1407
1408 /*
1409 * Same idea, but for a parallel append mixing partial and non-partial
1410 * paths.
1411 */
1412 if (pa_subpaths_valid)
1413 {
1414 Path *nppath = NULL;
1415
1416 nppath =
1417 get_cheapest_parallel_safe_total_inner(childrel->pathlist);
1418
1419 if (cheapest_partial_path == NULL && nppath == NULL)
1420 {
1421 /* Neither a partial nor a parallel-safe path? Forget it. */
1422 pa_subpaths_valid = false;
1423 }
1424 else if (nppath == NULL ||
1425 (cheapest_partial_path != NULL &&
1426 cheapest_partial_path->total_cost < nppath->total_cost))
1427 {
1428 /* Partial path is cheaper or the only option. */
1429 Assert(cheapest_partial_path != NULL);
1430 accumulate_append_subpath(cheapest_partial_path,
1431 &pa_partial_subpaths,
1432 &pa_nonpartial_subpaths);
1433
1434 }
1435 else
1436 {
1437 /*
1438 * Either we've got only a non-partial path, or we think that
1439 * a single backend can execute the best non-partial path
1440 * faster than all the parallel backends working together can
1441 * execute the best partial path.
1442 *
1443 * It might make sense to be more aggressive here. Even if
1444 * the best non-partial path is more expensive than the best
1445 * partial path, it could still be better to choose the
1446 * non-partial path if there are several such paths that can
1447 * be given to different workers. For now, we don't try to
1448 * figure that out.
1449 */
1450 accumulate_append_subpath(nppath,
1451 &pa_nonpartial_subpaths,
1452 NULL);
1453 }
1454 }
1455
1456 /*
1457 * Collect lists of all the available path orderings and
1458 * parameterizations for all the children. We use these as a
1459 * heuristic to indicate which sort orderings and parameterizations we
1460 * should build Append and MergeAppend paths for.
1461 */
1462 foreach(lcp, childrel->pathlist)
1463 {
1464 Path *childpath = (Path *) lfirst(lcp);
1465 List *childkeys = childpath->pathkeys;
1466 Relids childouter = PATH_REQ_OUTER(childpath);
1467
1468 /* Unsorted paths don't contribute to pathkey list */
1469 if (childkeys != NIL)
1470 {
1471 ListCell *lpk;
1472 bool found = false;
1473
1474 /* Have we already seen this ordering? */
1475 foreach(lpk, all_child_pathkeys)
1476 {
1477 List *existing_pathkeys = (List *) lfirst(lpk);
1478
1479 if (compare_pathkeys(existing_pathkeys,
1480 childkeys) == PATHKEYS_EQUAL)
1481 {
1482 found = true;
1483 break;
1484 }
1485 }
1486 if (!found)
1487 {
1488 /* No, so add it to all_child_pathkeys */
1489 all_child_pathkeys = lappend(all_child_pathkeys,
1490 childkeys);
1491 }
1492 }
1493
1494 /* Unparameterized paths don't contribute to param-set list */
1495 if (childouter)
1496 {
1497 ListCell *lco;
1498 bool found = false;
1499
1500 /* Have we already seen this param set? */
1501 foreach(lco, all_child_outers)
1502 {
1503 Relids existing_outers = (Relids) lfirst(lco);
1504
1505 if (bms_equal(existing_outers, childouter))
1506 {
1507 found = true;
1508 break;
1509 }
1510 }
1511 if (!found)
1512 {
1513 /* No, so add it to all_child_outers */
1514 all_child_outers = lappend(all_child_outers,
1515 childouter);
1516 }
1517 }
1518 }
1519 }
1520
1521 /*
1522 * If we found unparameterized paths for all children, build an unordered,
1523 * unparameterized Append path for the rel. (Note: this is correct even
1524 * if we have zero or one live subpath due to constraint exclusion.)
1525 */
1526 if (subpaths_valid)
1527 add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
1528 NIL, NULL, 0, false,
1529 partitioned_rels, -1));
1530
1531 /*
1532 * Consider an append of unordered, unparameterized partial paths. Make
1533 * it parallel-aware if possible.
1534 */
1535 if (partial_subpaths_valid && partial_subpaths != NIL)
1536 {
1537 AppendPath *appendpath;
1538 ListCell *lc;
1539 int parallel_workers = 0;
1540
1541 /* Find the highest number of workers requested for any subpath. */
1542 foreach(lc, partial_subpaths)
1543 {
1544 Path *path = lfirst(lc);
1545
1546 parallel_workers = Max(parallel_workers, path->parallel_workers);
1547 }
1548 Assert(parallel_workers > 0);
1549
1550 /*
1551 * If the use of parallel append is permitted, always request at least
1552 * log2(# of children) workers. We assume it can be useful to have
1553 * extra workers in this case because they will be spread out across
1554 * the children. The precise formula is just a guess, but we don't
1555 * want to end up with a radically different answer for a table with N
1556 * partitions vs. an unpartitioned table with the same data, so the
1557 * use of some kind of log-scaling here seems to make some sense.
1558 */
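/* fls() yields 4 for 8-15 children, 5 for 16-31, and so on */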
1559 if (enable_parallel_append)
1560 {
1561 parallel_workers = Max(parallel_workers,
1562 fls(list_length(live_childrels)));
1563 parallel_workers = Min(parallel_workers,
1564 max_parallel_workers_per_gather);
1565 }
1566 Assert(parallel_workers > 0);
1567
1568 /* Generate a partial append path. */
1569 appendpath = create_append_path(root, rel, NIL, partial_subpaths,
1570 NIL, NULL, parallel_workers,
1571 enable_parallel_append,
1572 partitioned_rels, -1);
1573
1574 /*
1575 * Make sure any subsequent partial paths use the same row count
1576 * estimate.
1577 */
1578 partial_rows = appendpath->path.rows;
1579
1580 /* Add the path. */
1581 add_partial_path(rel, (Path *) appendpath);
1582 }
1583
1584 /*
1585 * Consider a parallel-aware append using a mix of partial and non-partial
1586 * paths. (This only makes sense if there's at least one child which has
1587 * a non-partial path that is substantially cheaper than any partial path;
1588 * otherwise, we should use the append path added in the previous step.)
1589 */
1590 if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
1591 {
1592 AppendPath *appendpath;
1593 ListCell *lc;
1594 int parallel_workers = 0;
1595
1596 /*
1597 * Find the highest number of workers requested for any partial
1598 * subpath.
1599 */
1600 foreach(lc, pa_partial_subpaths)
1601 {
1602 Path *path = lfirst(lc);
1603
1604 parallel_workers = Max(parallel_workers, path->parallel_workers);
1605 }
1606
1607 /*
1608 * Same formula here as above. It's even more important in this
1609 * instance because the non-partial paths won't contribute anything to
1610 * the planned number of parallel workers.
1611 */
1612 parallel_workers = Max(parallel_workers,
1613 fls(list_length(live_childrels)));
1614 parallel_workers = Min(parallel_workers,
1615 max_parallel_workers_per_gather);
1616 Assert(parallel_workers > 0);
1617
1618 appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
1619 pa_partial_subpaths,
1620 NIL, NULL, parallel_workers, true,
1621 partitioned_rels, partial_rows);
1622 add_partial_path(rel, (Path *) appendpath);
1623 }
1624
1625 /*
1626 * Also build unparameterized ordered append paths based on the collected
1627 * list of child pathkeys.
1628 */
1629 if (subpaths_valid)
1630 generate_orderedappend_paths(root, rel, live_childrels,
1631 all_child_pathkeys,
1632 partitioned_rels);
1633
1634 /*
1635 * Build Append paths for each parameterization seen among the child rels.
1636 * (This may look pretty expensive, but in most cases of practical
1637 * interest, the child rels will expose mostly the same parameterizations,
1638 * so that not that many cases actually get considered here.)
1639 *
1640 * The Append node itself cannot enforce quals, so all qual checking must
1641 * be done in the child paths. This means that to have a parameterized
1642 * Append path, we must have the exact same parameterization for each
1643 * child path; otherwise some children might be failing to check the
1644 * moved-down quals. To make them match up, we can try to increase the
1645 * parameterization of lesser-parameterized paths.
1646 */
1647 foreach(l, all_child_outers)
1648 {
1649 Relids required_outer = (Relids) lfirst(l);
1650 ListCell *lcr;
1651
1652 /* Select the child paths for an Append with this parameterization */
1653 subpaths = NIL;
1654 subpaths_valid = true;
1655 foreach(lcr, live_childrels)
1656 {
1657 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1658 Path *subpath;
1659
1660 if (childrel->pathlist == NIL)
1661 {
1662 /* failed to make a suitable path for this child */
1663 subpaths_valid = false;
1664 break;
1665 }
1666
1667 subpath = get_cheapest_parameterized_child_path(root,
1668 childrel,
1669 required_outer);
1670 if (subpath == NULL)
1671 {
1672 /* failed to make a suitable path for this child */
1673 subpaths_valid = false;
1674 break;
1675 }
1676 accumulate_append_subpath(subpath, &subpaths, NULL);
1677 }
1678
1679 if (subpaths_valid)
1680 add_path(rel, (Path *)
1681 create_append_path(root, rel, subpaths, NIL,
1682 NIL, required_outer, 0, false,
1683 partitioned_rels, -1));
1684 }
1685
1686 /*
1687 * When there is only a single child relation, the Append path can inherit
1688 * any ordering available for the child rel's path, so that it's useful to
1689 * consider ordered partial paths. Above we only considered the cheapest
1690 * partial path for each child, but let's also make paths using any
1691 * partial paths that have pathkeys.
1692 */
1693 if (list_length(live_childrels) == 1)
1694 {
1695 RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);
1696
1697 foreach(l, childrel->partial_pathlist)
1698 {
1699 Path *path = (Path *) lfirst(l);
1700 AppendPath *appendpath;
1701
1702 /*
1703 * Skip paths with no pathkeys. Also skip the cheapest partial
1704 * path, since we already used that above.
1705 */
1706 if (path->pathkeys == NIL ||
1707 path == linitial(childrel->partial_pathlist))
1708 continue;
1709
1710 appendpath = create_append_path(root, rel, NIL, list_make1(path),
1711 NIL, NULL,
1712 path->parallel_workers, true,
1713 partitioned_rels, partial_rows);
1714 add_partial_path(rel, (Path *) appendpath);
1715 }
1716 }
1717 }
1718
1719 /*
1720 * generate_orderedappend_paths
1721 * Generate ordered append paths for an append relation
1722 *
1723 * Usually we generate MergeAppend paths here, but there are some special
1724 * cases where we can generate simple Append paths, because the subpaths
1725 * can provide tuples in the required order already.
1726 *
1727 * We generate a path for each ordering (pathkey list) appearing in
1728 * all_child_pathkeys.
1729 *
1730 * We consider both cheapest-startup and cheapest-total cases, ie, for each
1731 * interesting ordering, collect all the cheapest startup subpaths and all the
1732 * cheapest total paths, and build a suitable path for each case.
1733 *
1734 * We don't currently generate any parameterized ordered paths here. While
1735 * it would not take much more code here to do so, it's very unclear that it
1736 * is worth the planning cycles to investigate such paths: there's little
1737 * use for an ordered path on the inside of a nestloop. In fact, it's likely
1738 * that the current coding of add_path would reject such paths out of hand,
1739 * because add_path gives no credit for sort ordering of parameterized paths,
1740 * and a parameterized MergeAppend is going to be more expensive than the
1741 * corresponding parameterized Append path. If we ever try harder to support
1742 * parameterized mergejoin plans, it might be worth adding support for
1743 * parameterized paths here to feed such joins. (See notes in
1744 * optimizer/README for why that might not ever happen, though.)
1745 */
1746 static void
1747 generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
1748 List *live_childrels,
1749 List *all_child_pathkeys,
1750 List *partitioned_rels)
1751 {
1752 ListCell *lcp;
1753 List *partition_pathkeys = NIL;
1754 List *partition_pathkeys_desc = NIL;
1755 bool partition_pathkeys_partial = true;
1756 bool partition_pathkeys_desc_partial = true;
1757
1758 /*
1759 * Some partitioned table setups may allow us to use an Append node
1760 * instead of a MergeAppend. This is possible in cases such as RANGE
1761 * partitioned tables where it's guaranteed that an earlier partition must
1762 * contain rows which come earlier in the sort order. To detect whether
1763 * this is relevant, build pathkey descriptions of the partition ordering,
1764 * for both forward and reverse scans.
1765 */
1766 if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
1767 partitions_are_ordered(rel->boundinfo, rel->nparts))
1768 {
1769 partition_pathkeys = build_partition_pathkeys(root, rel,
1770 ForwardScanDirection,
1771 &partition_pathkeys_partial);
1772
1773 partition_pathkeys_desc = build_partition_pathkeys(root, rel,
1774 BackwardScanDirection,
1775 &partition_pathkeys_desc_partial);
1776
1777 /*
1778 * You might think we should truncate_useless_pathkeys here, but
1779 * allowing partition keys which are a subset of the query's pathkeys
1780 * can often be useful. For example, consider a table partitioned by
1781 * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
1782 * paths that can produce the a, b, c ordering (perhaps via indexes on
1783 * (a, b, c)) then it works to consider the appendrel output as
1784 * ordered by a, b, c.
1785 */
1786 }
1787
1788 /* Now consider each interesting sort ordering */
1789 foreach(lcp, all_child_pathkeys)
1790 {
1791 List *pathkeys = (List *) lfirst(lcp);
1792 List *startup_subpaths = NIL;
1793 List *total_subpaths = NIL;
1794 bool startup_neq_total = false;
1795 ListCell *lcr;
1796 bool match_partition_order;
1797 bool match_partition_order_desc;
1798
1799 /*
1800 * Determine if this sort ordering matches any partition pathkeys we
1801 * have, for both ascending and descending partition order. If the
1802 * partition pathkeys happen to be contained in pathkeys then it still
1803 * works, as described above, providing that the partition pathkeys
1804 * are complete and not just a prefix of the partition keys. (In such
1805 * cases we'll be relying on the child paths to have sorted the
1806 * lower-order columns of the required pathkeys.)
1807 */
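/*
 * For example, with a table partitioned by RANGE (a), partition_pathkeys
 * is (a): a query ordering of (a) matches via the first test below, while
 * an ordering of (a, b) matches via the second test, provided the
 * partition pathkeys are not partial.
 */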
1808 match_partition_order =
1809 pathkeys_contained_in(pathkeys, partition_pathkeys) ||
1810 (!partition_pathkeys_partial &&
1811 pathkeys_contained_in(partition_pathkeys, pathkeys));
1812
1813 match_partition_order_desc = !match_partition_order &&
1814 (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
1815 (!partition_pathkeys_desc_partial &&
1816 pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
1817
1818 /* Select the child paths for this ordering... */
1819 foreach(lcr, live_childrels)
1820 {
1821 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1822 Path *cheapest_startup,
1823 *cheapest_total;
1824
1825 /* Locate the right paths, if they are available. */
1826 cheapest_startup =
1827 get_cheapest_path_for_pathkeys(childrel->pathlist,
1828 pathkeys,
1829 NULL,
1830 STARTUP_COST,
1831 false);
1832 cheapest_total =
1833 get_cheapest_path_for_pathkeys(childrel->pathlist,
1834 pathkeys,
1835 NULL,
1836 TOTAL_COST,
1837 false);
1838
1839 /*
1840 * If we can't find any paths with the right order just use the
1841 * cheapest-total path; we'll have to sort it later.
1842 */
1843 if (cheapest_startup == NULL || cheapest_total == NULL)
1844 {
1845 cheapest_startup = cheapest_total =
1846 childrel->cheapest_total_path;
1847 /* Assert we do have an unparameterized path for this child */
1848 Assert(cheapest_total->param_info == NULL);
1849 }
1850
1851 /*
1852 * Notice whether we actually have different paths for the
1853 * "cheapest" and "total" cases; frequently there will be no point
1854 * in building a second Append or MergeAppend path below.
1855 */
1856 if (cheapest_startup != cheapest_total)
1857 startup_neq_total = true;
1858
1859 /*
1860 * Collect the appropriate child paths. The required logic varies
1861 * for the Append and MergeAppend cases.
1862 */
1863 if (match_partition_order)
1864 {
1865 /*
1866 * We're going to make a plain Append path. We don't need
1867 * most of what accumulate_append_subpath would do, but we do
1868 * want to cut out child Appends or MergeAppends if they have
1869 * just a single subpath (and hence aren't doing anything
1870 * useful).
1871 */
1872 cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1873 cheapest_total = get_singleton_append_subpath(cheapest_total);
1874
1875 startup_subpaths = lappend(startup_subpaths, cheapest_startup);
1876 total_subpaths = lappend(total_subpaths, cheapest_total);
1877 }
1878 else if (match_partition_order_desc)
1879 {
1880 /*
1881 * As above, but we need to reverse the order of the children,
1882 * because nodeAppend.c doesn't know anything about reverse
1883 * ordering and will scan the children in the order presented.
1884 */
1885 cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1886 cheapest_total = get_singleton_append_subpath(cheapest_total);
1887
1888 startup_subpaths = lcons(cheapest_startup, startup_subpaths);
1889 total_subpaths = lcons(cheapest_total, total_subpaths);
1890 }
1891 else
1892 {
1893 /*
1894 * Otherwise, rely on accumulate_append_subpath to collect the
1895 * child paths for the MergeAppend.
1896 */
1897 accumulate_append_subpath(cheapest_startup,
1898 &startup_subpaths, NULL);
1899 accumulate_append_subpath(cheapest_total,
1900 &total_subpaths, NULL);
1901 }
1902 }
1903
1904 /* ... and build the Append or MergeAppend paths */
1905 if (match_partition_order || match_partition_order_desc)
1906 {
1907 /* We only need Append */
1908 add_path(rel, (Path *) create_append_path(root,
1909 rel,
1910 startup_subpaths,
1911 NIL,
1912 pathkeys,
1913 NULL,
1914 0,
1915 false,
1916 partitioned_rels,
1917 -1));
1918 if (startup_neq_total)
1919 add_path(rel, (Path *) create_append_path(root,
1920 rel,
1921 total_subpaths,
1922 NIL,
1923 pathkeys,
1924 NULL,
1925 0,
1926 false,
1927 partitioned_rels,
1928 -1));
1929 }
1930 else
1931 {
1932 /* We need MergeAppend */
1933 add_path(rel, (Path *) create_merge_append_path(root,
1934 rel,
1935 startup_subpaths,
1936 pathkeys,
1937 NULL,
1938 partitioned_rels));
1939 if (startup_neq_total)
1940 add_path(rel, (Path *) create_merge_append_path(root,
1941 rel,
1942 total_subpaths,
1943 pathkeys,
1944 NULL,
1945 partitioned_rels));
1946 }
1947 }
1948 }
1949
1950 /*
1951 * get_cheapest_parameterized_child_path
1952 * Get cheapest path for this relation that has exactly the requested
1953 * parameterization.
1954 *
1955 * Returns NULL if unable to create such a path.
1956 */
1957 static Path *
1958 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1959 Relids required_outer)
1960 {
1961 Path *cheapest;
1962 ListCell *lc;
1963
1964 /*
1965 * Look up the cheapest existing path with no more than the needed
1966 * parameterization. If it has exactly the needed parameterization, we're
1967 * done.
1968 */
1969 cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1970 NIL,
1971 required_outer,
1972 TOTAL_COST,
1973 false);
1974 Assert(cheapest != NULL);
1975 if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1976 return cheapest;
1977
1978 /*
1979 * Otherwise, we can "reparameterize" an existing path to match the given
1980 * parameterization, which effectively means pushing down additional
1981 * joinquals to be checked within the path's scan. However, some existing
1982 * paths might check the available joinquals already while others don't;
1983 * therefore, it's not clear which existing path will be cheapest after
1984 * reparameterization. We have to go through them all and find out.
1985 */
1986 cheapest = NULL;
1987 foreach(lc, rel->pathlist)
1988 {
1989 Path *path = (Path *) lfirst(lc);
1990
1991 /* Can't use it if it needs more than requested parameterization */
1992 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1993 continue;
1994
1995 /*
1996 * Reparameterization can only increase the path's cost, so if it's
1997 * already more expensive than the current cheapest, forget it.
1998 */
1999 if (cheapest != NULL &&
2000 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2001 continue;
2002
2003 /* Reparameterize if needed, then recheck cost */
2004 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
2005 {
2006 path = reparameterize_path(root, path, required_outer, 1.0);
2007 if (path == NULL)
2008 continue; /* failed to reparameterize this one */
2009 Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
2010
2011 if (cheapest != NULL &&
2012 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2013 continue;
2014 }
2015
2016 /* We have a new best path */
2017 cheapest = path;
2018 }
2019
2020 /* Return the best path, or NULL if we found no suitable candidate */
2021 return cheapest;
2022 }
2023
2024 /*
2025 * accumulate_append_subpath
2026 * Add a subpath to the list being built for an Append or MergeAppend.
2027 *
2028 * It's possible that the child is itself an Append or MergeAppend path, in
2029 * which case we can "cut out the middleman" and just add its child paths to
2030 * our own list. (We don't try to do this earlier because we need to apply
2031 * both levels of transformation to the quals.)
2032 *
2033 * Note that if we omit a child MergeAppend in this way, we are effectively
2034 * omitting a sort step, which seems fine: if the parent is to be an Append,
2035 * its result would be unsorted anyway, while if the parent is to be a
2036 * MergeAppend, there's no point in a separate sort on a child.
2037 *
2038 * Normally, either path is a partial path and subpaths is a list of partial
2039 * paths, or else path is a non-partial path and subpaths is a list of those.
2040 * However, if path is a parallel-aware Append, then we add its partial path
2041 * children to subpaths and the rest to special_subpaths. If the latter is
2042 * NULL, we don't flatten the path at all (unless it contains only partial
2043 * paths).
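*
* For example, when a child appendrel contributed an Append over paths p1
* and p2 (names are illustrative), accumulating it here yields a flat list
* (..., p1, p2) in the parent rather than one Append nested inside another.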
2044 */
2045 static void
2046 accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
2047 {
2048 if (IsA(path, AppendPath))
2049 {
2050 AppendPath *apath = (AppendPath *) path;
2051
2052 if (!apath->path.parallel_aware || apath->first_partial_path == 0)
2053 {
2054 /* list_copy is important here to avoid sharing list substructure */
2055 *subpaths = list_concat(*subpaths, list_copy(apath->subpaths));
2056 return;
2057 }
2058 else if (special_subpaths != NULL)
2059 {
2060 List *new_special_subpaths;
2061
2062 /* Split Parallel Append into partial and non-partial subpaths */
2063 *subpaths = list_concat(*subpaths,
2064 list_copy_tail(apath->subpaths,
2065 apath->first_partial_path));
2066 new_special_subpaths =
2067 list_truncate(list_copy(apath->subpaths),
2068 apath->first_partial_path);
2069 *special_subpaths = list_concat(*special_subpaths,
2070 new_special_subpaths);
2071 return;
2072 }
2073 }
2074 else if (IsA(path, MergeAppendPath))
2075 {
2076 MergeAppendPath *mpath = (MergeAppendPath *) path;
2077
2078 /* list_copy is important here to avoid sharing list substructure */
2079 *subpaths = list_concat(*subpaths, list_copy(mpath->subpaths));
2080 return;
2081 }
2082
2083 *subpaths = lappend(*subpaths, path);
2084 }
2085
2086 /*
2087 * get_singleton_append_subpath
2088 * Returns the single subpath of an Append/MergeAppend, or just
2089 * returns 'path' if it's not a single-subpath Append/MergeAppend.
2090 *
2091 * Note: 'path' must not be a parallel-aware path.
2092 */
2093 static Path *
2094 get_singleton_append_subpath(Path *path)
2095 {
2096 Assert(!path->parallel_aware);
2097
2098 if (IsA(path, AppendPath))
2099 {
2100 AppendPath *apath = (AppendPath *) path;
2101
2102 if (list_length(apath->subpaths) == 1)
2103 return (Path *) linitial(apath->subpaths);
2104 }
2105 else if (IsA(path, MergeAppendPath))
2106 {
2107 MergeAppendPath *mpath = (MergeAppendPath *) path;
2108
2109 if (list_length(mpath->subpaths) == 1)
2110 return (Path *) linitial(mpath->subpaths);
2111 }
2112
2113 return path;
2114 }
2115
2116 /*
2117 * set_dummy_rel_pathlist
2118 * Build a dummy path for a relation that's been excluded by constraints
2119 *
2120 * Rather than inventing a special "dummy" path type, we represent this as an
2121 * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2122 *
2123 * (See also mark_dummy_rel, which does basically the same thing, but is
2124 * typically used to change a rel into dummy state after we already made
2125 * paths for it.)
2126 */
2127 static void
2128 set_dummy_rel_pathlist(RelOptInfo *rel)
2129 {
2130 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2131 rel->rows = 0;
2132 rel->reltarget->width = 0;
2133
2134 /* Discard any pre-existing paths; no further need for them */
2135 rel->pathlist = NIL;
2136 rel->partial_pathlist = NIL;
2137
2138 /* Set up the dummy path */
2139 add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
2140 NIL, rel->lateral_relids,
2141 0, false, NIL, -1));
2142
2143 /*
2144 * We set the cheapest-path fields immediately, just in case they were
2145 * pointing at some discarded path. This is redundant when we're called
2146 * from set_rel_size(), but not when called from elsewhere, and doing it
2147 * twice is harmless anyway.
2148 */
2149 set_cheapest(rel);
2150 }
2151
2152 /* quick-and-dirty test to see if any joining is needed */
2153 static bool
2154 has_multiple_baserels(PlannerInfo *root)
2155 {
2156 int num_base_rels = 0;
2157 Index rti;
2158
2159 for (rti = 1; rti < root->simple_rel_array_size; rti++)
2160 {
2161 RelOptInfo *brel = root->simple_rel_array[rti];
2162
2163 if (brel == NULL)
2164 continue;
2165
2166 /* ignore RTEs that are "other rels" */
2167 if (brel->reloptkind == RELOPT_BASEREL)
2168 if (++num_base_rels > 1)
2169 return true;
2170 }
2171 return false;
2172 }
2173
2174 /*
2175 * set_subquery_pathlist
2176 * Generate SubqueryScan access paths for a subquery RTE
2177 *
2178 * We don't currently support generating parameterized paths for subqueries
2179 * by pushing join clauses down into them; it seems too expensive to re-plan
2180 * the subquery multiple times to consider different alternatives.
2181 * (XXX that could stand to be reconsidered, now that we use Paths.)
2182 * So the paths made here will be parameterized if the subquery contains
2183 * LATERAL references, otherwise not. As long as that's true, there's no need
2184 * for a separate set_subquery_size phase: just make the paths right away.
2185 */
2186 static void
2187 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
2188 Index rti, RangeTblEntry *rte)
2189 {
2190 Query *parse = root->parse;
2191 Query *subquery = rte->subquery;
2192 Relids required_outer;
2193 pushdown_safety_info safetyInfo;
2194 double tuple_fraction;
2195 RelOptInfo *sub_final_rel;
2196 ListCell *lc;
2197
2198 /*
2199 * Must copy the Query so that planning doesn't mess up the RTE contents
2200 * (really really need to fix the planner to not scribble on its input,
2201 * someday ... but see remove_unused_subquery_outputs to start with).
2202 */
2203 subquery = copyObject(subquery);
2204
2205 /*
2206 * If it's a LATERAL subquery, it might contain some Vars of the current
2207 * query level, requiring it to be treated as parameterized, even though
2208 * we don't support pushing down join quals into subqueries.
2209 */
2210 required_outer = rel->lateral_relids;
2211
2212 /*
2213 * Zero out result area for subquery_is_pushdown_safe, so that it can set
2214 * flags as needed while recursing. In particular, we need a workspace
2215 * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
2216 * will be set true if we find that output column i of the subquery is
2217 * unsafe to use in a pushed-down qual.
2218 */
2219 memset(&safetyInfo, 0, sizeof(safetyInfo));
2220 safetyInfo.unsafeColumns = (bool *)
2221 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
2222
2223 /*
2224 * If the subquery has the "security_barrier" flag, it means the subquery
2225 * originated from a view that must enforce row level security. Then we
2226 * must not push down quals that contain leaky functions. (Ideally this
2227 * would be checked inside subquery_is_pushdown_safe, but since we don't
2228 * currently pass the RTE to that function, we must do it here.)
2229 */
2230 safetyInfo.unsafeLeaky = rte->security_barrier;
2231
2232 /*
2233 * If there are any restriction clauses that have been attached to the
2234 * subquery relation, consider pushing them down to become WHERE or HAVING
2235 * quals of the subquery itself. This transformation is useful because it
2236 * may allow us to generate a better plan for the subquery than evaluating
2237 * all the subquery output rows and then filtering them.
2238 *
2239 * There are several cases where we cannot push down clauses. Restrictions
2240 * involving the subquery are checked by subquery_is_pushdown_safe().
2241 * Restrictions on individual clauses are checked by
2242 * qual_is_pushdown_safe(). Also, we don't want to push down
2243 * pseudoconstant clauses; better to have the gating node above the
2244 * subquery.
2245 *
2246 * Non-pushed-down clauses will get evaluated as qpquals of the
2247 * SubqueryScan node.
2248 *
2249 * XXX Are there any cases where we want to make a policy decision not to
2250 * push down a pushable qual, because it'd result in a worse plan?
2251 */
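/*
 * For example, given
 *		SELECT * FROM (SELECT a, b FROM t1 UNION ALL SELECT a, b FROM t2) ss
 *		WHERE a = 42
 * (table names illustrative), the "a = 42" clause can be pushed into both
 * arms of the UNION ALL, which may let each arm use an index on its own
 * "a" column instead of filtering the subquery's full output.
 */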
2252 if (rel->baserestrictinfo != NIL &&
2253 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
2254 {
2255 /* OK to consider pushing down individual quals */
2256 List *upperrestrictlist = NIL;
2257 ListCell *l;
2258
2259 foreach(l, rel->baserestrictinfo)
2260 {
2261 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
2262 Node *clause = (Node *) rinfo->clause;
2263
2264 if (!rinfo->pseudoconstant &&
2265 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
2266 {
2267 /* Push it down */
2268 subquery_push_qual(subquery, rte, rti, clause);
2269 }
2270 else
2271 {
2272 /* Keep it in the upper query */
2273 upperrestrictlist = lappend(upperrestrictlist, rinfo);
2274 }
2275 }
2276 rel->baserestrictinfo = upperrestrictlist;
2277 /* We don't bother recomputing baserestrict_min_security */
2278 }
2279
2280 pfree(safetyInfo.unsafeColumns);
2281
2282 /*
2283 * The upper query might not use all the subquery's output columns; if
2284 * not, we can simplify.
2285 */
2286 remove_unused_subquery_outputs(subquery, rel);
2287
2288 /*
2289 * We can safely pass the outer tuple_fraction down to the subquery if the
2290 * outer level has no joining, aggregation, or sorting to do. Otherwise
2291 * we'd better tell the subquery to plan for full retrieval. (XXX This
2292 * could probably be made more intelligent ...)
2293 */
2294 if (parse->hasAggs ||
2295 parse->groupClause ||
2296 parse->groupingSets ||
2297 parse->havingQual ||
2298 parse->distinctClause ||
2299 parse->sortClause ||
2300 has_multiple_baserels(root))
2301 tuple_fraction = 0.0; /* default case */
2302 else
2303 tuple_fraction = root->tuple_fraction;
2304
2305 /* plan_params should not be in use in current query level */
2306 Assert(root->plan_params == NIL);
2307
2308 /* Generate a subroot and Paths for the subquery */
2309 rel->subroot = subquery_planner(root->glob, subquery,
2310 root,
2311 false, tuple_fraction);
2312
2313 /* Isolate the params needed by this specific subplan */
2314 rel->subplan_params = root->plan_params;
2315 root->plan_params = NIL;
2316
2317 /*
2318 * It's possible that constraint exclusion proved the subquery empty. If
2319 * so, it's desirable to produce an unadorned dummy path so that we will
2320 * recognize appropriate optimizations at this query level.
2321 */
2322 sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
2323
2324 if (IS_DUMMY_REL(sub_final_rel))
2325 {
2326 set_dummy_rel_pathlist(rel);
2327 return;
2328 }
2329
2330 /*
2331 * Mark rel with estimated output rows, width, etc. Note that we have to
2332 * do this before generating outer-query paths, else cost_subqueryscan is
2333 * not happy.
2334 */
2335 set_subquery_size_estimates(root, rel);
2336
2337 /*
2338 * For each Path that subquery_planner produced, make a SubqueryScanPath
2339 * in the outer query.
2340 */
2341 foreach(lc, sub_final_rel->pathlist)
2342 {
2343 Path *subpath = (Path *) lfirst(lc);
2344 List *pathkeys;
2345
2346 /* Convert subpath's pathkeys to outer representation */
2347 pathkeys = convert_subquery_pathkeys(root,
2348 rel,
2349 subpath->pathkeys,
2350 make_tlist_from_pathtarget(subpath->pathtarget));
2351
2352 /* Generate outer path using this subpath */
2353 add_path(rel, (Path *)
2354 create_subqueryscan_path(root, rel, subpath,
2355 pathkeys, required_outer));
2356 }
2357
2358 /* If outer rel allows parallelism, do same for partial paths. */
2359 if (rel->consider_parallel && bms_is_empty(required_outer))
2360 {
2361 /* If consider_parallel is false, there should be no partial paths. */
2362 Assert(sub_final_rel->consider_parallel ||
2363 sub_final_rel->partial_pathlist == NIL);
2364
2365 /* Same for partial paths. */
2366 foreach(lc, sub_final_rel->partial_pathlist)
2367 {
2368 Path *subpath = (Path *) lfirst(lc);
2369 List *pathkeys;
2370
2371 /* Convert subpath's pathkeys to outer representation */
2372 pathkeys = convert_subquery_pathkeys(root,
2373 rel,
2374 subpath->pathkeys,
2375 make_tlist_from_pathtarget(subpath->pathtarget));
2376
2377 /* Generate outer path using this subpath */
2378 add_partial_path(rel, (Path *)
2379 create_subqueryscan_path(root, rel, subpath,
2380 pathkeys,
2381 required_outer));
2382 }
2383 }
2384 }
2385
2386 /*
2387 * set_function_pathlist
2388 * Build the (single) access path for a function RTE
2389 */
2390 static void
2391 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2392 {
2393 Relids required_outer;
2394 List *pathkeys = NIL;
2395
2396 /*
2397 * We don't support pushing join clauses into the quals of a function
2398 * scan, but it could still have required parameterization due to LATERAL
2399 * refs in the function expression.
2400 */
2401 required_outer = rel->lateral_relids;
2402
2403 /*
2404 * The result is considered unordered unless ORDINALITY was used, in which
2405 * case it is ordered by the ordinal column (the last one). See if we
2406 * care, by checking for uses of that Var in equivalence classes.
2407 */
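/*
 * For example, in
 *		SELECT * FROM unnest(arr) WITH ORDINALITY AS u(x, n) ORDER BY n
 * (identifiers illustrative), the function scan already emits rows in
 * ordinal order, so publishing a pathkey on the ordinality column lets the
 * planner skip an explicit Sort.
 */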
2408 if (rte->funcordinality)
2409 {
2410 AttrNumber ordattno = rel->max_attr;
2411 Var *var = NULL;
2412 ListCell *lc;
2413
2414 /*
2415 * Is there a Var for it in rel's targetlist? If not, the query did
2416 * not reference the ordinality column, or at least not in any way
2417 * that would be interesting for sorting.
2418 */
2419 foreach(lc, rel->reltarget->exprs)
2420 {
2421 Var *node = (Var *) lfirst(lc);
2422
2423 /* checking varno/varlevelsup is just paranoia */
2424 if (IsA(node, Var) &&
2425 node->varattno == ordattno &&
2426 node->varno == rel->relid &&
2427 node->varlevelsup == 0)
2428 {
2429 var = node;
2430 break;
2431 }
2432 }
2433
2434 /*
2435 * Try to build pathkeys for this Var with int8 sorting. We tell
2436 * build_expression_pathkey not to build any new equivalence class; if
2437 * the Var isn't already mentioned in some EC, it means that nothing
2438 * cares about the ordering.
2439 */
2440 if (var)
2441 pathkeys = build_expression_pathkey(root,
2442 (Expr *) var,
2443 NULL, /* below outer joins */
2444 Int8LessOperator,
2445 rel->relids,
2446 false);
2447 }
2448
2449 /* Generate appropriate path */
2450 add_path(rel, create_functionscan_path(root, rel,
2451 pathkeys, required_outer));
2452 }
2453
2454 /*
2455 * set_values_pathlist
2456 * Build the (single) access path for a VALUES RTE
2457 */
2458 static void
2459 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2460 {
2461 Relids required_outer;
2462
2463 /*
2464 * We don't support pushing join clauses into the quals of a values scan,
2465 * but it could still have required parameterization due to LATERAL refs
2466 * in the values expressions.
2467 */
2468 required_outer = rel->lateral_relids;
2469
2470 /* Generate appropriate path */
2471 add_path(rel, create_valuesscan_path(root, rel, required_outer));
2472 }
2473
2474 /*
2475 * set_tablefunc_pathlist
2476 * Build the (single) access path for a table func RTE
2477 */
2478 static void
2479 set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2480 {
2481 Relids required_outer;
2482
2483 /*
2484 * We don't support pushing join clauses into the quals of a tablefunc
2485 * scan, but it could still have required parameterization due to LATERAL
2486 * refs in the function expression.
2487 */
2488 required_outer = rel->lateral_relids;
2489
2490 /* Generate appropriate path */
2491 add_path(rel, create_tablefuncscan_path(root, rel,
2492 required_outer));
2493 }
2494
2495 /*
2496 * set_cte_pathlist
2497 * Build the (single) access path for a non-self-reference CTE RTE
2498 *
2499 * There's no need for a separate set_cte_size phase, since we don't
2500 * support join-qual-parameterized paths for CTEs.
2501 */
2502 static void
2503 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2504 {
2505 Plan *cteplan;
2506 PlannerInfo *cteroot;
2507 Index levelsup;
2508 int ndx;
2509 ListCell *lc;
2510 int plan_id;
2511 Relids required_outer;
2512
2513 /*
2514 * Find the referenced CTE, and locate the plan previously made for it.
2515 */
2516 levelsup = rte->ctelevelsup;
2517 cteroot = root;
2518 while (levelsup-- > 0)
2519 {
2520 cteroot = cteroot->parent_root;
2521 if (!cteroot) /* shouldn't happen */
2522 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2523 }
2524
2525 /*
2526 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2527 * on planning the CTEs (ie, this is a side-reference from another CTE).
2528 * So we mustn't use forboth here.
2529 */
2530 ndx = 0;
2531 foreach(lc, cteroot->parse->cteList)
2532 {
2533 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
2534
2535 if (strcmp(cte->ctename, rte->ctename) == 0)
2536 break;
2537 ndx++;
2538 }
2539 if (lc == NULL) /* shouldn't happen */
2540 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
2541 if (ndx >= list_length(cteroot->cte_plan_ids))
2542 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
2543 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
2544 Assert(plan_id > 0);
2545 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2546
2547 /* Mark rel with estimated output rows, width, etc */
2548 set_cte_size_estimates(root, rel, cteplan->plan_rows);
2549
2550 /*
2551 * We don't support pushing join clauses into the quals of a CTE scan, but
2552 * it could still have required parameterization due to LATERAL refs in
2553 * its tlist.
2554 */
2555 required_outer = rel->lateral_relids;
2556
2557 /* Generate appropriate path */
2558 add_path(rel, create_ctescan_path(root, rel, required_outer));
2559 }
2560
2561 /*
2562 * set_namedtuplestore_pathlist
2563 * Build the (single) access path for a named tuplestore RTE
2564 *
2565 * There's no need for a separate set_namedtuplestore_size phase, since we
2566 * don't support join-qual-parameterized paths for tuplestores.
2567 */
2568 static void
2569 set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
2570 RangeTblEntry *rte)
2571 {
2572 Relids required_outer;
2573
2574 /* Mark rel with estimated output rows, width, etc */
2575 set_namedtuplestore_size_estimates(root, rel);
2576
2577 /*
2578 * We don't support pushing join clauses into the quals of a tuplestore
2579 * scan, but it could still have required parameterization due to LATERAL
2580 * refs in its tlist.
2581 */
2582 required_outer = rel->lateral_relids;
2583
2584 /* Generate appropriate path */
2585 add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
2586
2587 /* Select cheapest path (pretty easy in this case...) */
2588 set_cheapest(rel);
2589 }
2590
2591 /*
2592 * set_result_pathlist
2593 * Build the (single) access path for an RTE_RESULT RTE
2594 *
2595 * There's no need for a separate set_result_size phase, since we
2596 * don't support join-qual-parameterized paths for these RTEs.
2597 */
2598 static void
2599 set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
2600 RangeTblEntry *rte)
2601 {
2602 Relids required_outer;
2603
2604 /* Mark rel with estimated output rows, width, etc */
2605 set_result_size_estimates(root, rel);
2606
2607 /*
2608 * We don't support pushing join clauses into the quals of a Result scan,
2609 * but it could still have required parameterization due to LATERAL refs
2610 * in its tlist.
2611 */
2612 required_outer = rel->lateral_relids;
2613
2614 /* Generate appropriate path */
2615 add_path(rel, create_resultscan_path(root, rel, required_outer));
2616
2617 /* Select cheapest path (pretty easy in this case...) */
2618 set_cheapest(rel);
2619 }
2620
2621 /*
2622 * set_worktable_pathlist
2623 * Build the (single) access path for a self-reference CTE RTE
2624 *
2625 * There's no need for a separate set_worktable_size phase, since we don't
2626 * support join-qual-parameterized paths for CTEs.
2627 */
2628 static void
2629 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2630 {
2631 Path *ctepath;
2632 PlannerInfo *cteroot;
2633 Index levelsup;
2634 Relids required_outer;
2635
2636 /*
2637 * We need to find the non-recursive term's path, which is in the plan
2638 * level that's processing the recursive UNION, which is one level *below*
2639 * where the CTE comes from.
2640 */
2641 levelsup = rte->ctelevelsup;
2642 if (levelsup == 0) /* shouldn't happen */
2643 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2644 levelsup--;
2645 cteroot = root;
2646 while (levelsup-- > 0)
2647 {
2648 cteroot = cteroot->parent_root;
2649 if (!cteroot) /* shouldn't happen */
2650 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2651 }
2652 ctepath = cteroot->non_recursive_path;
2653 if (!ctepath) /* shouldn't happen */
2654 elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
2655
2656 /* Mark rel with estimated output rows, width, etc */
2657 set_cte_size_estimates(root, rel, ctepath->rows);
2658
2659 /*
2660 * We don't support pushing join clauses into the quals of a worktable
2661 * scan, but it could still have required parameterization due to LATERAL
2662 * refs in its tlist. (I'm not sure this is actually possible given the
2663 * restrictions on recursive references, but it's easy enough to support.)
2664 */
2665 required_outer = rel->lateral_relids;
2666
2667 /* Generate appropriate path */
2668 add_path(rel, create_worktablescan_path(root, rel, required_outer));
2669 }
2670
2671 /*
2672 * generate_gather_paths
2673 * Generate parallel access paths for a relation by pushing a Gather or
2674 * Gather Merge on top of a partial path.
2675 *
2676 * This must not be called until after we're done creating all partial paths
2677 * for the specified relation. (Otherwise, add_partial_path might delete a
2678 * path that some GatherPath or GatherMergePath has a reference to.)
2679 *
2680 * If we're generating paths for a scan or join relation, override_rows will
2681 * be false, and we'll just use the relation's size estimate. When we're
2682 * being called for a partially-grouped path, though, we need to override
2683 * the rowcount estimate. (It's not clear that the particular value we're
2684 * using here is actually best, but the underlying rel has no estimate so
2685 * we must do something.)
2686 */
2687 void
2688 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2689 {
2690 Path *cheapest_partial_path;
2691 Path *simple_gather_path;
2692 ListCell *lc;
2693 double rows;
2694 double *rowsp = NULL;
2695
2696 /* If there are no partial paths, there's nothing to do here. */
2697 if (rel->partial_pathlist == NIL)
2698 return;
2699
2700 /* Should we override the rel's rowcount estimate? */
2701 if (override_rows)
2702 rowsp = &rows;
2703
2704 /*
2705 * The output of Gather is always unsorted, so there's only one partial
2706 * path of interest: the cheapest one. That will be the one at the front
2707 * of partial_pathlist because of the way add_partial_path works.
2708 */
2709 cheapest_partial_path = linitial(rel->partial_pathlist);
2710 rows =
2711 cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
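/*
 * Note that this row count is consulted only when override_rows is true;
 * otherwise rowsp is NULL and create_gather_path falls back to the rel's
 * own row estimate.
 */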
2712 simple_gather_path = (Path *)
2713 create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
2714 NULL, rowsp);
2715 add_path(rel, simple_gather_path);
2716
2717 /*
2718 * For each useful ordering, we can consider an order-preserving Gather
2719 * Merge.
2720 */
2721 foreach(lc, rel->partial_pathlist)
2722 {
2723 Path *subpath = (Path *) lfirst(lc);
2724 GatherMergePath *path;
2725
2726 if (subpath->pathkeys == NIL)
2727 continue;
2728
2729 rows = subpath->rows * subpath->parallel_workers;
2730 path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
2731 subpath->pathkeys, NULL, rowsp);
2732 add_path(rel, &path->path);
2733 }
2734 }
2735
2736 /*
2737 * make_rel_from_joinlist
2738 * Build access paths using a "joinlist" to guide the join path search.
2739 *
2740 * See comments for deconstruct_jointree() for definition of the joinlist
2741 * data structure.
2742 */
2743 static RelOptInfo *
2744 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
2745 {
2746 int levels_needed;
2747 List *initial_rels;
2748 ListCell *jl;
2749
2750 /*
2751 * Count the number of child joinlist nodes. This is the depth of the
2752 * dynamic-programming algorithm we must employ to consider all ways of
2753 * joining the child nodes.
2754 */
2755 levels_needed = list_length(joinlist);
2756
2757 if (levels_needed <= 0)
2758 return NULL; /* nothing to do? */
2759
2760 /*
2761 * Construct a list of rels corresponding to the child joinlist nodes.
2762 * This may contain both base rels and rels constructed according to
2763 * sub-joinlists.
2764 */
2765 initial_rels = NIL;
2766 foreach(jl, joinlist)
2767 {
2768 Node *jlnode = (Node *) lfirst(jl);
2769 RelOptInfo *thisrel;
2770
2771 if (IsA(jlnode, RangeTblRef))
2772 {
2773 int varno = ((RangeTblRef *) jlnode)->rtindex;
2774
2775 thisrel = find_base_rel(root, varno);
2776 }
2777 else if (IsA(jlnode, List))
2778 {
2779 /* Recurse to handle subproblem */
2780 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
2781 }
2782 else
2783 {
2784 elog(ERROR, "unrecognized joinlist node type: %d",
2785 (int) nodeTag(jlnode));
2786 thisrel = NULL; /* keep compiler quiet */
2787 }
2788
2789 initial_rels = lappend(initial_rels, thisrel);
2790 }
2791
2792 if (levels_needed == 1)
2793 {
2794 /*
2795 * Single joinlist node, so we're done.
2796 */
2797 return (RelOptInfo *) linitial(initial_rels);
2798 }
2799 else
2800 {
2801 /*
2802 * Consider the different orders in which we could join the rels,
2803 * using a plugin, GEQO, or the regular join search code.
2804 *
2805 * We put the initial_rels list into a PlannerInfo field because
2806 * has_legal_joinclause() needs to look at it (ugly :-().
2807 */
2808 root->initial_rels = initial_rels;
2809
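/*
 * With geqo_threshold at its default of 12, for example, a join problem
 * of twelve or more items is handed to the genetic optimizer (when
 * enable_geqo is on) rather than to the exhaustive standard_join_search.
 */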
2810 if (join_search_hook)
2811 return (*join_search_hook) (root, levels_needed, initial_rels);
2812 else if (enable_geqo && levels_needed >= geqo_threshold)
2813 return geqo(root, levels_needed, initial_rels);
2814 else
2815 return standard_join_search(root, levels_needed, initial_rels);
2816 }
2817 }
2818
2819 /*
2820 * standard_join_search
2821 * Find possible joinpaths for a query by successively finding ways
2822 * to join component relations into join relations.
2823 *
2824 * 'levels_needed' is the number of iterations needed, ie, the number of
2825 * independent jointree items in the query. This is > 1.
2826 *
2827 * 'initial_rels' is a list of RelOptInfo nodes for each independent
2828 * jointree item. These are the components to be joined together.
2829 * Note that levels_needed == list_length(initial_rels).
2830 *
2831 * Returns the final level of join relations, i.e., the relation that is
2832 * the result of joining all the original relations together.
2833 * At least one implementation path must be provided for this relation and
2834 * all required sub-relations.
2835 *
2836 * To support loadable plugins that modify planner behavior by changing the
2837 * join searching algorithm, we provide a hook variable that lets a plugin
2838 * replace or supplement this function. Any such hook must return the same
2839 * final join relation as the standard code would, but it might have a
2840 * different set of implementation paths attached, and only the sub-joinrels
2841 * needed for these paths need have been instantiated.
2842 *
2843 * Note to plugin authors: the functions invoked during standard_join_search()
2844 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
2845 * than one join-order search, you'll probably need to save and restore the
2846 * original states of those data structures. See geqo_eval() for an example.
2847 */
2848 RelOptInfo *
2849 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
2850 {
2851 int lev;
2852 RelOptInfo *rel;
2853
2854 /*
2855 * This function cannot be invoked recursively within any one planning
2856 * problem, so join_rel_level[] can't be in use already.
2857 */
2858 Assert(root->join_rel_level == NULL);
2859
2860 /*
2861 * We employ a simple "dynamic programming" algorithm: we first find all
2862 * ways to build joins of two jointree items, then all ways to build joins
2863 * of three items (from two-item joins and single items), then four-item
2864 * joins, and so on until we have considered all ways to join all the
2865 * items into one rel.
2866 *
2867 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
2868 * set root->join_rel_level[1] to represent all the single-jointree-item
2869 * relations.
2870 */
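/*
 * Illustration: with four initial rels A, B, C and D, join_rel_level[2]
 * collects the joinable pairs such as {A B} or {C D}, join_rel_level[3]
 * the three-way joins built from those plus one more rel, and
 * join_rel_level[4] the single final rel {A B C D}.
 */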
2871 root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
2872
2873 root->join_rel_level[1] = initial_rels;
2874
2875 for (lev = 2; lev <= levels_needed; lev++)
2876 {
2877 ListCell *lc;
2878
2879 /*
2880 * Determine all possible pairs of relations to be joined at this
2881 * level, and build paths for making each one from every available
2882 * pair of lower-level relations.
2883 */
2884 join_search_one_level(root, lev);
2885
2886 /*
2887 * Run generate_partitionwise_join_paths() and generate_gather_paths()
2888 * for each just-processed joinrel. We could not do this earlier
2889 * because both regular and partial paths can get added to a
2890 * particular joinrel at multiple times within join_search_one_level.
2891 *
2892 * After that, we're done creating paths for the joinrel, so run
2893 * set_cheapest().
2894 */
2895 foreach(lc, root->join_rel_level[lev])
2896 {
2897 rel = (RelOptInfo *) lfirst(lc);
2898
2899 /* Create paths for partitionwise joins. */
2900 generate_partitionwise_join_paths(root, rel);
2901
2902 /*
2903 * Except for the topmost scan/join rel, consider gathering
2904 * partial paths. We'll do the same for the topmost scan/join rel
2905 * once we know the final targetlist (see grouping_planner).
2906 */
2907 if (lev < levels_needed)
2908 generate_gather_paths(root, rel, false);
2909
2910 /* Find and save the cheapest paths for this rel */
2911 set_cheapest(rel);
2912
2913 #ifdef OPTIMIZER_DEBUG
2914 debug_print_rel(root, rel);
2915 #endif
2916 }
2917 }
2918
2919 /*
2920 * We should have a single rel at the final level.
2921 */
2922 if (root->join_rel_level[levels_needed] == NIL)
2923 elog(ERROR, "failed to build any %d-way joins", levels_needed);
2924 Assert(list_length(root->join_rel_level[levels_needed]) == 1);
2925
2926 rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
2927
2928 root->join_rel_level = NULL;
2929
2930 return rel;
2931 }
2932
2933 /*****************************************************************************
2934 * PUSHING QUALS DOWN INTO SUBQUERIES
2935 *****************************************************************************/
2936
2937 /*
2938 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
2939 *
2940 * subquery is the particular component query being checked. topquery
2941 * is the top component of a set-operations tree (the same Query if no
2942 * set-op is involved).
2943 *
2944 * Conditions checked here:
2945 *
2946 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
2947 * since that could change the set of rows returned.
2948 *
2949 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
2950 * quals into it, because that could change the results.
2951 *
2952 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
2953 * This is because upper-level quals should semantically be evaluated only
2954 * once per distinct row, not once per original row, and if the qual is
2955 * volatile then extra evaluations could change the results. (This issue
2956 * does not apply to other forms of aggregation such as GROUP BY, because
2957 * when those are present we push into HAVING not WHERE, so that the quals
2958 * are still applied after aggregation.)
2959 *
2960 * 4. If the subquery contains window functions, we cannot push volatile quals
2961 * into it. The issue here is a bit different from DISTINCT: a volatile qual
2962 * might succeed for some rows of a window partition and fail for others,
2963 * thereby changing the partition contents and thus the window functions'
2964 * results for rows that remain.
2965 *
2966 * 5. If the subquery contains any set-returning functions in its targetlist,
2967 * we cannot push volatile quals into it. That would push them below the SRFs
2968 * and thereby change the number of times they are evaluated. Also, a
2969 * volatile qual could succeed for some SRF output rows and fail for others,
2970 * a behavior that cannot occur if it's evaluated before SRF expansion.
2971 *
2972 * 6. If the subquery has nonempty grouping sets, we cannot push down any
2973 * quals. The concern here is that a qual referencing a "constant" grouping
2974 * column could get constant-folded, which would be improper because the value
2975 * is potentially nullable by grouping-set expansion. This restriction could
2976 * be removed if we had a parsetree representation that shows that such
2977 * grouping columns are not really constant. (There are other ideas that
2978 * could be used to relax this restriction, but that's the approach most
2979 * likely to get taken in the future. Note that there's not much to be gained
2980 * so long as subquery_planner can't move HAVING clauses to WHERE within such
2981 * a subquery.)
2982 *
2983 * In addition, we make several checks on the subquery's output columns to see
2984 * if it is safe to reference them in pushed-down quals. If output column k
2985 * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
2986 * to true, but we don't reject the subquery overall since column k might not
2987 * be referenced by some/all quals. The unsafeColumns[] array will be
2988 * consulted later by qual_is_pushdown_safe(). It's better to do it this way
2989 * than to make the checks directly in qual_is_pushdown_safe(), because when
2990 * the subquery involves set operations we have to check the output
2991 * expressions in each arm of the set op.
2992 *
2993 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
2994 * we're effectively assuming that the quals cannot distinguish values that
2995 * the DISTINCT's equality operator sees as equal, yet there are many
2996 * counterexamples to that assumption. However use of such a qual with a
2997 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
2998 * "equal" value will be chosen as the output value by the DISTINCT operation.
2999 * So we don't worry too much about that. Another objection is that if the
3000 * qual is expensive to evaluate, running it for each original row might cost
3001 * more than we save by eliminating rows before the DISTINCT step. But it
3002 * would be very hard to estimate that at this stage, and in practice pushdown
3003 * seldom seems to make things worse, so we ignore that problem too.
3004 *
3005 * Note: likewise, pushing quals into a subquery with window functions is a
3006 * bit dubious: the quals might remove some rows of a window partition while
3007 * leaving others, causing changes in the window functions' results for the
3008 * surviving rows. We insist that such a qual reference only partitioning
3009 * columns, but again that only protects us if the qual does not distinguish
3010 * values that the partitioning equality operator sees as equal. The risks
3011 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3012 * occurs, so such a qual is perfectly well-defined without pushdown and
3013 * pushing it down really can change the results. But we'll do this anyway
3014 * because the potential performance benefits are very large, and we've seen
3015 * no field complaints about the longstanding comparable behavior with DISTINCT.
3016 */
3017 static bool
3018 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
3019 pushdown_safety_info *safetyInfo)
3020 {
3021 SetOperationStmt *topop;
3022
3023 /* Check point 1 */
3024 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
3025 return false;
3026
3027 /* Check point 6 */
3028 if (subquery->groupClause && subquery->groupingSets)
3029 return false;
3030
3031 /* Check points 3, 4, and 5 */
3032 if (subquery->distinctClause ||
3033 subquery->hasWindowFuncs ||
3034 subquery->hasTargetSRFs)
3035 safetyInfo->unsafeVolatile = true;
3036
3037 /*
3038 * If we're at a leaf query, check for unsafe expressions in its target
3039 * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
3040 * setop trees have only simple Vars in their tlists, so no need to check
3041 * them.)
3042 */
3043 if (subquery->setOperations == NULL)
3044 check_output_expressions(subquery, safetyInfo);
3045
3046 /* Are we at top level, or looking at a setop component? */
3047 if (subquery == topquery)
3048 {
3049 /* Top level, so check any component queries */
3050 if (subquery->setOperations != NULL)
3051 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
3052 safetyInfo))
3053 return false;
3054 }
3055 else
3056 {
3057 /* Setop component must not have more components (too weird) */
3058 if (subquery->setOperations != NULL)
3059 return false;
3060 /* Check whether setop component output types match top level */
3061 topop = castNode(SetOperationStmt, topquery->setOperations);
3062 Assert(topop);
3063 compare_tlist_datatypes(subquery->targetList,
3064 topop->colTypes,
3065 safetyInfo);
3066 }
3067 return true;
3068 }
3069
3070 /*
3071 * Helper routine to recurse through setOperations tree
3072 */
3073 static bool
3074 recurse_pushdown_safe(Node *setOp, Query *topquery,
3075 pushdown_safety_info *safetyInfo)
3076 {
3077 if (IsA(setOp, RangeTblRef))
3078 {
3079 RangeTblRef *rtr = (RangeTblRef *) setOp;
3080 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
3081 Query *subquery = rte->subquery;
3082
3083 Assert(subquery != NULL);
3084 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
3085 }
3086 else if (IsA(setOp, SetOperationStmt))
3087 {
3088 SetOperationStmt *op = (SetOperationStmt *) setOp;
3089
3090 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3091 if (op->op == SETOP_EXCEPT)
3092 return false;
3093 /* Else recurse */
3094 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
3095 return false;
3096 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
3097 return false;
3098 }
3099 else
3100 {
3101 elog(ERROR, "unrecognized node type: %d",
3102 (int) nodeTag(setOp));
3103 }
3104 return true;
3105 }
3106
3107 /*
3108 * check_output_expressions - check subquery's output expressions for safety
3109 *
3110 * There are several cases in which it's unsafe to push down an upper-level
3111 * qual if it references a particular output column of a subquery. We check
3112 * each output column of the subquery and set unsafeColumns[k] to true if
3113 * that column is unsafe for a pushed-down qual to reference. The conditions
3114 * checked here are:
3115 *
3116 * 1. We must not push down any quals that refer to subselect outputs that
3117 * return sets, else we'd introduce functions-returning-sets into the
3118 * subquery's WHERE/HAVING quals.
3119 *
3120 * 2. We must not push down any quals that refer to subselect outputs that
3121 * contain volatile functions, for fear of introducing strange results due
3122 * to multiple evaluation of a volatile function.
3123 *
3124 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3125 * refer to non-DISTINCT output columns, because that could change the set
3126 * of rows returned. (This condition is vacuous for DISTINCT, because then
3127 * there are no non-DISTINCT output columns, so we needn't check. Note that
3128 * subquery_is_pushdown_safe already reported that we can't use volatile
3129 * quals if there's DISTINCT or DISTINCT ON.)
3130 *
3131 * 4. If the subquery has any window functions, we must not push down quals
3132 * that reference any output columns that are not listed in all the subquery's
3133 * window PARTITION BY clauses. We can push down quals that use only
3134 * partitioning columns because they should succeed or fail identically for
3135 * every row of any one window partition, and totally excluding some
3136 * partitions will not change a window function's results for remaining
3137 * partitions. (Again, this also requires nonvolatile quals, but
3138 * subquery_is_pushdown_safe handles that.)
3139 */
3140 static void
3141 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
3142 {
3143 ListCell *lc;
3144
3145 foreach(lc, subquery->targetList)
3146 {
3147 TargetEntry *tle = (TargetEntry *) lfirst(lc);
3148
3149 if (tle->resjunk)
3150 continue; /* ignore resjunk columns */
3151
3152 /* We need not check further if output col is already known unsafe */
3153 if (safetyInfo->unsafeColumns[tle->resno])
3154 continue;
3155
3156 /* Functions returning sets are unsafe (point 1) */
3157 if (subquery->hasTargetSRFs &&
3158 expression_returns_set((Node *) tle->expr))
3159 {
3160 safetyInfo->unsafeColumns[tle->resno] = true;
3161 continue;
3162 }
3163
3164 /* Volatile functions are unsafe (point 2) */
3165 if (contain_volatile_functions((Node *) tle->expr))
3166 {
3167 safetyInfo->unsafeColumns[tle->resno] = true;
3168 continue;
3169 }
3170
3171 /* If subquery uses DISTINCT ON, check point 3 */
3172 if (subquery->hasDistinctOn &&
3173 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
3174 {
3175 /* non-DISTINCT column, so mark it unsafe */
3176 safetyInfo->unsafeColumns[tle->resno] = true;
3177 continue;
3178 }
3179
3180 /* If subquery uses window functions, check point 4 */
3181 if (subquery->hasWindowFuncs &&
3182 !targetIsInAllPartitionLists(tle, subquery))
3183 {
3184 /* not present in all PARTITION BY clauses, so mark it unsafe */
3185 safetyInfo->unsafeColumns[tle->resno] = true;
3186 continue;
3187 }
3188 }
3189 }
3190
3191 /*
3192 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3193 * push quals into each component query, but the quals can only reference
3194 * subquery columns that suffer no type coercions in the set operation.
3195 * Otherwise there are possible semantic gotchas. So, we check the
3196 * component queries to see if any of them have output types different from
3197 * the top-level setop outputs. unsafeColumns[k] is set true if column k
3198 * has different type in any component.
3199 *
3200 * We don't have to care about typmods here: the only allowed difference
3201 * between set-op input and output typmods is input is a specific typmod
3202 * and output is -1, and that does not require a coercion.
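*
* For example, if one arm of a UNION ALL yields an int4 expression for a
* column whose setop output type is int8, a qual pushed into that arm would
* end up referencing the uncoerced int4 expression, so we mark that column
* unsafe.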
3203 *
3204 * tlist is a subquery tlist.
3205 * colTypes is an OID list of the top-level setop's output column types.
3206 * safetyInfo->unsafeColumns[] is the result array.
3207 */
3208 static void
3209 compare_tlist_datatypes(List *tlist, List *colTypes,
3210 pushdown_safety_info *safetyInfo)
3211 {
3212 ListCell *l;
3213 ListCell *colType = list_head(colTypes);
3214
3215 foreach(l, tlist)
3216 {
3217 TargetEntry *tle = (TargetEntry *) lfirst(l);
3218
3219 if (tle->resjunk)
3220 continue; /* ignore resjunk columns */
3221 if (colType == NULL)
3222 elog(ERROR, "wrong number of tlist entries");
3223 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
3224 safetyInfo->unsafeColumns[tle->resno] = true;
3225 colType = lnext(colType);
3226 }
3227 if (colType != NULL)
3228 elog(ERROR, "wrong number of tlist entries");
3229 }
3230
3231 /*
3232 * targetIsInAllPartitionLists
3233 * True if the TargetEntry is listed in the PARTITION BY clause
3234 * of every window defined in the query.
3235 *
3236 * It would be safe to ignore windows not actually used by any window
3237 * function, but it's not easy to get that info at this stage; and it's
3238 * unlikely to be useful to spend any extra cycles getting it, since
3239 * unreferenced window definitions are probably infrequent in practice.
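 *
 * For example, if the query defines one window as (PARTITION BY a) and
 * another as (PARTITION BY a, b), only a tlist entry matching "a" appears in
 * every partition list and passes this test.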
3240 */
3241 static bool
3242 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
3243 {
3244 ListCell *lc;
3245
3246 foreach(lc, query->windowClause)
3247 {
3248 WindowClause *wc = (WindowClause *) lfirst(lc);
3249
3250 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
3251 return false;
3252 }
3253 return true;
3254 }
3255
3256 /*
3257 * qual_is_pushdown_safe - is a particular qual safe to push down?
3258 *
3259 * qual is a restriction clause applying to the given subquery (whose RTE
3260 * has index rti in the parent query).
3261 *
3262 * Conditions checked here:
3263 *
3264 * 1. The qual must not contain any SubPlans (mainly because I'm not sure
3265 * it will work correctly: SubLinks will already have been transformed into
3266 * SubPlans in the qual, but not in the subquery). Note that SubLinks that
3267 * transform to initplans are safe, and will be accepted here because what
3268 * we'll see in the qual is just a Param referencing the initplan output.
3269 *
3270 * 2. If unsafeVolatile is set, the qual must not contain any volatile
3271 * functions.
3272 *
3273 * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
3274 * that are passed Var nodes, and therefore might reveal values from the
3275 * subquery as side effects.
3276 *
3277 * 4. The qual must not refer to the whole-row output of the subquery
3278 * (since there is no easy way to name that within the subquery itself).
3279 *
3280 * 5. The qual must not refer to any subquery output columns that were
3281 * found to be unsafe to reference by subquery_is_pushdown_safe().
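 *
 * Illustrative examples: a qual such as "random() < 0.5" is rejected by
 * point 2 when unsafeVolatile is set, and a qual testing the subquery's
 * whole-row output (varattno 0), e.g. "sub IS NOT NULL" written against the
 * subquery's alias, is rejected by point 4.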
3282 */
3283 static bool
3284 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
3285 pushdown_safety_info *safetyInfo)
3286 {
3287 bool safe = true;
3288 List *vars;
3289 ListCell *vl;
3290
3291 /* Refuse subselects (point 1) */
3292 if (contain_subplans(qual))
3293 return false;
3294
3295 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
3296 if (safetyInfo->unsafeVolatile &&
3297 contain_volatile_functions(qual))
3298 return false;
3299
3300 /* Refuse leaky quals if told to (point 3) */
3301 if (safetyInfo->unsafeLeaky &&
3302 contain_leaked_vars(qual))
3303 return false;
3304
3305 /*
3306 * It would be unsafe to push down window function calls, but at least for
3307 * the moment we could never see any in a qual anyhow. (The same applies
3308 * to aggregates, which we check for in pull_var_clause below.)
3309 */
3310 Assert(!contain_window_function(qual));
3311
3312 /*
3313 * Examine all Vars used in clause. Since it's a restriction clause, all
3314 * such Vars must refer to subselect output columns ... unless this is
3315 * part of a LATERAL subquery, in which case there could be lateral
3316 * references.
3317 */
3318 vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
3319 foreach(vl, vars)
3320 {
3321 Var *var = (Var *) lfirst(vl);
3322
3323 /*
3324 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3325 * It's not clear whether a PHV could safely be pushed down, and even
3326 * less clear whether such a situation could arise in any cases of
3327 * practical interest anyway. So for the moment, just refuse to push
3328 * down.
3329 */
3330 if (!IsA(var, Var))
3331 {
3332 safe = false;
3333 break;
3334 }
3335
3336 /*
3337 * Punt if we find any lateral references. It would be safe to push
3338 * these down, but we'd have to convert them into outer references,
3339 * which subquery_push_qual lacks the infrastructure to do. The case
3340 * arises so seldom that it doesn't seem worth working hard on.
3341 */
3342 if (var->varno != rti)
3343 {
3344 safe = false;
3345 break;
3346 }
3347
3348 /* Subqueries have no system columns */
3349 Assert(var->varattno >= 0);
3350
3351 /* Check point 4 */
3352 if (var->varattno == 0)
3353 {
3354 safe = false;
3355 break;
3356 }
3357
3358 /* Check point 5 */
3359 if (safetyInfo->unsafeColumns[var->varattno])
3360 {
3361 safe = false;
3362 break;
3363 }
3364 }
3365
3366 list_free(vars);
3367
3368 return safe;
3369 }
3370
3371 /*
3372 * subquery_push_qual - push down a qual that we have determined is safe
3373 */
3374 static void
3375 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
3376 {
3377 if (subquery->setOperations != NULL)
3378 {
3379 /* Recurse to push it separately to each component query */
3380 recurse_push_qual(subquery->setOperations, subquery,
3381 rte, rti, qual);
3382 }
3383 else
3384 {
3385 /*
3386 * We need to replace Vars in the qual (which must refer to outputs of
3387 * the subquery) with copies of the subquery's targetlist expressions.
3388 * Note that at this point, any uplevel Vars in the qual should have
3389 * been replaced with Params, so they need no work.
3390 *
3391 * This step also ensures that when we are pushing into a setop tree,
3392 * each component query gets its own copy of the qual.
3393 */
3394 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
3395 subquery->targetList,
3396 REPLACEVARS_REPORT_ERROR, 0,
3397 &subquery->hasSubLinks);
3398
3399 /*
3400 * Now attach the qual to the proper place: normally WHERE, but if the
3401 * subquery uses grouping or aggregation, put it in HAVING (since the
3402 * qual really refers to the group-result rows).
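 *
 * For instance, pushing "x > 5" into "SELECT a+b AS x ... GROUP BY a+b"
 * yields "HAVING a+b > 5": the Var for x is replaced by a copy of a+b, and
 * the condition constrains group results rather than individual input rows.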
3403 */
3404 		if (subquery->hasAggs || subquery->groupClause ||
 			subquery->groupingSets || subquery->havingQual)
3405 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
3406 else
3407 subquery->jointree->quals =
3408 make_and_qual(subquery->jointree->quals, qual);
3409
3410 /*
3411 * We need not change the subquery's hasAggs or hasSubLinks flags,
3412 * since we can't be pushing down any aggregates that weren't there
3413 * before, and we don't push down subselects at all.
3414 */
3415 }
3416 }
3417
3418 /*
3419 * Helper routine to recurse through setOperations tree
3420 */
3421 static void
3422 recurse_push_qual(Node *setOp, Query *topquery,
3423 RangeTblEntry *rte, Index rti, Node *qual)
3424 {
3425 if (IsA(setOp, RangeTblRef))
3426 {
3427 RangeTblRef *rtr = (RangeTblRef *) setOp;
3428 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
3429 Query *subquery = subrte->subquery;
3430
3431 Assert(subquery != NULL);
3432 subquery_push_qual(subquery, rte, rti, qual);
3433 }
3434 else if (IsA(setOp, SetOperationStmt))
3435 {
3436 SetOperationStmt *op = (SetOperationStmt *) setOp;
3437
3438 recurse_push_qual(op->larg, topquery, rte, rti, qual);
3439 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
3440 }
3441 else
3442 {
3443 elog(ERROR, "unrecognized node type: %d",
3444 (int) nodeTag(setOp));
3445 }
3446 }
3447
3448 /*****************************************************************************
3449 * SIMPLIFYING SUBQUERY TARGETLISTS
3450 *****************************************************************************/
3451
3452 /*
3453 * remove_unused_subquery_outputs
3454 * Remove subquery targetlist items we don't need
3455 *
3456 * It's possible, even likely, that the upper query does not read all the
3457 * output columns of the subquery. We can remove any such outputs that are
3458 * not needed by the subquery itself (e.g., as sort/group columns) and do not
3459 * affect semantics otherwise (e.g., volatile functions can't be removed).
3460 * This is useful not only because we might be able to remove expensive-to-
3461 * compute expressions, but because deletion of output columns might allow
3462 * optimizations such as join removal to occur within the subquery.
3463 *
3464 * To avoid affecting column numbering in the targetlist, we don't physically
3465 * remove unused tlist entries, but rather replace their expressions with NULL
3466 * constants. This is implemented by modifying subquery->targetList.
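 *
 * For example (illustrative), if the subquery computes "SELECT a, f(b) AS c
 * FROM tab" and the upper query references only a, then c's expression is
 * replaced with a null constant of c's type, provided f is neither volatile
 * nor set-returning and c carries no sortgroupref.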
3467 */
3468 static void
3469 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
3470 {
3471 Bitmapset *attrs_used = NULL;
3472 ListCell *lc;
3473
3474 /*
3475 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
3476 * could update all the child SELECTs' tlists, but it seems not worth the
3477 * trouble presently.
3478 */
3479 if (subquery->setOperations)
3480 return;
3481
3482 /*
3483 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
3484 * time: all its output columns must be used in the distinctClause.
3485 */
3486 if (subquery->distinctClause && !subquery->hasDistinctOn)
3487 return;
3488
3489 /*
3490 * Collect a bitmap of all the output column numbers used by the upper
3491 * query.
3492 *
3493 * Add all the attributes needed for joins or final output. Note: we must
3494 * look at rel's targetlist, not the attr_needed data, because attr_needed
3495 * isn't computed for inheritance child rels, cf set_append_rel_size().
3496 * (XXX might be worth changing that sometime.)
3497 */
3498 pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
3499
3500 /* Add all the attributes used by un-pushed-down restriction clauses. */
3501 foreach(lc, rel->baserestrictinfo)
3502 {
3503 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
3504
3505 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
3506 }
3507
3508 /*
3509 * If there's a whole-row reference to the subquery, we can't remove
3510 * anything.
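 * (pull_varattnos offsets attribute numbers by
 * FirstLowInvalidHeapAttributeNumber so that system columns and the
 * whole-row attribute, attno 0, fit in the bitmapset; hence the
 * 0 - FirstLowInvalidHeapAttributeNumber tests here and below.)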
3511 */
3512 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
3513 return;
3514
3515 /*
3516 * Run through the tlist and zap entries we don't need. It's okay to
3517 * modify the tlist items in-place because set_subquery_pathlist made a
3518 * copy of the subquery.
3519 */
3520 foreach(lc, subquery->targetList)
3521 {
3522 TargetEntry *tle = (TargetEntry *) lfirst(lc);
3523 Node *texpr = (Node *) tle->expr;
3524
3525 /*
3526 * If it has a sortgroupref number, it's used in some sort/group
3527 * clause so we'd better not remove it. Also, don't remove any
3528 * resjunk columns, since their reason for being has nothing to do
3529 * with anybody reading the subquery's output. (It's likely that
3530 * resjunk columns in a sub-SELECT would always have ressortgroupref
3531 * set, but even if they don't, it seems imprudent to remove them.)
3532 */
3533 if (tle->ressortgroupref || tle->resjunk)
3534 continue;
3535
3536 /*
3537 * If it's used by the upper query, we can't remove it.
3538 */
3539 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
3540 attrs_used))
3541 continue;
3542
3543 /*
3544 * If it contains a set-returning function, we can't remove it since
3545 * that could change the number of rows returned by the subquery.
3546 */
3547 if (subquery->hasTargetSRFs &&
3548 expression_returns_set(texpr))
3549 continue;
3550
3551 /*
3552 * If it contains volatile functions, we daren't remove it for fear
3553 * that the user is expecting their side-effects to happen.
3554 */
3555 if (contain_volatile_functions(texpr))
3556 continue;
3557
3558 /*
3559 * OK, we don't need it. Replace the expression with a NULL constant.
3560 * Preserve the exposed type of the expression, in case something
3561 * looks at the rowtype of the subquery's result.
3562 */
3563 tle->expr = (Expr *) makeNullConst(exprType(texpr),
3564 exprTypmod(texpr),
3565 exprCollation(texpr));
3566 }
3567 }
3568
3569 /*
3570 * create_partial_bitmap_paths
3571 * Build partial bitmap heap path for the relation
3572 */
3573 void
3574 create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
3575 Path *bitmapqual)
3576 {
3577 int parallel_workers;
3578 double pages_fetched;
3579
3580 /* Compute heap pages for bitmap heap scan */
3581 pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
3582 NULL, NULL);
3583
3584 parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
3585 max_parallel_workers_per_gather);
3586
3587 if (parallel_workers <= 0)
3588 return;
3589
3590 add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
3591 bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
3592 }
3593
3594 /*
3595 * Compute the number of parallel workers that should be used to scan a
3596 * relation. We compute the parallel workers based on the size of the heap to
3597 * be scanned and the size of the index to be scanned, then choose the
3598 * minimum of the two.
3599 *
3600 * "heap_pages" is the number of pages from the table that we expect to scan, or
3601 * -1 if we don't expect to scan any.
3602 *
3603 * "index_pages" is the number of pages from the index that we expect to scan, or
3604 * -1 if we don't expect to scan any.
3605 *
3606 * "max_workers" is caller's limit on the number of workers. This typically
3607 * comes from a GUC.
3608 */
3609 int
3610 compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
3611 int max_workers)
3612 {
3613 int parallel_workers = 0;
3614
3615 /*
3616 * If the user has set the parallel_workers reloption, use that; otherwise
3617 * select a default number of workers.
3618 */
3619 if (rel->rel_parallel_workers != -1)
3620 parallel_workers = rel->rel_parallel_workers;
3621 else
3622 {
3623 /*
3624 * If the number of pages being scanned is insufficient to justify a
3625 * parallel scan, just return zero ... unless it's an inheritance
3626 * child. In that case, we want to generate a parallel path here
3627 * anyway. It might not be worthwhile just for this relation, but
3628 * when combined with all of its inheritance siblings it may well pay
3629 * off.
3630 */
3631 if (rel->reloptkind == RELOPT_BASEREL &&
3632 ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
3633 (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
3634 return 0;
3635
3636 if (heap_pages >= 0)
3637 {
3638 int heap_parallel_threshold;
3639 int heap_parallel_workers = 1;
3640
3641 /*
3642 * Select the number of workers based on the log of the size of
3643 * the relation. This probably needs to be a good deal more
3644 * sophisticated, but we need something here for now. Note that
3645 * the upper limit of the min_parallel_table_scan_size GUC is
3646 * chosen to prevent overflow here.
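 *
 * For example, with the default min_parallel_table_scan_size (8MB, i.e.
 * 1024 8kB pages), a 1024-page heap gets one worker, 3072 pages get two,
 * 9216 pages get three, and so on: each tripling of the relation size
 * adds one more worker.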
3647 */
3648 heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
3649 while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
3650 {
3651 heap_parallel_workers++;
3652 heap_parallel_threshold *= 3;
3653 if (heap_parallel_threshold > INT_MAX / 3)
3654 break; /* avoid overflow */
3655 }
3656
3657 parallel_workers = heap_parallel_workers;
3658 }
3659
3660 if (index_pages >= 0)
3661 {
3662 int index_parallel_workers = 1;
3663 int index_parallel_threshold;
3664
3665 /* same calculation as for heap_pages above */
3666 index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
3667 while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
3668 {
3669 index_parallel_workers++;
3670 index_parallel_threshold *= 3;
3671 if (index_parallel_threshold > INT_MAX / 3)
3672 break; /* avoid overflow */
3673 }
3674
3675 if (parallel_workers > 0)
3676 parallel_workers = Min(parallel_workers, index_parallel_workers);
3677 else
3678 parallel_workers = index_parallel_workers;
3679 }
3680 }
3681
3682 /* In no case use more than caller supplied maximum number of workers */
3683 parallel_workers = Min(parallel_workers, max_workers);
3684
3685 return parallel_workers;
3686 }
3687
3688 /*
3689 * generate_partitionwise_join_paths
3690 * Create paths representing partitionwise join for given partitioned
3691 * join relation.
3692 *
3693 * This must not be called until after we are done adding paths for all
3694 * child-joins. Otherwise, add_path might delete a path to which some path
3695 * generated here has a reference.
3696 */
3697 void
3698 generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
3699 {
3700 List *live_children = NIL;
3701 int cnt_parts;
3702 int num_parts;
3703 RelOptInfo **part_rels;
3704
3705 /* Handle only join relations here. */
3706 if (!IS_JOIN_REL(rel))
3707 return;
3708
3709 /* We've nothing to do if the relation is not partitioned. */
3710 if (!IS_PARTITIONED_REL(rel))
3711 return;
3712
3713 /* The relation should have consider_partitionwise_join set. */
3714 Assert(rel->consider_partitionwise_join);
3715
3716 /* Guard against stack overflow due to overly deep partition hierarchy. */
3717 check_stack_depth();
3718
3719 num_parts = rel->nparts;
3720 part_rels = rel->part_rels;
3721
3722 /* Collect non-dummy child-joins. */
3723 for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
3724 {
3725 RelOptInfo *child_rel = part_rels[cnt_parts];
3726
3727 /* If it's been pruned entirely, it's certainly dummy. */
3728 if (child_rel == NULL)
3729 continue;
3730
3731 /* Add partitionwise join paths for partitioned child-joins. */
3732 generate_partitionwise_join_paths(root, child_rel);
3733
3734 set_cheapest(child_rel);
3735
3736 /* Dummy children will not be scanned, so ignore those. */
3737 if (IS_DUMMY_REL(child_rel))
3738 continue;
3739
3740 #ifdef OPTIMIZER_DEBUG
3741 debug_print_rel(root, child_rel);
3742 #endif
3743
3744 live_children = lappend(live_children, child_rel);
3745 }
3746
3747 /* If all child-joins are dummy, parent join is also dummy. */
3748 if (!live_children)
3749 {
3750 mark_dummy_rel(rel);
3751 return;
3752 }
3753
3754 /* Build additional paths for this rel from child-join paths. */
3755 add_paths_to_append_rel(root, rel, live_children);
3756 list_free(live_children);
3757 }
3758
3759
3760 /*****************************************************************************
3761 * DEBUG SUPPORT
3762 *****************************************************************************/
3763
3764 #ifdef OPTIMIZER_DEBUG
3765
3766 static void
3767 print_relids(PlannerInfo *root, Relids relids)
3768 {
3769 int x;
3770 bool first = true;
3771
3772 x = -1;
3773 while ((x = bms_next_member(relids, x)) >= 0)
3774 {
3775 if (!first)
3776 printf(" ");
3777 if (x < root->simple_rel_array_size &&
3778 root->simple_rte_array[x])
3779 printf("%s", root->simple_rte_array[x]->eref->aliasname);
3780 else
3781 printf("%d", x);
3782 first = false;
3783 }
3784 }
3785
3786 static void
3787 print_restrictclauses(PlannerInfo *root, List *clauses)
3788 {
3789 ListCell *l;
3790
3791 foreach(l, clauses)
3792 {
3793 RestrictInfo *c = lfirst(l);
3794
3795 print_expr((Node *) c->clause, root->parse->rtable);
3796 if (lnext(l))
3797 printf(", ");
3798 }
3799 }
3800
3801 static void
3802 print_path(PlannerInfo *root, Path *path, int indent)
3803 {
3804 const char *ptype;
3805 bool join = false;
3806 Path *subpath = NULL;
3807 int i;
3808
3809 switch (nodeTag(path))
3810 {
3811 case T_Path:
3812 switch (path->pathtype)
3813 {
3814 case T_SeqScan:
3815 ptype = "SeqScan";
3816 break;
3817 case T_SampleScan:
3818 ptype = "SampleScan";
3819 break;
3820 case T_FunctionScan:
3821 ptype = "FunctionScan";
3822 break;
3823 case T_TableFuncScan:
3824 ptype = "TableFuncScan";
3825 break;
3826 case T_ValuesScan:
3827 ptype = "ValuesScan";
3828 break;
3829 case T_CteScan:
3830 ptype = "CteScan";
3831 break;
3832 case T_NamedTuplestoreScan:
3833 ptype = "NamedTuplestoreScan";
3834 break;
3835 case T_Result:
3836 ptype = "Result";
3837 break;
3838 case T_WorkTableScan:
3839 ptype = "WorkTableScan";
3840 break;
3841 default:
3842 ptype = "???Path";
3843 break;
3844 }
3845 break;
3846 case T_IndexPath:
3847 ptype = "IdxScan";
3848 break;
3849 case T_BitmapHeapPath:
3850 ptype = "BitmapHeapScan";
3851 break;
3852 case T_BitmapAndPath:
3853 ptype = "BitmapAndPath";
3854 break;
3855 case T_BitmapOrPath:
3856 ptype = "BitmapOrPath";
3857 break;
3858 case T_TidPath:
3859 ptype = "TidScan";
3860 break;
3861 case T_SubqueryScanPath:
3862 ptype = "SubqueryScan";
3863 break;
3864 case T_ForeignPath:
3865 ptype = "ForeignScan";
3866 break;
3867 case T_CustomPath:
3868 ptype = "CustomScan";
3869 break;
3870 case T_NestPath:
3871 ptype = "NestLoop";
3872 join = true;
3873 break;
3874 case T_MergePath:
3875 ptype = "MergeJoin";
3876 join = true;
3877 break;
3878 case T_HashPath:
3879 ptype = "HashJoin";
3880 join = true;
3881 break;
3882 case T_AppendPath:
3883 ptype = "Append";
3884 break;
3885 case T_MergeAppendPath:
3886 ptype = "MergeAppend";
3887 break;
3888 case T_GroupResultPath:
3889 ptype = "GroupResult";
3890 break;
3891 case T_MaterialPath:
3892 ptype = "Material";
3893 subpath = ((MaterialPath *) path)->subpath;
3894 break;
3895 case T_UniquePath:
3896 ptype = "Unique";
3897 subpath = ((UniquePath *) path)->subpath;
3898 break;
3899 case T_GatherPath:
3900 ptype = "Gather";
3901 subpath = ((GatherPath *) path)->subpath;
3902 break;
3903 case T_GatherMergePath:
3904 ptype = "GatherMerge";
3905 subpath = ((GatherMergePath *) path)->subpath;
3906 break;
3907 case T_ProjectionPath:
3908 ptype = "Projection";
3909 subpath = ((ProjectionPath *) path)->subpath;
3910 break;
3911 case T_ProjectSetPath:
3912 ptype = "ProjectSet";
3913 subpath = ((ProjectSetPath *) path)->subpath;
3914 break;
3915 case T_SortPath:
3916 ptype = "Sort";
3917 subpath = ((SortPath *) path)->subpath;
3918 break;
3919 case T_GroupPath:
3920 ptype = "Group";
3921 subpath = ((GroupPath *) path)->subpath;
3922 break;
3923 case T_UpperUniquePath:
3924 ptype = "UpperUnique";
3925 subpath = ((UpperUniquePath *) path)->subpath;
3926 break;
3927 case T_AggPath:
3928 ptype = "Agg";
3929 subpath = ((AggPath *) path)->subpath;
3930 break;
3931 case T_GroupingSetsPath:
3932 ptype = "GroupingSets";
3933 subpath = ((GroupingSetsPath *) path)->subpath;
3934 break;
3935 case T_MinMaxAggPath:
3936 ptype = "MinMaxAgg";
3937 break;
3938 case T_WindowAggPath:
3939 ptype = "WindowAgg";
3940 subpath = ((WindowAggPath *) path)->subpath;
3941 break;
3942 case T_SetOpPath:
3943 ptype = "SetOp";
3944 subpath = ((SetOpPath *) path)->subpath;
3945 break;
3946 case T_RecursiveUnionPath:
3947 ptype = "RecursiveUnion";
3948 break;
3949 case T_LockRowsPath:
3950 ptype = "LockRows";
3951 subpath = ((LockRowsPath *) path)->subpath;
3952 break;
3953 case T_ModifyTablePath:
3954 ptype = "ModifyTable";
3955 break;
3956 case T_LimitPath:
3957 ptype = "Limit";
3958 subpath = ((LimitPath *) path)->subpath;
3959 break;
3960 default:
3961 ptype = "???Path";
3962 break;
3963 }
3964
3965 for (i = 0; i < indent; i++)
3966 printf("\t");
3967 printf("%s", ptype);
3968
3969 if (path->parent)
3970 {
3971 printf("(");
3972 print_relids(root, path->parent->relids);
3973 printf(")");
3974 }
3975 if (path->param_info)
3976 {
3977 printf(" required_outer (");
3978 print_relids(root, path->param_info->ppi_req_outer);
3979 printf(")");
3980 }
3981 printf(" rows=%.0f cost=%.2f..%.2f\n",
3982 path->rows, path->startup_cost, path->total_cost);
3983
3984 if (path->pathkeys)
3985 {
3986 for (i = 0; i < indent; i++)
3987 printf("\t");
3988 printf(" pathkeys: ");
3989 print_pathkeys(path->pathkeys, root->parse->rtable);
3990 }
3991
3992 if (join)
3993 {
3994 JoinPath *jp = (JoinPath *) path;
3995
3996 for (i = 0; i < indent; i++)
3997 printf("\t");
3998 printf(" clauses: ");
3999 print_restrictclauses(root, jp->joinrestrictinfo);
4000 printf("\n");
4001
4002 if (IsA(path, MergePath))
4003 {
4004 MergePath *mp = (MergePath *) path;
4005
4006 for (i = 0; i < indent; i++)
4007 printf("\t");
4008 printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
4009 ((mp->outersortkeys) ? 1 : 0),
4010 ((mp->innersortkeys) ? 1 : 0),
4011 ((mp->materialize_inner) ? 1 : 0));
4012 }
4013
4014 print_path(root, jp->outerjoinpath, indent + 1);
4015 print_path(root, jp->innerjoinpath, indent + 1);
4016 }
4017
4018 if (subpath)
4019 print_path(root, subpath, indent + 1);
4020 }
4021
4022 void
4023 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
4024 {
4025 ListCell *l;
4026
4027 printf("RELOPTINFO (");
4028 print_relids(root, rel->relids);
4029 printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
4030
4031 if (rel->baserestrictinfo)
4032 {
4033 printf("\tbaserestrictinfo: ");
4034 print_restrictclauses(root, rel->baserestrictinfo);
4035 printf("\n");
4036 }
4037
4038 if (rel->joininfo)
4039 {
4040 printf("\tjoininfo: ");
4041 print_restrictclauses(root, rel->joininfo);
4042 printf("\n");
4043 }
4044
4045 printf("\tpath list:\n");
4046 foreach(l, rel->pathlist)
4047 print_path(root, lfirst(l), 1);
4048 if (rel->cheapest_parameterized_paths)
4049 {
4050 printf("\n\tcheapest parameterized paths:\n");
4051 foreach(l, rel->cheapest_parameterized_paths)
4052 print_path(root, lfirst(l), 1);
4053 }
4054 if (rel->cheapest_startup_path)
4055 {
4056 printf("\n\tcheapest startup path:\n");
4057 print_path(root, rel->cheapest_startup_path, 1);
4058 }
4059 if (rel->cheapest_total_path)
4060 {
4061 printf("\n\tcheapest total path:\n");
4062 print_path(root, rel->cheapest_total_path, 1);
4063 }
4064 printf("\n");
4065 fflush(stdout);
4066 }
4067
4068 #endif /* OPTIMIZER_DEBUG */
4069