1 /*-------------------------------------------------------------------------
2 *
3 * pathnode.c
4 * Routines to manipulate pathlists and create path nodes
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/util/pathnode.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "miscadmin.h"
20 #include "foreign/fdwapi.h"
21 #include "nodes/extensible.h"
22 #include "nodes/nodeFuncs.h"
23 #include "optimizer/clauses.h"
24 #include "optimizer/cost.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/planmain.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/restrictinfo.h"
30 #include "optimizer/tlist.h"
31 #include "optimizer/var.h"
32 #include "parser/parsetree.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/selfuncs.h"
36
37
/*
 * Possible outcomes of comparing two paths' costs fuzzily; see
 * compare_path_costs_fuzzily below.
 */
typedef enum
{
	COSTS_EQUAL,				/* path costs are fuzzily equal */
	COSTS_BETTER1,				/* first path is cheaper than second */
	COSTS_BETTER2,				/* second path is cheaper than first */
	COSTS_DIFFERENT				/* neither path dominates the other on cost */
} PathCostComparison;

/*
 * STD_FUZZ_FACTOR is the normal fuzz factor for compare_path_costs_fuzzily.
 * XXX is it worth making this user-controllable?  It provides a tradeoff
 * between planner runtime and the accuracy of path cost comparisons.
 */
#define STD_FUZZ_FACTOR 1.01

/* Forward declarations for local routines defined later in this file. */
static List *translate_sub_tlist(List *tlist, int relid);
static int	append_total_cost_compare(const void *a, const void *b);
static int	append_startup_cost_compare(const void *a, const void *b);
static List *reparameterize_pathlist_by_child(PlannerInfo *root,
								 List *pathlist,
								 RelOptInfo *child_rel);
59
60
61 /*****************************************************************************
62 * MISC. PATH UTILITIES
63 *****************************************************************************/
64
65 /*
66 * compare_path_costs
67 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
68 * or more expensive than path2 for the specified criterion.
69 */
70 int
compare_path_costs(Path * path1,Path * path2,CostSelector criterion)71 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
72 {
73 if (criterion == STARTUP_COST)
74 {
75 if (path1->startup_cost < path2->startup_cost)
76 return -1;
77 if (path1->startup_cost > path2->startup_cost)
78 return +1;
79
80 /*
81 * If paths have the same startup cost (not at all unlikely), order
82 * them by total cost.
83 */
84 if (path1->total_cost < path2->total_cost)
85 return -1;
86 if (path1->total_cost > path2->total_cost)
87 return +1;
88 }
89 else
90 {
91 if (path1->total_cost < path2->total_cost)
92 return -1;
93 if (path1->total_cost > path2->total_cost)
94 return +1;
95
96 /*
97 * If paths have the same total cost, order them by startup cost.
98 */
99 if (path1->startup_cost < path2->startup_cost)
100 return -1;
101 if (path1->startup_cost > path2->startup_cost)
102 return +1;
103 }
104 return 0;
105 }
106
107 /*
108 * compare_path_fractional_costs
109 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
110 * or more expensive than path2 for fetching the specified fraction
111 * of the total tuples.
112 *
113 * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
114 * path with the cheaper total_cost.
115 */
116 int
compare_fractional_path_costs(Path * path1,Path * path2,double fraction)117 compare_fractional_path_costs(Path *path1, Path *path2,
118 double fraction)
119 {
120 Cost cost1,
121 cost2;
122
123 if (fraction <= 0.0 || fraction >= 1.0)
124 return compare_path_costs(path1, path2, TOTAL_COST);
125 cost1 = path1->startup_cost +
126 fraction * (path1->total_cost - path1->startup_cost);
127 cost2 = path2->startup_cost +
128 fraction * (path2->total_cost - path2->startup_cost);
129 if (cost1 < cost2)
130 return -1;
131 if (cost1 > cost2)
132 return +1;
133 return 0;
134 }
135
136 /*
137 * compare_path_costs_fuzzily
138 * Compare the costs of two paths to see if either can be said to
139 * dominate the other.
140 *
141 * We use fuzzy comparisons so that add_path() can avoid keeping both of
142 * a pair of paths that really have insignificantly different cost.
143 *
144 * The fuzz_factor argument must be 1.0 plus delta, where delta is the
145 * fraction of the smaller cost that is considered to be a significant
146 * difference. For example, fuzz_factor = 1.01 makes the fuzziness limit
147 * be 1% of the smaller cost.
148 *
149 * The two paths are said to have "equal" costs if both startup and total
150 * costs are fuzzily the same. Path1 is said to be better than path2 if
151 * it has fuzzily better startup cost and fuzzily no worse total cost,
152 * or if it has fuzzily better total cost and fuzzily no worse startup cost.
153 * Path2 is better than path1 if the reverse holds. Finally, if one path
154 * is fuzzily better than the other on startup cost and fuzzily worse on
155 * total cost, we just say that their costs are "different", since neither
156 * dominates the other across the whole performance spectrum.
157 *
158 * This function also enforces a policy rule that paths for which the relevant
159 * one of parent->consider_startup and parent->consider_param_startup is false
160 * cannot survive comparisons solely on the grounds of good startup cost, so
161 * we never return COSTS_DIFFERENT when that is true for the total-cost loser.
162 * (But if total costs are fuzzily equal, we compare startup costs anyway,
163 * in hopes of eliminating one path or the other.)
164 */
165 static PathCostComparison
compare_path_costs_fuzzily(Path * path1,Path * path2,double fuzz_factor)166 compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
167 {
168 #define CONSIDER_PATH_STARTUP_COST(p) \
169 ((p)->param_info == NULL ? (p)->parent->consider_startup : (p)->parent->consider_param_startup)
170
171 /*
172 * Check total cost first since it's more likely to be different; many
173 * paths have zero startup cost.
174 */
175 if (path1->total_cost > path2->total_cost * fuzz_factor)
176 {
177 /* path1 fuzzily worse on total cost */
178 if (CONSIDER_PATH_STARTUP_COST(path1) &&
179 path2->startup_cost > path1->startup_cost * fuzz_factor)
180 {
181 /* ... but path2 fuzzily worse on startup, so DIFFERENT */
182 return COSTS_DIFFERENT;
183 }
184 /* else path2 dominates */
185 return COSTS_BETTER2;
186 }
187 if (path2->total_cost > path1->total_cost * fuzz_factor)
188 {
189 /* path2 fuzzily worse on total cost */
190 if (CONSIDER_PATH_STARTUP_COST(path2) &&
191 path1->startup_cost > path2->startup_cost * fuzz_factor)
192 {
193 /* ... but path1 fuzzily worse on startup, so DIFFERENT */
194 return COSTS_DIFFERENT;
195 }
196 /* else path1 dominates */
197 return COSTS_BETTER1;
198 }
199 /* fuzzily the same on total cost ... */
200 if (path1->startup_cost > path2->startup_cost * fuzz_factor)
201 {
202 /* ... but path1 fuzzily worse on startup, so path2 wins */
203 return COSTS_BETTER2;
204 }
205 if (path2->startup_cost > path1->startup_cost * fuzz_factor)
206 {
207 /* ... but path2 fuzzily worse on startup, so path1 wins */
208 return COSTS_BETTER1;
209 }
210 /* fuzzily the same on both costs */
211 return COSTS_EQUAL;
212
213 #undef CONSIDER_PATH_STARTUP_COST
214 }
215
216 /*
217 * set_cheapest
218 * Find the minimum-cost paths from among a relation's paths,
219 * and save them in the rel's cheapest-path fields.
220 *
221 * cheapest_total_path is normally the cheapest-total-cost unparameterized
222 * path; but if there are no unparameterized paths, we assign it to be the
223 * best (cheapest least-parameterized) parameterized path. However, only
224 * unparameterized paths are considered candidates for cheapest_startup_path,
225 * so that will be NULL if there are no unparameterized paths.
226 *
227 * The cheapest_parameterized_paths list collects all parameterized paths
228 * that have survived the add_path() tournament for this relation. (Since
229 * add_path ignores pathkeys for a parameterized path, these will be paths
230 * that have best cost or best row count for their parameterization. We
231 * may also have both a parallel-safe and a non-parallel-safe path in some
232 * cases for the same parameterization in some cases, but this should be
233 * relatively rare since, most typically, all paths for the same relation
234 * will be parallel-safe or none of them will.)
235 *
236 * cheapest_parameterized_paths always includes the cheapest-total
237 * unparameterized path, too, if there is one; the users of that list find
238 * it more convenient if that's included.
239 *
240 * This is normally called only after we've finished constructing the path
241 * list for the rel node.
242 */
243 void
set_cheapest(RelOptInfo * parent_rel)244 set_cheapest(RelOptInfo *parent_rel)
245 {
246 Path *cheapest_startup_path;
247 Path *cheapest_total_path;
248 Path *best_param_path;
249 List *parameterized_paths;
250 ListCell *p;
251
252 Assert(IsA(parent_rel, RelOptInfo));
253
254 if (parent_rel->pathlist == NIL)
255 elog(ERROR, "could not devise a query plan for the given query");
256
257 cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
258 parameterized_paths = NIL;
259
260 foreach(p, parent_rel->pathlist)
261 {
262 Path *path = (Path *) lfirst(p);
263 int cmp;
264
265 if (path->param_info)
266 {
267 /* Parameterized path, so add it to parameterized_paths */
268 parameterized_paths = lappend(parameterized_paths, path);
269
270 /*
271 * If we have an unparameterized cheapest-total, we no longer care
272 * about finding the best parameterized path, so move on.
273 */
274 if (cheapest_total_path)
275 continue;
276
277 /*
278 * Otherwise, track the best parameterized path, which is the one
279 * with least total cost among those of the minimum
280 * parameterization.
281 */
282 if (best_param_path == NULL)
283 best_param_path = path;
284 else
285 {
286 switch (bms_subset_compare(PATH_REQ_OUTER(path),
287 PATH_REQ_OUTER(best_param_path)))
288 {
289 case BMS_EQUAL:
290 /* keep the cheaper one */
291 if (compare_path_costs(path, best_param_path,
292 TOTAL_COST) < 0)
293 best_param_path = path;
294 break;
295 case BMS_SUBSET1:
296 /* new path is less-parameterized */
297 best_param_path = path;
298 break;
299 case BMS_SUBSET2:
300 /* old path is less-parameterized, keep it */
301 break;
302 case BMS_DIFFERENT:
303
304 /*
305 * This means that neither path has the least possible
306 * parameterization for the rel. We'll sit on the old
307 * path until something better comes along.
308 */
309 break;
310 }
311 }
312 }
313 else
314 {
315 /* Unparameterized path, so consider it for cheapest slots */
316 if (cheapest_total_path == NULL)
317 {
318 cheapest_startup_path = cheapest_total_path = path;
319 continue;
320 }
321
322 /*
323 * If we find two paths of identical costs, try to keep the
324 * better-sorted one. The paths might have unrelated sort
325 * orderings, in which case we can only guess which might be
326 * better to keep, but if one is superior then we definitely
327 * should keep that one.
328 */
329 cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
330 if (cmp > 0 ||
331 (cmp == 0 &&
332 compare_pathkeys(cheapest_startup_path->pathkeys,
333 path->pathkeys) == PATHKEYS_BETTER2))
334 cheapest_startup_path = path;
335
336 cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
337 if (cmp > 0 ||
338 (cmp == 0 &&
339 compare_pathkeys(cheapest_total_path->pathkeys,
340 path->pathkeys) == PATHKEYS_BETTER2))
341 cheapest_total_path = path;
342 }
343 }
344
345 /* Add cheapest unparameterized path, if any, to parameterized_paths */
346 if (cheapest_total_path)
347 parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
348
349 /*
350 * If there is no unparameterized path, use the best parameterized path as
351 * cheapest_total_path (but not as cheapest_startup_path).
352 */
353 if (cheapest_total_path == NULL)
354 cheapest_total_path = best_param_path;
355 Assert(cheapest_total_path != NULL);
356
357 parent_rel->cheapest_startup_path = cheapest_startup_path;
358 parent_rel->cheapest_total_path = cheapest_total_path;
359 parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
360 parent_rel->cheapest_parameterized_paths = parameterized_paths;
361 }
362
363 /*
364 * add_path
365 * Consider a potential implementation path for the specified parent rel,
366 * and add it to the rel's pathlist if it is worthy of consideration.
367 * A path is worthy if it has a better sort order (better pathkeys) or
368 * cheaper cost (on either dimension), or generates fewer rows, than any
369 * existing path that has the same or superset parameterization rels.
370 * We also consider parallel-safe paths more worthy than others.
371 *
372 * We also remove from the rel's pathlist any old paths that are dominated
373 * by new_path --- that is, new_path is cheaper, at least as well ordered,
374 * generates no more rows, requires no outer rels not required by the old
375 * path, and is no less parallel-safe.
376 *
377 * In most cases, a path with a superset parameterization will generate
378 * fewer rows (since it has more join clauses to apply), so that those two
379 * figures of merit move in opposite directions; this means that a path of
380 * one parameterization can seldom dominate a path of another. But such
381 * cases do arise, so we make the full set of checks anyway.
382 *
383 * There are two policy decisions embedded in this function, along with
384 * its sibling add_path_precheck. First, we treat all parameterized paths
385 * as having NIL pathkeys, so that they cannot win comparisons on the
386 * basis of sort order. This is to reduce the number of parameterized
387 * paths that are kept; see discussion in src/backend/optimizer/README.
388 *
389 * Second, we only consider cheap startup cost to be interesting if
390 * parent_rel->consider_startup is true for an unparameterized path, or
391 * parent_rel->consider_param_startup is true for a parameterized one.
392 * Again, this allows discarding useless paths sooner.
393 *
394 * The pathlist is kept sorted by total_cost, with cheaper paths
395 * at the front. Within this routine, that's simply a speed hack:
396 * doing it that way makes it more likely that we will reject an inferior
397 * path after a few comparisons, rather than many comparisons.
398 * However, add_path_precheck relies on this ordering to exit early
399 * when possible.
400 *
401 * NOTE: discarded Path objects are immediately pfree'd to reduce planner
402 * memory consumption. We dare not try to free the substructure of a Path,
403 * since much of it may be shared with other Paths or the query tree itself;
404 * but just recycling discarded Path nodes is a very useful savings in
405 * a large join tree. We can recycle the List nodes of pathlist, too.
406 *
407 * As noted in optimizer/README, deleting a previously-accepted Path is
408 * safe because we know that Paths of this rel cannot yet be referenced
409 * from any other rel, such as a higher-level join. However, in some cases
410 * it is possible that a Path is referenced by another Path for its own
411 * rel; we must not delete such a Path, even if it is dominated by the new
412 * Path. Currently this occurs only for IndexPath objects, which may be
413 * referenced as children of BitmapHeapPaths as well as being paths in
414 * their own right. Hence, we don't pfree IndexPaths when rejecting them.
415 *
416 * 'parent_rel' is the relation entry to which the path corresponds.
417 * 'new_path' is a potential path for parent_rel.
418 *
419 * Returns nothing, but modifies parent_rel->pathlist.
420 */
421 void
add_path(RelOptInfo * parent_rel,Path * new_path)422 add_path(RelOptInfo *parent_rel, Path *new_path)
423 {
424 bool accept_new = true; /* unless we find a superior old path */
425 ListCell *insert_after = NULL; /* where to insert new item */
426 List *new_path_pathkeys;
427 ListCell *p1;
428 ListCell *p1_prev;
429 ListCell *p1_next;
430
431 /*
432 * This is a convenient place to check for query cancel --- no part of the
433 * planner goes very long without calling add_path().
434 */
435 CHECK_FOR_INTERRUPTS();
436
437 /* Pretend parameterized paths have no pathkeys, per comment above */
438 new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys;
439
440 /*
441 * Loop to check proposed new path against old paths. Note it is possible
442 * for more than one old path to be tossed out because new_path dominates
443 * it.
444 *
445 * We can't use foreach here because the loop body may delete the current
446 * list cell.
447 */
448 p1_prev = NULL;
449 for (p1 = list_head(parent_rel->pathlist); p1 != NULL; p1 = p1_next)
450 {
451 Path *old_path = (Path *) lfirst(p1);
452 bool remove_old = false; /* unless new proves superior */
453 PathCostComparison costcmp;
454 PathKeysComparison keyscmp;
455 BMS_Comparison outercmp;
456
457 p1_next = lnext(p1);
458
459 /*
460 * Do a fuzzy cost comparison with standard fuzziness limit.
461 */
462 costcmp = compare_path_costs_fuzzily(new_path, old_path,
463 STD_FUZZ_FACTOR);
464
465 /*
466 * If the two paths compare differently for startup and total cost,
467 * then we want to keep both, and we can skip comparing pathkeys and
468 * required_outer rels. If they compare the same, proceed with the
469 * other comparisons. Row count is checked last. (We make the tests
470 * in this order because the cost comparison is most likely to turn
471 * out "different", and the pathkeys comparison next most likely. As
472 * explained above, row count very seldom makes a difference, so even
473 * though it's cheap to compare there's not much point in checking it
474 * earlier.)
475 */
476 if (costcmp != COSTS_DIFFERENT)
477 {
478 /* Similarly check to see if either dominates on pathkeys */
479 List *old_path_pathkeys;
480
481 old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
482 keyscmp = compare_pathkeys(new_path_pathkeys,
483 old_path_pathkeys);
484 if (keyscmp != PATHKEYS_DIFFERENT)
485 {
486 switch (costcmp)
487 {
488 case COSTS_EQUAL:
489 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
490 PATH_REQ_OUTER(old_path));
491 if (keyscmp == PATHKEYS_BETTER1)
492 {
493 if ((outercmp == BMS_EQUAL ||
494 outercmp == BMS_SUBSET1) &&
495 new_path->rows <= old_path->rows &&
496 new_path->parallel_safe >= old_path->parallel_safe)
497 remove_old = true; /* new dominates old */
498 }
499 else if (keyscmp == PATHKEYS_BETTER2)
500 {
501 if ((outercmp == BMS_EQUAL ||
502 outercmp == BMS_SUBSET2) &&
503 new_path->rows >= old_path->rows &&
504 new_path->parallel_safe <= old_path->parallel_safe)
505 accept_new = false; /* old dominates new */
506 }
507 else /* keyscmp == PATHKEYS_EQUAL */
508 {
509 if (outercmp == BMS_EQUAL)
510 {
511 /*
512 * Same pathkeys and outer rels, and fuzzily
513 * the same cost, so keep just one; to decide
514 * which, first check parallel-safety, then
515 * rows, then do a fuzzy cost comparison with
516 * very small fuzz limit. (We used to do an
517 * exact cost comparison, but that results in
518 * annoying platform-specific plan variations
519 * due to roundoff in the cost estimates.) If
520 * things are still tied, arbitrarily keep
521 * only the old path. Notice that we will
522 * keep only the old path even if the
523 * less-fuzzy comparison decides the startup
524 * and total costs compare differently.
525 */
526 if (new_path->parallel_safe >
527 old_path->parallel_safe)
528 remove_old = true; /* new dominates old */
529 else if (new_path->parallel_safe <
530 old_path->parallel_safe)
531 accept_new = false; /* old dominates new */
532 else if (new_path->rows < old_path->rows)
533 remove_old = true; /* new dominates old */
534 else if (new_path->rows > old_path->rows)
535 accept_new = false; /* old dominates new */
536 else if (compare_path_costs_fuzzily(new_path,
537 old_path,
538 1.0000000001) == COSTS_BETTER1)
539 remove_old = true; /* new dominates old */
540 else
541 accept_new = false; /* old equals or
542 * dominates new */
543 }
544 else if (outercmp == BMS_SUBSET1 &&
545 new_path->rows <= old_path->rows &&
546 new_path->parallel_safe >= old_path->parallel_safe)
547 remove_old = true; /* new dominates old */
548 else if (outercmp == BMS_SUBSET2 &&
549 new_path->rows >= old_path->rows &&
550 new_path->parallel_safe <= old_path->parallel_safe)
551 accept_new = false; /* old dominates new */
552 /* else different parameterizations, keep both */
553 }
554 break;
555 case COSTS_BETTER1:
556 if (keyscmp != PATHKEYS_BETTER2)
557 {
558 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
559 PATH_REQ_OUTER(old_path));
560 if ((outercmp == BMS_EQUAL ||
561 outercmp == BMS_SUBSET1) &&
562 new_path->rows <= old_path->rows &&
563 new_path->parallel_safe >= old_path->parallel_safe)
564 remove_old = true; /* new dominates old */
565 }
566 break;
567 case COSTS_BETTER2:
568 if (keyscmp != PATHKEYS_BETTER1)
569 {
570 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
571 PATH_REQ_OUTER(old_path));
572 if ((outercmp == BMS_EQUAL ||
573 outercmp == BMS_SUBSET2) &&
574 new_path->rows >= old_path->rows &&
575 new_path->parallel_safe <= old_path->parallel_safe)
576 accept_new = false; /* old dominates new */
577 }
578 break;
579 case COSTS_DIFFERENT:
580
581 /*
582 * can't get here, but keep this case to keep compiler
583 * quiet
584 */
585 break;
586 }
587 }
588 }
589
590 /*
591 * Remove current element from pathlist if dominated by new.
592 */
593 if (remove_old)
594 {
595 parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
596 p1, p1_prev);
597
598 /*
599 * Delete the data pointed-to by the deleted cell, if possible
600 */
601 if (!IsA(old_path, IndexPath))
602 pfree(old_path);
603 /* p1_prev does not advance */
604 }
605 else
606 {
607 /* new belongs after this old path if it has cost >= old's */
608 if (new_path->total_cost >= old_path->total_cost)
609 insert_after = p1;
610 /* p1_prev advances */
611 p1_prev = p1;
612 }
613
614 /*
615 * If we found an old path that dominates new_path, we can quit
616 * scanning the pathlist; we will not add new_path, and we assume
617 * new_path cannot dominate any other elements of the pathlist.
618 */
619 if (!accept_new)
620 break;
621 }
622
623 if (accept_new)
624 {
625 /* Accept the new path: insert it at proper place in pathlist */
626 if (insert_after)
627 lappend_cell(parent_rel->pathlist, insert_after, new_path);
628 else
629 parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
630 }
631 else
632 {
633 /* Reject and recycle the new path */
634 if (!IsA(new_path, IndexPath))
635 pfree(new_path);
636 }
637 }
638
639 /*
640 * add_path_precheck
641 * Check whether a proposed new path could possibly get accepted.
642 * We assume we know the path's pathkeys and parameterization accurately,
643 * and have lower bounds for its costs.
644 *
645 * Note that we do not know the path's rowcount, since getting an estimate for
646 * that is too expensive to do before prechecking. We assume here that paths
647 * of a superset parameterization will generate fewer rows; if that holds,
648 * then paths with different parameterizations cannot dominate each other
649 * and so we can simply ignore existing paths of another parameterization.
650 * (In the infrequent cases where that rule of thumb fails, add_path will
651 * get rid of the inferior path.)
652 *
653 * At the time this is called, we haven't actually built a Path structure,
654 * so the required information has to be passed piecemeal.
655 */
656 bool
add_path_precheck(RelOptInfo * parent_rel,Cost startup_cost,Cost total_cost,List * pathkeys,Relids required_outer)657 add_path_precheck(RelOptInfo *parent_rel,
658 Cost startup_cost, Cost total_cost,
659 List *pathkeys, Relids required_outer)
660 {
661 List *new_path_pathkeys;
662 bool consider_startup;
663 ListCell *p1;
664
665 /* Pretend parameterized paths have no pathkeys, per add_path policy */
666 new_path_pathkeys = required_outer ? NIL : pathkeys;
667
668 /* Decide whether new path's startup cost is interesting */
669 consider_startup = required_outer ? parent_rel->consider_param_startup : parent_rel->consider_startup;
670
671 foreach(p1, parent_rel->pathlist)
672 {
673 Path *old_path = (Path *) lfirst(p1);
674 PathKeysComparison keyscmp;
675
676 /*
677 * We are looking for an old_path with the same parameterization (and
678 * by assumption the same rowcount) that dominates the new path on
679 * pathkeys as well as both cost metrics. If we find one, we can
680 * reject the new path.
681 *
682 * Cost comparisons here should match compare_path_costs_fuzzily.
683 */
684 if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
685 {
686 /* new path can win on startup cost only if consider_startup */
687 if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR ||
688 !consider_startup)
689 {
690 /* new path loses on cost, so check pathkeys... */
691 List *old_path_pathkeys;
692
693 old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
694 keyscmp = compare_pathkeys(new_path_pathkeys,
695 old_path_pathkeys);
696 if (keyscmp == PATHKEYS_EQUAL ||
697 keyscmp == PATHKEYS_BETTER2)
698 {
699 /* new path does not win on pathkeys... */
700 if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
701 {
702 /* Found an old path that dominates the new one */
703 return false;
704 }
705 }
706 }
707 }
708 else
709 {
710 /*
711 * Since the pathlist is sorted by total_cost, we can stop looking
712 * once we reach a path with a total_cost larger than the new
713 * path's.
714 */
715 break;
716 }
717 }
718
719 return true;
720 }
721
722 /*
723 * add_partial_path
724 * Like add_path, our goal here is to consider whether a path is worthy
725 * of being kept around, but the considerations here are a bit different.
726 * A partial path is one which can be executed in any number of workers in
727 * parallel such that each worker will generate a subset of the path's
728 * overall result.
729 *
730 * As in add_path, the partial_pathlist is kept sorted with the cheapest
731 * total path in front. This is depended on by multiple places, which
732 * just take the front entry as the cheapest path without searching.
733 *
734 * We don't generate parameterized partial paths for several reasons. Most
735 * importantly, they're not safe to execute, because there's nothing to
736 * make sure that a parallel scan within the parameterized portion of the
737 * plan is running with the same value in every worker at the same time.
738 * Fortunately, it seems unlikely to be worthwhile anyway, because having
739 * each worker scan the entire outer relation and a subset of the inner
740 * relation will generally be a terrible plan. The inner (parameterized)
741 * side of the plan will be small anyway. There could be rare cases where
742 * this wins big - e.g. if join order constraints put a 1-row relation on
743 * the outer side of the topmost join with a parameterized plan on the inner
744 * side - but we'll have to be content not to handle such cases until
745 * somebody builds an executor infrastructure that can cope with them.
746 *
747 * Because we don't consider parameterized paths here, we also don't
748 * need to consider the row counts as a measure of quality: every path will
749 * produce the same number of rows. Neither do we need to consider startup
750 * costs: parallelism is only used for plans that will be run to completion.
751 * Therefore, this routine is much simpler than add_path: it needs to
752 * consider only pathkeys and total cost.
753 *
754 * As with add_path, we pfree paths that are found to be dominated by
755 * another partial path; this requires that there be no other references to
756 * such paths yet. Hence, GatherPaths must not be created for a rel until
757 * we're done creating all partial paths for it. Unlike add_path, we don't
758 * take an exception for IndexPaths as partial index paths won't be
759 * referenced by partial BitmapHeapPaths.
760 */
761 void
add_partial_path(RelOptInfo * parent_rel,Path * new_path)762 add_partial_path(RelOptInfo *parent_rel, Path *new_path)
763 {
764 bool accept_new = true; /* unless we find a superior old path */
765 ListCell *insert_after = NULL; /* where to insert new item */
766 ListCell *p1;
767 ListCell *p1_prev;
768 ListCell *p1_next;
769
770 /* Check for query cancel. */
771 CHECK_FOR_INTERRUPTS();
772
773 /* Path to be added must be parallel safe. */
774 Assert(new_path->parallel_safe);
775
776 /* Relation should be OK for parallelism, too. */
777 Assert(parent_rel->consider_parallel);
778
779 /*
780 * As in add_path, throw out any paths which are dominated by the new
781 * path, but throw out the new path if some existing path dominates it.
782 */
783 p1_prev = NULL;
784 for (p1 = list_head(parent_rel->partial_pathlist); p1 != NULL;
785 p1 = p1_next)
786 {
787 Path *old_path = (Path *) lfirst(p1);
788 bool remove_old = false; /* unless new proves superior */
789 PathKeysComparison keyscmp;
790
791 p1_next = lnext(p1);
792
793 /* Compare pathkeys. */
794 keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
795
796 /* Unless pathkeys are incompable, keep just one of the two paths. */
797 if (keyscmp != PATHKEYS_DIFFERENT)
798 {
799 if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
800 {
801 /* New path costs more; keep it only if pathkeys are better. */
802 if (keyscmp != PATHKEYS_BETTER1)
803 accept_new = false;
804 }
805 else if (old_path->total_cost > new_path->total_cost
806 * STD_FUZZ_FACTOR)
807 {
808 /* Old path costs more; keep it only if pathkeys are better. */
809 if (keyscmp != PATHKEYS_BETTER2)
810 remove_old = true;
811 }
812 else if (keyscmp == PATHKEYS_BETTER1)
813 {
814 /* Costs are about the same, new path has better pathkeys. */
815 remove_old = true;
816 }
817 else if (keyscmp == PATHKEYS_BETTER2)
818 {
819 /* Costs are about the same, old path has better pathkeys. */
820 accept_new = false;
821 }
822 else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
823 {
824 /* Pathkeys are the same, and the old path costs more. */
825 remove_old = true;
826 }
827 else
828 {
829 /*
830 * Pathkeys are the same, and new path isn't materially
831 * cheaper.
832 */
833 accept_new = false;
834 }
835 }
836
837 /*
838 * Remove current element from partial_pathlist if dominated by new.
839 */
840 if (remove_old)
841 {
842 parent_rel->partial_pathlist =
843 list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
844 pfree(old_path);
845 /* p1_prev does not advance */
846 }
847 else
848 {
849 /* new belongs after this old path if it has cost >= old's */
850 if (new_path->total_cost >= old_path->total_cost)
851 insert_after = p1;
852 /* p1_prev advances */
853 p1_prev = p1;
854 }
855
856 /*
857 * If we found an old path that dominates new_path, we can quit
858 * scanning the partial_pathlist; we will not add new_path, and we
859 * assume new_path cannot dominate any later path.
860 */
861 if (!accept_new)
862 break;
863 }
864
865 if (accept_new)
866 {
867 /* Accept the new path: insert it at proper place */
868 if (insert_after)
869 lappend_cell(parent_rel->partial_pathlist, insert_after, new_path);
870 else
871 parent_rel->partial_pathlist =
872 lcons(new_path, parent_rel->partial_pathlist);
873 }
874 else
875 {
876 /* Reject and recycle the new path */
877 pfree(new_path);
878 }
879 }
880
881 /*
882 * add_partial_path_precheck
883 * Check whether a proposed new partial path could possibly get accepted.
884 *
885 * Unlike add_path_precheck, we can ignore startup cost and parameterization,
886 * since they don't matter for partial paths (see add_partial_path). But
887 * we do want to make sure we don't add a partial path if there's already
888 * a complete path that dominates it, since in that case the proposed path
889 * is surely a loser.
890 */
891 bool
add_partial_path_precheck(RelOptInfo * parent_rel,Cost total_cost,List * pathkeys)892 add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost,
893 List *pathkeys)
894 {
895 ListCell *p1;
896
897 /*
898 * Our goal here is twofold. First, we want to find out whether this path
899 * is clearly inferior to some existing partial path. If so, we want to
900 * reject it immediately. Second, we want to find out whether this path
901 * is clearly superior to some existing partial path -- at least, modulo
902 * final cost computations. If so, we definitely want to consider it.
903 *
904 * Unlike add_path(), we always compare pathkeys here. This is because we
905 * expect partial_pathlist to be very short, and getting a definitive
906 * answer at this stage avoids the need to call add_path_precheck.
907 */
908 foreach(p1, parent_rel->partial_pathlist)
909 {
910 Path *old_path = (Path *) lfirst(p1);
911 PathKeysComparison keyscmp;
912
913 keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
914 if (keyscmp != PATHKEYS_DIFFERENT)
915 {
916 if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
917 keyscmp != PATHKEYS_BETTER1)
918 return false;
919 if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
920 keyscmp != PATHKEYS_BETTER2)
921 return true;
922 }
923 }
924
925 /*
926 * This path is neither clearly inferior to an existing partial path nor
927 * clearly good enough that it might replace one. Compare it to
928 * non-parallel plans. If it loses even before accounting for the cost of
929 * the Gather node, we should definitely reject it.
930 *
931 * Note that we pass the total_cost to add_path_precheck twice. This is
932 * because it's never advantageous to consider the startup cost of a
933 * partial path; the resulting plans, if run in parallel, will be run to
934 * completion.
935 */
936 if (!add_path_precheck(parent_rel, total_cost, total_cost, pathkeys,
937 NULL))
938 return false;
939
940 return true;
941 }
942
943
944 /*****************************************************************************
945 * PATH NODE CREATION ROUTINES
946 *****************************************************************************/
947
948 /*
949 * create_seqscan_path
950 * Creates a path corresponding to a sequential scan, returning the
951 * pathnode.
952 */
953 Path *
create_seqscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer,int parallel_workers)954 create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
955 Relids required_outer, int parallel_workers)
956 {
957 Path *pathnode = makeNode(Path);
958
959 pathnode->pathtype = T_SeqScan;
960 pathnode->parent = rel;
961 pathnode->pathtarget = rel->reltarget;
962 pathnode->param_info = get_baserel_parampathinfo(root, rel,
963 required_outer);
964 pathnode->parallel_aware = parallel_workers > 0 ? true : false;
965 pathnode->parallel_safe = rel->consider_parallel;
966 pathnode->parallel_workers = parallel_workers;
967 pathnode->pathkeys = NIL; /* seqscan has unordered result */
968
969 cost_seqscan(pathnode, root, rel, pathnode->param_info);
970
971 return pathnode;
972 }
973
974 /*
975 * create_samplescan_path
976 * Creates a path node for a sampled table scan.
977 */
978 Path *
create_samplescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)979 create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
980 {
981 Path *pathnode = makeNode(Path);
982
983 pathnode->pathtype = T_SampleScan;
984 pathnode->parent = rel;
985 pathnode->pathtarget = rel->reltarget;
986 pathnode->param_info = get_baserel_parampathinfo(root, rel,
987 required_outer);
988 pathnode->parallel_aware = false;
989 pathnode->parallel_safe = rel->consider_parallel;
990 pathnode->parallel_workers = 0;
991 pathnode->pathkeys = NIL; /* samplescan has unordered result */
992
993 cost_samplescan(pathnode, root, rel, pathnode->param_info);
994
995 return pathnode;
996 }
997
/*
 * create_index_path
 *	  Creates a path node for an index scan.
 *
 * 'index' is a usable index.
 * 'indexclauses' is a list of RestrictInfo nodes representing clauses
 *			to be used as index qual conditions in the scan.
 * 'indexclausecols' is an integer list of index column numbers (zero based)
 *			the indexclauses can be used with.
 * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
 *			to be used as index ordering operators in the scan.
 * 'indexorderbycols' is an integer list of index column numbers (zero based)
 *			the ordering operators can be used with.
 * 'pathkeys' describes the ordering of the path.
 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
 *			for an ordered index, or NoMovementScanDirection for
 *			an unordered index.
 * 'indexonly' is true if an index-only scan is wanted.
 * 'required_outer' is the set of outer relids for a parameterized path.
 * 'loop_count' is the number of repetitions of the indexscan to factor into
 *		estimates of caching behavior.
 * 'partial_path' is true if constructing a parallel index scan path.
 *
 * Returns the new path node.
 */
IndexPath *
create_index_path(PlannerInfo *root,
				  IndexOptInfo *index,
				  List *indexclauses,
				  List *indexclausecols,
				  List *indexorderbys,
				  List *indexorderbycols,
				  List *pathkeys,
				  ScanDirection indexscandir,
				  bool indexonly,
				  Relids required_outer,
				  double loop_count,
				  bool partial_path)
{
	IndexPath  *pathnode = makeNode(IndexPath);
	RelOptInfo *rel = index->rel;
	List	   *indexquals,
			   *indexqualcols;

	/* An index-only scan gets its own plan node type */
	pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
														  required_outer);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = pathkeys;

	/* Convert clauses to indexquals the executor can handle */
	expand_indexqual_conditions(index, indexclauses, indexclausecols,
								&indexquals, &indexqualcols);

	/* Fill in the pathnode */
	pathnode->indexinfo = index;
	pathnode->indexclauses = indexclauses;
	pathnode->indexquals = indexquals;
	pathnode->indexqualcols = indexqualcols;
	pathnode->indexorderbys = indexorderbys;
	pathnode->indexorderbycols = indexorderbycols;
	pathnode->indexscandir = indexscandir;

	/* Fill in the cost fields, factoring loop_count into caching estimates */
	cost_index(pathnode, root, loop_count, partial_path);

	return pathnode;
}
1069
1070 /*
1071 * create_bitmap_heap_path
1072 * Creates a path node for a bitmap scan.
1073 *
1074 * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
1075 * 'required_outer' is the set of outer relids for a parameterized path.
1076 * 'loop_count' is the number of repetitions of the indexscan to factor into
1077 * estimates of caching behavior.
1078 *
1079 * loop_count should match the value used when creating the component
1080 * IndexPaths.
1081 */
1082 BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo * root,RelOptInfo * rel,Path * bitmapqual,Relids required_outer,double loop_count,int parallel_degree)1083 create_bitmap_heap_path(PlannerInfo *root,
1084 RelOptInfo *rel,
1085 Path *bitmapqual,
1086 Relids required_outer,
1087 double loop_count,
1088 int parallel_degree)
1089 {
1090 BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
1091
1092 pathnode->path.pathtype = T_BitmapHeapScan;
1093 pathnode->path.parent = rel;
1094 pathnode->path.pathtarget = rel->reltarget;
1095 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1096 required_outer);
1097 pathnode->path.parallel_aware = parallel_degree > 0 ? true : false;
1098 pathnode->path.parallel_safe = rel->consider_parallel;
1099 pathnode->path.parallel_workers = parallel_degree;
1100 pathnode->path.pathkeys = NIL; /* always unordered */
1101
1102 pathnode->bitmapqual = bitmapqual;
1103
1104 cost_bitmap_heap_scan(&pathnode->path, root, rel,
1105 pathnode->path.param_info,
1106 bitmapqual, loop_count);
1107
1108 return pathnode;
1109 }
1110
1111 /*
1112 * create_bitmap_and_path
1113 * Creates a path node representing a BitmapAnd.
1114 */
1115 BitmapAndPath *
create_bitmap_and_path(PlannerInfo * root,RelOptInfo * rel,List * bitmapquals)1116 create_bitmap_and_path(PlannerInfo *root,
1117 RelOptInfo *rel,
1118 List *bitmapquals)
1119 {
1120 BitmapAndPath *pathnode = makeNode(BitmapAndPath);
1121 Relids required_outer = NULL;
1122 ListCell *lc;
1123
1124 pathnode->path.pathtype = T_BitmapAnd;
1125 pathnode->path.parent = rel;
1126 pathnode->path.pathtarget = rel->reltarget;
1127
1128 /*
1129 * Identify the required outer rels as the union of what the child paths
1130 * depend on. (Alternatively, we could insist that the caller pass this
1131 * in, but it's more convenient and reliable to compute it here.)
1132 */
1133 foreach(lc, bitmapquals)
1134 {
1135 Path *bitmapqual = (Path *) lfirst(lc);
1136
1137 required_outer = bms_add_members(required_outer,
1138 PATH_REQ_OUTER(bitmapqual));
1139 }
1140 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1141 required_outer);
1142
1143 /*
1144 * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
1145 * parallel-safe if and only if rel->consider_parallel is set. So, we can
1146 * set the flag for this path based only on the relation-level flag,
1147 * without actually iterating over the list of children.
1148 */
1149 pathnode->path.parallel_aware = false;
1150 pathnode->path.parallel_safe = rel->consider_parallel;
1151 pathnode->path.parallel_workers = 0;
1152
1153 pathnode->path.pathkeys = NIL; /* always unordered */
1154
1155 pathnode->bitmapquals = bitmapquals;
1156
1157 /* this sets bitmapselectivity as well as the regular cost fields: */
1158 cost_bitmap_and_node(pathnode, root);
1159
1160 return pathnode;
1161 }
1162
1163 /*
1164 * create_bitmap_or_path
1165 * Creates a path node representing a BitmapOr.
1166 */
1167 BitmapOrPath *
create_bitmap_or_path(PlannerInfo * root,RelOptInfo * rel,List * bitmapquals)1168 create_bitmap_or_path(PlannerInfo *root,
1169 RelOptInfo *rel,
1170 List *bitmapquals)
1171 {
1172 BitmapOrPath *pathnode = makeNode(BitmapOrPath);
1173 Relids required_outer = NULL;
1174 ListCell *lc;
1175
1176 pathnode->path.pathtype = T_BitmapOr;
1177 pathnode->path.parent = rel;
1178 pathnode->path.pathtarget = rel->reltarget;
1179
1180 /*
1181 * Identify the required outer rels as the union of what the child paths
1182 * depend on. (Alternatively, we could insist that the caller pass this
1183 * in, but it's more convenient and reliable to compute it here.)
1184 */
1185 foreach(lc, bitmapquals)
1186 {
1187 Path *bitmapqual = (Path *) lfirst(lc);
1188
1189 required_outer = bms_add_members(required_outer,
1190 PATH_REQ_OUTER(bitmapqual));
1191 }
1192 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1193 required_outer);
1194
1195 /*
1196 * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
1197 * parallel-safe if and only if rel->consider_parallel is set. So, we can
1198 * set the flag for this path based only on the relation-level flag,
1199 * without actually iterating over the list of children.
1200 */
1201 pathnode->path.parallel_aware = false;
1202 pathnode->path.parallel_safe = rel->consider_parallel;
1203 pathnode->path.parallel_workers = 0;
1204
1205 pathnode->path.pathkeys = NIL; /* always unordered */
1206
1207 pathnode->bitmapquals = bitmapquals;
1208
1209 /* this sets bitmapselectivity as well as the regular cost fields: */
1210 cost_bitmap_or_node(pathnode, root);
1211
1212 return pathnode;
1213 }
1214
1215 /*
1216 * create_tidscan_path
1217 * Creates a path corresponding to a scan by TID, returning the pathnode.
1218 */
1219 TidPath *
create_tidscan_path(PlannerInfo * root,RelOptInfo * rel,List * tidquals,Relids required_outer)1220 create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
1221 Relids required_outer)
1222 {
1223 TidPath *pathnode = makeNode(TidPath);
1224
1225 pathnode->path.pathtype = T_TidScan;
1226 pathnode->path.parent = rel;
1227 pathnode->path.pathtarget = rel->reltarget;
1228 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1229 required_outer);
1230 pathnode->path.parallel_aware = false;
1231 pathnode->path.parallel_safe = rel->consider_parallel;
1232 pathnode->path.parallel_workers = 0;
1233 pathnode->path.pathkeys = NIL; /* always unordered */
1234
1235 pathnode->tidquals = tidquals;
1236
1237 cost_tidscan(&pathnode->path, root, rel, tidquals,
1238 pathnode->path.param_info);
1239
1240 return pathnode;
1241 }
1242
/*
 * create_append_path
 *	  Creates a path corresponding to an Append plan, returning the
 *	  pathnode.
 *
 * Note that we must handle subpaths = NIL, representing a dummy access path.
 * If 'rows' is >= 0, it overrides the row estimate computed by cost_append.
 */
AppendPath *
create_append_path(PlannerInfo *root,
				   RelOptInfo *rel,
				   List *subpaths, List *partial_subpaths,
				   Relids required_outer,
				   int parallel_workers, bool parallel_aware,
				   List *partitioned_rels, double rows)
{
	AppendPath *pathnode = makeNode(AppendPath);
	ListCell   *l;

	/* A parallel-aware Append must have workers available to run it */
	Assert(!parallel_aware || parallel_workers > 0);

	pathnode->path.pathtype = T_Append;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;

	/*
	 * When generating an Append path for a partitioned table, there may be
	 * parameters that are useful so we can eliminate certain partitions
	 * during execution.  Here we'll go all the way and fully populate the
	 * parameter info data as we do for normal base relations.  However, we
	 * need only bother doing this for RELOPT_BASEREL rels, as
	 * RELOPT_OTHER_MEMBER_REL's Append paths are merged into the base rel's
	 * Append subpaths.  It would do no harm to do this, we just avoid it to
	 * save wasting effort.
	 */
	if (partitioned_rels != NIL && root && rel->reloptkind == RELOPT_BASEREL)
		pathnode->path.param_info = get_baserel_parampathinfo(root,
															  rel,
															  required_outer);
	else
		pathnode->path.param_info = get_appendrel_parampathinfo(rel,
																required_outer);

	pathnode->path.parallel_aware = parallel_aware;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = parallel_workers;
	pathnode->path.pathkeys = NIL;	/* result is always considered unsorted */
	pathnode->partitioned_rels = list_copy(partitioned_rels);

	/*
	 * For parallel append, non-partial paths are sorted by descending total
	 * costs.  That way, the total time to finish all non-partial paths is
	 * minimized.  Also, the partial paths are sorted by descending startup
	 * costs.  There may be some paths that require to do startup work by a
	 * single worker.  In such case, it's better for workers to choose the
	 * expensive ones first, whereas the leader should choose the cheapest
	 * startup plan.
	 */
	if (pathnode->path.parallel_aware)
	{
		subpaths = list_qsort(subpaths, append_total_cost_compare);
		partial_subpaths = list_qsort(partial_subpaths,
									  append_startup_cost_compare);
	}
	/* Partial subpaths follow the non-partial ones in the combined list */
	pathnode->first_partial_path = list_length(subpaths);
	pathnode->subpaths = list_concat(subpaths, partial_subpaths);

	foreach(l, pathnode->subpaths)
	{
		Path	   *subpath = (Path *) lfirst(l);

		/* The Append is parallel-safe only if all of its subpaths are */
		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
			subpath->parallel_safe;

		/* All child paths must have same parameterization */
		Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
	}

	Assert(!parallel_aware || pathnode->path.parallel_safe);

	cost_append(pathnode);

	/* If the caller provided a row estimate, override the computed value. */
	if (rows >= 0)
		pathnode->path.rows = rows;

	return pathnode;
}
1330
1331 /*
1332 * append_total_cost_compare
1333 * qsort comparator for sorting append child paths by total_cost descending
1334 *
1335 * For equal total costs, we fall back to comparing startup costs; if those
1336 * are equal too, break ties using bms_compare on the paths' relids.
1337 * (This is to avoid getting unpredictable results from qsort.)
1338 */
1339 static int
append_total_cost_compare(const void * a,const void * b)1340 append_total_cost_compare(const void *a, const void *b)
1341 {
1342 Path *path1 = (Path *) lfirst(*(ListCell **) a);
1343 Path *path2 = (Path *) lfirst(*(ListCell **) b);
1344 int cmp;
1345
1346 cmp = compare_path_costs(path1, path2, TOTAL_COST);
1347 if (cmp != 0)
1348 return -cmp;
1349 return bms_compare(path1->parent->relids, path2->parent->relids);
1350 }
1351
1352 /*
1353 * append_startup_cost_compare
1354 * qsort comparator for sorting append child paths by startup_cost descending
1355 *
1356 * For equal startup costs, we fall back to comparing total costs; if those
1357 * are equal too, break ties using bms_compare on the paths' relids.
1358 * (This is to avoid getting unpredictable results from qsort.)
1359 */
1360 static int
append_startup_cost_compare(const void * a,const void * b)1361 append_startup_cost_compare(const void *a, const void *b)
1362 {
1363 Path *path1 = (Path *) lfirst(*(ListCell **) a);
1364 Path *path2 = (Path *) lfirst(*(ListCell **) b);
1365 int cmp;
1366
1367 cmp = compare_path_costs(path1, path2, STARTUP_COST);
1368 if (cmp != 0)
1369 return -cmp;
1370 return bms_compare(path1->parent->relids, path2->parent->relids);
1371 }
1372
/*
 * create_merge_append_path
 *	  Creates a path corresponding to a MergeAppend plan, returning the
 *	  pathnode.
 *
 * All subpaths must deliver rows in an order compatible with 'pathkeys';
 * any subpath that is not already suitably ordered is charged for an
 * explicit Sort on top of it.
 */
MergeAppendPath *
create_merge_append_path(PlannerInfo *root,
						 RelOptInfo *rel,
						 List *subpaths,
						 List *pathkeys,
						 Relids required_outer,
						 List *partitioned_rels)
{
	MergeAppendPath *pathnode = makeNode(MergeAppendPath);
	Cost		input_startup_cost;
	Cost		input_total_cost;
	ListCell   *l;

	pathnode->path.pathtype = T_MergeAppend;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = get_appendrel_parampathinfo(rel,
															required_outer);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = pathkeys;
	pathnode->partitioned_rels = list_copy(partitioned_rels);
	pathnode->subpaths = subpaths;

	/*
	 * Apply query-wide LIMIT if known and path is for sole base relation.
	 * (Handling this at this low level is a bit klugy.)
	 */
	if (bms_equal(rel->relids, root->all_baserels))
		pathnode->limit_tuples = root->limit_tuples;
	else
		pathnode->limit_tuples = -1.0;	/* i.e., no limit known */

	/*
	 * Add up the sizes and costs of the input paths.
	 */
	pathnode->path.rows = 0;
	input_startup_cost = 0;
	input_total_cost = 0;
	foreach(l, subpaths)
	{
		Path	   *subpath = (Path *) lfirst(l);

		pathnode->path.rows += subpath->rows;
		/* MergeAppend is parallel-safe only if all of its subpaths are */
		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
			subpath->parallel_safe;

		if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
		{
			/* Subpath is adequately ordered, we won't need to sort it */
			input_startup_cost += subpath->startup_cost;
			input_total_cost += subpath->total_cost;
		}
		else
		{
			/* We'll need to insert a Sort node, so include cost for that */
			Path		sort_path;	/* dummy for result of cost_sort */

			cost_sort(&sort_path,
					  root,
					  pathkeys,
					  subpath->total_cost,
					  subpath->parent->tuples,
					  subpath->pathtarget->width,
					  0.0,
					  work_mem,
					  pathnode->limit_tuples);
			input_startup_cost += sort_path.startup_cost;
			input_total_cost += sort_path.total_cost;
		}

		/* All child paths must have same parameterization */
		Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
	}

	/* Now we can compute total costs of the MergeAppend */
	cost_merge_append(&pathnode->path, root,
					  pathkeys, list_length(subpaths),
					  input_startup_cost, input_total_cost,
					  pathnode->path.rows);

	return pathnode;
}
1462
1463 /*
1464 * create_result_path
1465 * Creates a path representing a Result-and-nothing-else plan.
1466 *
1467 * This is only used for degenerate cases, such as a query with an empty
1468 * jointree.
1469 */
1470 ResultPath *
create_result_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,List * resconstantqual)1471 create_result_path(PlannerInfo *root, RelOptInfo *rel,
1472 PathTarget *target, List *resconstantqual)
1473 {
1474 ResultPath *pathnode = makeNode(ResultPath);
1475
1476 pathnode->path.pathtype = T_Result;
1477 pathnode->path.parent = rel;
1478 pathnode->path.pathtarget = target;
1479 pathnode->path.param_info = NULL; /* there are no other rels... */
1480 pathnode->path.parallel_aware = false;
1481 pathnode->path.parallel_safe = rel->consider_parallel;
1482 pathnode->path.parallel_workers = 0;
1483 pathnode->path.pathkeys = NIL;
1484 pathnode->quals = resconstantqual;
1485
1486 /* Hardly worth defining a cost_result() function ... just do it */
1487 pathnode->path.rows = 1;
1488 pathnode->path.startup_cost = target->cost.startup;
1489 pathnode->path.total_cost = target->cost.startup +
1490 cpu_tuple_cost + target->cost.per_tuple;
1491
1492 /*
1493 * Add cost of qual, if any --- but we ignore its selectivity, since our
1494 * rowcount estimate should be 1 no matter what the qual is.
1495 */
1496 if (resconstantqual)
1497 {
1498 QualCost qual_cost;
1499
1500 cost_qual_eval(&qual_cost, resconstantqual, root);
1501 /* resconstantqual is evaluated once at startup */
1502 pathnode->path.startup_cost += qual_cost.startup + qual_cost.per_tuple;
1503 pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
1504 }
1505
1506 return pathnode;
1507 }
1508
1509 /*
1510 * create_material_path
1511 * Creates a path corresponding to a Material plan, returning the
1512 * pathnode.
1513 */
1514 MaterialPath *
create_material_path(RelOptInfo * rel,Path * subpath)1515 create_material_path(RelOptInfo *rel, Path *subpath)
1516 {
1517 MaterialPath *pathnode = makeNode(MaterialPath);
1518
1519 Assert(subpath->parent == rel);
1520
1521 pathnode->path.pathtype = T_Material;
1522 pathnode->path.parent = rel;
1523 pathnode->path.pathtarget = rel->reltarget;
1524 pathnode->path.param_info = subpath->param_info;
1525 pathnode->path.parallel_aware = false;
1526 pathnode->path.parallel_safe = rel->consider_parallel &&
1527 subpath->parallel_safe;
1528 pathnode->path.parallel_workers = subpath->parallel_workers;
1529 pathnode->path.pathkeys = subpath->pathkeys;
1530
1531 pathnode->subpath = subpath;
1532
1533 cost_material(&pathnode->path,
1534 subpath->startup_cost,
1535 subpath->total_cost,
1536 subpath->rows,
1537 subpath->pathtarget->width);
1538
1539 return pathnode;
1540 }
1541
/*
 * create_unique_path
 *	  Creates a path representing elimination of distinct rows from the
 *	  input data.  Distinct-ness is defined according to the needs of the
 *	  semijoin represented by sjinfo.  If it is not possible to identify
 *	  how to make the data unique, NULL is returned.
 *
 * If used at all, this is likely to be called repeatedly on the same rel;
 * and the input subpath should always be the same (the cheapest_total path
 * for the rel).  So we cache the result in rel->cheapest_unique_path.
 */
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
				   SpecialJoinInfo *sjinfo)
{
	UniquePath *pathnode;
	Path		sort_path;		/* dummy for result of cost_sort */
	Path		agg_path;		/* dummy for result of cost_agg */
	MemoryContext oldcontext;
	int			numCols;

	/* Caller made a mistake if subpath isn't cheapest_total ... */
	Assert(subpath == rel->cheapest_total_path);
	Assert(subpath->parent == rel);
	/* ... or if SpecialJoinInfo is the wrong one */
	Assert(sjinfo->jointype == JOIN_SEMI);
	Assert(bms_equal(rel->relids, sjinfo->syn_righthand));

	/* If result already cached, return it */
	if (rel->cheapest_unique_path)
		return (UniquePath *) rel->cheapest_unique_path;

	/* If it's not possible to unique-ify, return NULL */
	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
		return NULL;

	/*
	 * When called during GEQO join planning, we are in a short-lived memory
	 * context.  We must make sure that the path and any subsidiary data
	 * structures created for a baserel survive the GEQO cycle, else the
	 * baserel is trashed for future GEQO cycles.  On the other hand, when we
	 * are creating those for a joinrel during GEQO, we don't want them to
	 * clutter the main planning context.  Upshot is that the best solution is
	 * to explicitly allocate memory in the same context the given RelOptInfo
	 * is in.
	 */
	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));

	pathnode = makeNode(UniquePath);

	pathnode->path.pathtype = T_Unique;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = subpath->param_info;
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel &&
		subpath->parallel_safe;
	pathnode->path.parallel_workers = subpath->parallel_workers;

	/*
	 * Assume the output is unsorted, since we don't necessarily have pathkeys
	 * to represent it.  (This might get overridden below.)
	 */
	pathnode->path.pathkeys = NIL;

	pathnode->subpath = subpath;
	pathnode->in_operators = sjinfo->semi_operators;
	pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;

	/*
	 * If the input is a relation and it has a unique index that proves the
	 * semi_rhs_exprs are unique, then we don't need to do anything.  Note
	 * that relation_has_unique_index_for automatically considers restriction
	 * clauses for the rel, as well.
	 */
	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
		relation_has_unique_index_for(root, rel, NIL,
									  sjinfo->semi_rhs_exprs,
									  sjinfo->semi_operators))
	{
		/* No de-duplication work needed; pass the subpath's numbers through */
		pathnode->umethod = UNIQUE_PATH_NOOP;
		pathnode->path.rows = rel->rows;
		pathnode->path.startup_cost = subpath->startup_cost;
		pathnode->path.total_cost = subpath->total_cost;
		pathnode->path.pathkeys = subpath->pathkeys;

		rel->cheapest_unique_path = (Path *) pathnode;

		MemoryContextSwitchTo(oldcontext);

		return pathnode;
	}

	/*
	 * If the input is a subquery whose output must be unique already, then we
	 * don't need to do anything.  The test for uniqueness has to consider
	 * exactly which columns we are extracting; for example "SELECT DISTINCT
	 * x,y" doesn't guarantee that x alone is distinct.  So we cannot check for
	 * this optimization unless semi_rhs_exprs consists only of simple Vars
	 * referencing subquery outputs.  (Possibly we could do something with
	 * expressions in the subquery outputs, too, but for now keep it simple.)
	 */
	if (rel->rtekind == RTE_SUBQUERY)
	{
		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);

		if (query_supports_distinctness(rte->subquery))
		{
			List	   *sub_tlist_colnos;

			/* NIL result here means some expr wasn't a simple Var */
			sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
												   rel->relid);

			if (sub_tlist_colnos &&
				query_is_distinct_for(rte->subquery,
									  sub_tlist_colnos,
									  sjinfo->semi_operators))
			{
				/* Subquery output is provably distinct already */
				pathnode->umethod = UNIQUE_PATH_NOOP;
				pathnode->path.rows = rel->rows;
				pathnode->path.startup_cost = subpath->startup_cost;
				pathnode->path.total_cost = subpath->total_cost;
				pathnode->path.pathkeys = subpath->pathkeys;

				rel->cheapest_unique_path = (Path *) pathnode;

				MemoryContextSwitchTo(oldcontext);

				return pathnode;
			}
		}
	}

	/* Estimate number of output rows */
	pathnode->path.rows = estimate_num_groups(root,
											  sjinfo->semi_rhs_exprs,
											  rel->rows,
											  NULL);
	numCols = list_length(sjinfo->semi_rhs_exprs);

	if (sjinfo->semi_can_btree)
	{
		/*
		 * Estimate cost for sort+unique implementation
		 */
		cost_sort(&sort_path, root, NIL,
				  subpath->total_cost,
				  rel->rows,
				  subpath->pathtarget->width,
				  0.0,
				  work_mem,
				  -1.0);

		/*
		 * Charge one cpu_operator_cost per comparison per input tuple.  We
		 * assume all columns get compared at most of the tuples.  (XXX
		 * probably this is an overestimate.)  This should agree with
		 * create_upper_unique_path.
		 */
		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
	}

	if (sjinfo->semi_can_hash)
	{
		/*
		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
		 * planner.c).
		 */
		int			hashentrysize = subpath->pathtarget->width + 64;

		if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
		{
			/*
			 * We should not try to hash.  Hack the SpecialJoinInfo to
			 * remember this, in case we come through here again.
			 */
			sjinfo->semi_can_hash = false;
		}
		else
			cost_agg(&agg_path, root,
					 AGG_HASHED, NULL,
					 numCols, pathnode->path.rows,
					 NIL,
					 subpath->startup_cost,
					 subpath->total_cost,
					 rel->rows);
	}

	/* Pick the cheaper of the strategies we were able to cost */
	if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
	{
		if (agg_path.total_cost < sort_path.total_cost)
			pathnode->umethod = UNIQUE_PATH_HASH;
		else
			pathnode->umethod = UNIQUE_PATH_SORT;
	}
	else if (sjinfo->semi_can_btree)
		pathnode->umethod = UNIQUE_PATH_SORT;
	else if (sjinfo->semi_can_hash)
		pathnode->umethod = UNIQUE_PATH_HASH;
	else
	{
		/* we can get here only if we abandoned hashing above */
		MemoryContextSwitchTo(oldcontext);
		return NULL;
	}

	if (pathnode->umethod == UNIQUE_PATH_HASH)
	{
		pathnode->path.startup_cost = agg_path.startup_cost;
		pathnode->path.total_cost = agg_path.total_cost;
	}
	else
	{
		pathnode->path.startup_cost = sort_path.startup_cost;
		pathnode->path.total_cost = sort_path.total_cost;
	}

	rel->cheapest_unique_path = (Path *) pathnode;

	MemoryContextSwitchTo(oldcontext);

	return pathnode;
}
1765
/*
 * create_gather_merge_path
 *
 *	  Creates a path corresponding to a gather merge scan, returning
 *	  the pathnode.
 *
 * 'rows', if not NULL, presumably overrides the row estimate inside
 * cost_gather_merge — confirm against that function.
 */
GatherMergePath *
create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
						 PathTarget *target, List *pathkeys,
						 Relids required_outer, double *rows)
{
	GatherMergePath *pathnode = makeNode(GatherMergePath);
	Cost		input_startup_cost = 0;
	Cost		input_total_cost = 0;

	/* Gather Merge only makes sense over a parallel-safe, sorted subpath */
	Assert(subpath->parallel_safe);
	Assert(pathkeys);

	pathnode->path.pathtype = T_GatherMerge;
	pathnode->path.parent = rel;
	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
														  required_outer);
	pathnode->path.parallel_aware = false;

	pathnode->subpath = subpath;
	pathnode->num_workers = subpath->parallel_workers;
	pathnode->path.pathkeys = pathkeys;
	pathnode->path.pathtarget = target ? target : rel->reltarget;
	/* NOTE(review): += relies on makeNode() zero-initializing rows — confirm */
	pathnode->path.rows += subpath->rows;

	if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
	{
		/* Subpath is adequately ordered, we won't need to sort it */
		input_startup_cost += subpath->startup_cost;
		input_total_cost += subpath->total_cost;
	}
	else
	{
		/* We'll need to insert a Sort node, so include cost for that */
		Path		sort_path;	/* dummy for result of cost_sort */

		cost_sort(&sort_path,
				  root,
				  pathkeys,
				  subpath->total_cost,
				  subpath->rows,
				  subpath->pathtarget->width,
				  0.0,
				  work_mem,
				  -1);
		input_startup_cost += sort_path.startup_cost;
		input_total_cost += sort_path.total_cost;
	}

	/* Fill in the cost fields, accounting for the gather/merge overhead */
	cost_gather_merge(pathnode, root, rel, pathnode->path.param_info,
					  input_startup_cost, input_total_cost, rows);

	return pathnode;
}
1825
1826 /*
1827 * translate_sub_tlist - get subquery column numbers represented by tlist
1828 *
1829 * The given targetlist usually contains only Vars referencing the given relid.
1830 * Extract their varattnos (ie, the column numbers of the subquery) and return
1831 * as an integer List.
1832 *
1833 * If any of the tlist items is not a simple Var, we cannot determine whether
1834 * the subquery's uniqueness condition (if any) matches ours, so punt and
1835 * return NIL.
1836 */
1837 static List *
translate_sub_tlist(List * tlist,int relid)1838 translate_sub_tlist(List *tlist, int relid)
1839 {
1840 List *result = NIL;
1841 ListCell *l;
1842
1843 foreach(l, tlist)
1844 {
1845 Var *var = (Var *) lfirst(l);
1846
1847 if (!var || !IsA(var, Var) ||
1848 var->varno != relid)
1849 return NIL; /* punt */
1850
1851 result = lappend_int(result, var->varattno);
1852 }
1853 return result;
1854 }
1855
1856 /*
1857 * create_gather_path
1858 * Creates a path corresponding to a gather scan, returning the
1859 * pathnode.
1860 *
1861 * 'rows' may optionally be set to override row estimates from other sources.
1862 */
1863 GatherPath *
create_gather_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,Relids required_outer,double * rows)1864 create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
1865 PathTarget *target, Relids required_outer, double *rows)
1866 {
1867 GatherPath *pathnode = makeNode(GatherPath);
1868
1869 Assert(subpath->parallel_safe);
1870
1871 pathnode->path.pathtype = T_Gather;
1872 pathnode->path.parent = rel;
1873 pathnode->path.pathtarget = target;
1874 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1875 required_outer);
1876 pathnode->path.parallel_aware = false;
1877 pathnode->path.parallel_safe = false;
1878 pathnode->path.parallel_workers = 0;
1879 pathnode->path.pathkeys = NIL; /* Gather has unordered result */
1880
1881 pathnode->subpath = subpath;
1882 pathnode->num_workers = subpath->parallel_workers;
1883 pathnode->single_copy = false;
1884
1885 if (pathnode->num_workers == 0)
1886 {
1887 pathnode->path.pathkeys = subpath->pathkeys;
1888 pathnode->num_workers = 1;
1889 pathnode->single_copy = true;
1890 }
1891
1892 cost_gather(pathnode, root, rel, pathnode->path.param_info, rows);
1893
1894 return pathnode;
1895 }
1896
1897 /*
1898 * create_subqueryscan_path
1899 * Creates a path corresponding to a scan of a subquery,
1900 * returning the pathnode.
1901 */
1902 SubqueryScanPath *
create_subqueryscan_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * pathkeys,Relids required_outer)1903 create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
1904 List *pathkeys, Relids required_outer)
1905 {
1906 SubqueryScanPath *pathnode = makeNode(SubqueryScanPath);
1907
1908 pathnode->path.pathtype = T_SubqueryScan;
1909 pathnode->path.parent = rel;
1910 pathnode->path.pathtarget = rel->reltarget;
1911 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1912 required_outer);
1913 pathnode->path.parallel_aware = false;
1914 pathnode->path.parallel_safe = rel->consider_parallel &&
1915 subpath->parallel_safe;
1916 pathnode->path.parallel_workers = subpath->parallel_workers;
1917 pathnode->path.pathkeys = pathkeys;
1918 pathnode->subpath = subpath;
1919
1920 cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info);
1921
1922 return pathnode;
1923 }
1924
1925 /*
1926 * create_functionscan_path
1927 * Creates a path corresponding to a sequential scan of a function,
1928 * returning the pathnode.
1929 */
1930 Path *
create_functionscan_path(PlannerInfo * root,RelOptInfo * rel,List * pathkeys,Relids required_outer)1931 create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
1932 List *pathkeys, Relids required_outer)
1933 {
1934 Path *pathnode = makeNode(Path);
1935
1936 pathnode->pathtype = T_FunctionScan;
1937 pathnode->parent = rel;
1938 pathnode->pathtarget = rel->reltarget;
1939 pathnode->param_info = get_baserel_parampathinfo(root, rel,
1940 required_outer);
1941 pathnode->parallel_aware = false;
1942 pathnode->parallel_safe = rel->consider_parallel;
1943 pathnode->parallel_workers = 0;
1944 pathnode->pathkeys = pathkeys;
1945
1946 cost_functionscan(pathnode, root, rel, pathnode->param_info);
1947
1948 return pathnode;
1949 }
1950
1951 /*
1952 * create_tablefuncscan_path
1953 * Creates a path corresponding to a sequential scan of a table function,
1954 * returning the pathnode.
1955 */
1956 Path *
create_tablefuncscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1957 create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
1958 Relids required_outer)
1959 {
1960 Path *pathnode = makeNode(Path);
1961
1962 pathnode->pathtype = T_TableFuncScan;
1963 pathnode->parent = rel;
1964 pathnode->pathtarget = rel->reltarget;
1965 pathnode->param_info = get_baserel_parampathinfo(root, rel,
1966 required_outer);
1967 pathnode->parallel_aware = false;
1968 pathnode->parallel_safe = rel->consider_parallel;
1969 pathnode->parallel_workers = 0;
1970 pathnode->pathkeys = NIL; /* result is always unordered */
1971
1972 cost_tablefuncscan(pathnode, root, rel, pathnode->param_info);
1973
1974 return pathnode;
1975 }
1976
1977 /*
1978 * create_valuesscan_path
1979 * Creates a path corresponding to a scan of a VALUES list,
1980 * returning the pathnode.
1981 */
1982 Path *
create_valuesscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1983 create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
1984 Relids required_outer)
1985 {
1986 Path *pathnode = makeNode(Path);
1987
1988 pathnode->pathtype = T_ValuesScan;
1989 pathnode->parent = rel;
1990 pathnode->pathtarget = rel->reltarget;
1991 pathnode->param_info = get_baserel_parampathinfo(root, rel,
1992 required_outer);
1993 pathnode->parallel_aware = false;
1994 pathnode->parallel_safe = rel->consider_parallel;
1995 pathnode->parallel_workers = 0;
1996 pathnode->pathkeys = NIL; /* result is always unordered */
1997
1998 cost_valuesscan(pathnode, root, rel, pathnode->param_info);
1999
2000 return pathnode;
2001 }
2002
2003 /*
2004 * create_ctescan_path
2005 * Creates a path corresponding to a scan of a non-self-reference CTE,
2006 * returning the pathnode.
2007 */
2008 Path *
create_ctescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2009 create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
2010 {
2011 Path *pathnode = makeNode(Path);
2012
2013 pathnode->pathtype = T_CteScan;
2014 pathnode->parent = rel;
2015 pathnode->pathtarget = rel->reltarget;
2016 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2017 required_outer);
2018 pathnode->parallel_aware = false;
2019 pathnode->parallel_safe = rel->consider_parallel;
2020 pathnode->parallel_workers = 0;
2021 pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
2022
2023 cost_ctescan(pathnode, root, rel, pathnode->param_info);
2024
2025 return pathnode;
2026 }
2027
2028 /*
2029 * create_namedtuplestorescan_path
2030 * Creates a path corresponding to a scan of a named tuplestore, returning
2031 * the pathnode.
2032 */
2033 Path *
create_namedtuplestorescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2034 create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
2035 Relids required_outer)
2036 {
2037 Path *pathnode = makeNode(Path);
2038
2039 pathnode->pathtype = T_NamedTuplestoreScan;
2040 pathnode->parent = rel;
2041 pathnode->pathtarget = rel->reltarget;
2042 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2043 required_outer);
2044 pathnode->parallel_aware = false;
2045 pathnode->parallel_safe = rel->consider_parallel;
2046 pathnode->parallel_workers = 0;
2047 pathnode->pathkeys = NIL; /* result is always unordered */
2048
2049 cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info);
2050
2051 return pathnode;
2052 }
2053
2054 /*
2055 * create_worktablescan_path
2056 * Creates a path corresponding to a scan of a self-reference CTE,
2057 * returning the pathnode.
2058 */
2059 Path *
create_worktablescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2060 create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
2061 Relids required_outer)
2062 {
2063 Path *pathnode = makeNode(Path);
2064
2065 pathnode->pathtype = T_WorkTableScan;
2066 pathnode->parent = rel;
2067 pathnode->pathtarget = rel->reltarget;
2068 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2069 required_outer);
2070 pathnode->parallel_aware = false;
2071 pathnode->parallel_safe = rel->consider_parallel;
2072 pathnode->parallel_workers = 0;
2073 pathnode->pathkeys = NIL; /* result is always unordered */
2074
2075 /* Cost is the same as for a regular CTE scan */
2076 cost_ctescan(pathnode, root, rel, pathnode->param_info);
2077
2078 return pathnode;
2079 }
2080
2081 /*
2082 * create_foreignscan_path
2083 * Creates a path corresponding to a scan of a foreign table, foreign join,
2084 * or foreign upper-relation processing, returning the pathnode.
2085 *
2086 * This function is never called from core Postgres; rather, it's expected
2087 * to be called by the GetForeignPaths, GetForeignJoinPaths, or
2088 * GetForeignUpperPaths function of a foreign data wrapper. We make the FDW
2089 * supply all fields of the path, since we do not have any way to calculate
2090 * them in core. However, there is a usually-sane default for the pathtarget
2091 * (rel->reltarget), so we let a NULL for "target" select that.
2092 */
2093 ForeignPath *
create_foreignscan_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,double rows,Cost startup_cost,Cost total_cost,List * pathkeys,Relids required_outer,Path * fdw_outerpath,List * fdw_private)2094 create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
2095 PathTarget *target,
2096 double rows, Cost startup_cost, Cost total_cost,
2097 List *pathkeys,
2098 Relids required_outer,
2099 Path *fdw_outerpath,
2100 List *fdw_private)
2101 {
2102 ForeignPath *pathnode = makeNode(ForeignPath);
2103
2104 /*
2105 * Since the path's required_outer should always include all the rel's
2106 * lateral_relids, forcibly add those if necessary. This is a bit of a
2107 * hack, but up till early 2019 the contrib FDWs failed to ensure that,
2108 * and it's likely that the same error has propagated into many external
2109 * FDWs. Don't risk modifying the passed-in relid set here.
2110 */
2111 if (rel->lateral_relids && !bms_is_subset(rel->lateral_relids,
2112 required_outer))
2113 required_outer = bms_union(required_outer, rel->lateral_relids);
2114
2115 /*
2116 * Although this function is only designed to be used for scans of
2117 * baserels, before v12 postgres_fdw abused it to make paths for join and
2118 * upper rels. It will work for such cases as long as required_outer is
2119 * empty (otherwise get_baserel_parampathinfo does the wrong thing), which
2120 * fortunately is the expected case for now.
2121 */
2122 if (!bms_is_empty(required_outer) && !IS_SIMPLE_REL(rel))
2123 elog(ERROR, "parameterized foreign joins are not supported yet");
2124
2125 pathnode->path.pathtype = T_ForeignScan;
2126 pathnode->path.parent = rel;
2127 pathnode->path.pathtarget = target ? target : rel->reltarget;
2128 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
2129 required_outer);
2130 pathnode->path.parallel_aware = false;
2131 pathnode->path.parallel_safe = rel->consider_parallel;
2132 pathnode->path.parallel_workers = 0;
2133 pathnode->path.rows = rows;
2134 pathnode->path.startup_cost = startup_cost;
2135 pathnode->path.total_cost = total_cost;
2136 pathnode->path.pathkeys = pathkeys;
2137
2138 pathnode->fdw_outerpath = fdw_outerpath;
2139 pathnode->fdw_private = fdw_private;
2140
2141 return pathnode;
2142 }
2143
2144 /*
2145 * calc_nestloop_required_outer
2146 * Compute the required_outer set for a nestloop join path
2147 *
2148 * Note: result must not share storage with either input
2149 */
2150 Relids
calc_nestloop_required_outer(Relids outerrelids,Relids outer_paramrels,Relids innerrelids,Relids inner_paramrels)2151 calc_nestloop_required_outer(Relids outerrelids,
2152 Relids outer_paramrels,
2153 Relids innerrelids,
2154 Relids inner_paramrels)
2155 {
2156 Relids required_outer;
2157
2158 /* inner_path can require rels from outer path, but not vice versa */
2159 Assert(!bms_overlap(outer_paramrels, innerrelids));
2160 /* easy case if inner path is not parameterized */
2161 if (!inner_paramrels)
2162 return bms_copy(outer_paramrels);
2163 /* else, form the union ... */
2164 required_outer = bms_union(outer_paramrels, inner_paramrels);
2165 /* ... and remove any mention of now-satisfied outer rels */
2166 required_outer = bms_del_members(required_outer,
2167 outerrelids);
2168 /* maintain invariant that required_outer is exactly NULL if empty */
2169 if (bms_is_empty(required_outer))
2170 {
2171 bms_free(required_outer);
2172 required_outer = NULL;
2173 }
2174 return required_outer;
2175 }
2176
2177 /*
2178 * calc_non_nestloop_required_outer
2179 * Compute the required_outer set for a merge or hash join path
2180 *
2181 * Note: result must not share storage with either input
2182 */
2183 Relids
calc_non_nestloop_required_outer(Path * outer_path,Path * inner_path)2184 calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
2185 {
2186 Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
2187 Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
2188 Relids required_outer;
2189
2190 /* neither path can require rels from the other */
2191 Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
2192 Assert(!bms_overlap(inner_paramrels, outer_path->parent->relids));
2193 /* form the union ... */
2194 required_outer = bms_union(outer_paramrels, inner_paramrels);
2195 /* we do not need an explicit test for empty; bms_union gets it right */
2196 return required_outer;
2197 }
2198
2199 /*
2200 * create_nestloop_path
2201 * Creates a pathnode corresponding to a nestloop join between two
2202 * relations.
2203 *
2204 * 'joinrel' is the join relation.
2205 * 'jointype' is the type of join required
2206 * 'workspace' is the result from initial_cost_nestloop
2207 * 'extra' contains various information about the join
2208 * 'outer_path' is the outer path
2209 * 'inner_path' is the inner path
2210 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
2211 * 'pathkeys' are the path keys of the new join path
2212 * 'required_outer' is the set of required outer rels
2213 *
2214 * Returns the resulting path node.
2215 */
2216 NestPath *
create_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,JoinCostWorkspace * workspace,JoinPathExtraData * extra,Path * outer_path,Path * inner_path,List * restrict_clauses,List * pathkeys,Relids required_outer)2217 create_nestloop_path(PlannerInfo *root,
2218 RelOptInfo *joinrel,
2219 JoinType jointype,
2220 JoinCostWorkspace *workspace,
2221 JoinPathExtraData *extra,
2222 Path *outer_path,
2223 Path *inner_path,
2224 List *restrict_clauses,
2225 List *pathkeys,
2226 Relids required_outer)
2227 {
2228 NestPath *pathnode = makeNode(NestPath);
2229 Relids inner_req_outer = PATH_REQ_OUTER(inner_path);
2230
2231 /*
2232 * If the inner path is parameterized by the outer, we must drop any
2233 * restrict_clauses that are due to be moved into the inner path. We have
2234 * to do this now, rather than postpone the work till createplan time,
2235 * because the restrict_clauses list can affect the size and cost
2236 * estimates for this path.
2237 */
2238 if (bms_overlap(inner_req_outer, outer_path->parent->relids))
2239 {
2240 Relids inner_and_outer = bms_union(inner_path->parent->relids,
2241 inner_req_outer);
2242 List *jclauses = NIL;
2243 ListCell *lc;
2244
2245 foreach(lc, restrict_clauses)
2246 {
2247 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
2248
2249 if (!join_clause_is_movable_into(rinfo,
2250 inner_path->parent->relids,
2251 inner_and_outer))
2252 jclauses = lappend(jclauses, rinfo);
2253 }
2254 restrict_clauses = jclauses;
2255 }
2256
2257 pathnode->path.pathtype = T_NestLoop;
2258 pathnode->path.parent = joinrel;
2259 pathnode->path.pathtarget = joinrel->reltarget;
2260 pathnode->path.param_info =
2261 get_joinrel_parampathinfo(root,
2262 joinrel,
2263 outer_path,
2264 inner_path,
2265 extra->sjinfo,
2266 required_outer,
2267 &restrict_clauses);
2268 pathnode->path.parallel_aware = false;
2269 pathnode->path.parallel_safe = joinrel->consider_parallel &&
2270 outer_path->parallel_safe && inner_path->parallel_safe;
2271 /* This is a foolish way to estimate parallel_workers, but for now... */
2272 pathnode->path.parallel_workers = outer_path->parallel_workers;
2273 pathnode->path.pathkeys = pathkeys;
2274 pathnode->jointype = jointype;
2275 pathnode->inner_unique = extra->inner_unique;
2276 pathnode->outerjoinpath = outer_path;
2277 pathnode->innerjoinpath = inner_path;
2278 pathnode->joinrestrictinfo = restrict_clauses;
2279
2280 final_cost_nestloop(root, pathnode, workspace, extra);
2281
2282 return pathnode;
2283 }
2284
2285 /*
2286 * create_mergejoin_path
2287 * Creates a pathnode corresponding to a mergejoin join between
2288 * two relations
2289 *
2290 * 'joinrel' is the join relation
2291 * 'jointype' is the type of join required
2292 * 'workspace' is the result from initial_cost_mergejoin
2293 * 'extra' contains various information about the join
2294 * 'outer_path' is the outer path
2295 * 'inner_path' is the inner path
2296 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
2297 * 'pathkeys' are the path keys of the new join path
2298 * 'required_outer' is the set of required outer rels
2299 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
2300 * (this should be a subset of the restrict_clauses list)
2301 * 'outersortkeys' are the sort varkeys for the outer relation
2302 * 'innersortkeys' are the sort varkeys for the inner relation
2303 */
2304 MergePath *
create_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,JoinCostWorkspace * workspace,JoinPathExtraData * extra,Path * outer_path,Path * inner_path,List * restrict_clauses,List * pathkeys,Relids required_outer,List * mergeclauses,List * outersortkeys,List * innersortkeys)2305 create_mergejoin_path(PlannerInfo *root,
2306 RelOptInfo *joinrel,
2307 JoinType jointype,
2308 JoinCostWorkspace *workspace,
2309 JoinPathExtraData *extra,
2310 Path *outer_path,
2311 Path *inner_path,
2312 List *restrict_clauses,
2313 List *pathkeys,
2314 Relids required_outer,
2315 List *mergeclauses,
2316 List *outersortkeys,
2317 List *innersortkeys)
2318 {
2319 MergePath *pathnode = makeNode(MergePath);
2320
2321 pathnode->jpath.path.pathtype = T_MergeJoin;
2322 pathnode->jpath.path.parent = joinrel;
2323 pathnode->jpath.path.pathtarget = joinrel->reltarget;
2324 pathnode->jpath.path.param_info =
2325 get_joinrel_parampathinfo(root,
2326 joinrel,
2327 outer_path,
2328 inner_path,
2329 extra->sjinfo,
2330 required_outer,
2331 &restrict_clauses);
2332 pathnode->jpath.path.parallel_aware = false;
2333 pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
2334 outer_path->parallel_safe && inner_path->parallel_safe;
2335 /* This is a foolish way to estimate parallel_workers, but for now... */
2336 pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
2337 pathnode->jpath.path.pathkeys = pathkeys;
2338 pathnode->jpath.jointype = jointype;
2339 pathnode->jpath.inner_unique = extra->inner_unique;
2340 pathnode->jpath.outerjoinpath = outer_path;
2341 pathnode->jpath.innerjoinpath = inner_path;
2342 pathnode->jpath.joinrestrictinfo = restrict_clauses;
2343 pathnode->path_mergeclauses = mergeclauses;
2344 pathnode->outersortkeys = outersortkeys;
2345 pathnode->innersortkeys = innersortkeys;
2346 /* pathnode->skip_mark_restore will be set by final_cost_mergejoin */
2347 /* pathnode->materialize_inner will be set by final_cost_mergejoin */
2348
2349 final_cost_mergejoin(root, pathnode, workspace, extra);
2350
2351 return pathnode;
2352 }
2353
2354 /*
2355 * create_hashjoin_path
2356 * Creates a pathnode corresponding to a hash join between two relations.
2357 *
2358 * 'joinrel' is the join relation
2359 * 'jointype' is the type of join required
2360 * 'workspace' is the result from initial_cost_hashjoin
2361 * 'extra' contains various information about the join
2362 * 'outer_path' is the cheapest outer path
2363 * 'inner_path' is the cheapest inner path
2364 * 'parallel_hash' to select Parallel Hash of inner path (shared hash table)
2365 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
2366 * 'required_outer' is the set of required outer rels
2367 * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
2368 * (this should be a subset of the restrict_clauses list)
2369 */
2370 HashPath *
create_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,JoinCostWorkspace * workspace,JoinPathExtraData * extra,Path * outer_path,Path * inner_path,bool parallel_hash,List * restrict_clauses,Relids required_outer,List * hashclauses)2371 create_hashjoin_path(PlannerInfo *root,
2372 RelOptInfo *joinrel,
2373 JoinType jointype,
2374 JoinCostWorkspace *workspace,
2375 JoinPathExtraData *extra,
2376 Path *outer_path,
2377 Path *inner_path,
2378 bool parallel_hash,
2379 List *restrict_clauses,
2380 Relids required_outer,
2381 List *hashclauses)
2382 {
2383 HashPath *pathnode = makeNode(HashPath);
2384
2385 pathnode->jpath.path.pathtype = T_HashJoin;
2386 pathnode->jpath.path.parent = joinrel;
2387 pathnode->jpath.path.pathtarget = joinrel->reltarget;
2388 pathnode->jpath.path.param_info =
2389 get_joinrel_parampathinfo(root,
2390 joinrel,
2391 outer_path,
2392 inner_path,
2393 extra->sjinfo,
2394 required_outer,
2395 &restrict_clauses);
2396 pathnode->jpath.path.parallel_aware =
2397 joinrel->consider_parallel && parallel_hash;
2398 pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
2399 outer_path->parallel_safe && inner_path->parallel_safe;
2400 /* This is a foolish way to estimate parallel_workers, but for now... */
2401 pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
2402
2403 /*
2404 * A hashjoin never has pathkeys, since its output ordering is
2405 * unpredictable due to possible batching. XXX If the inner relation is
2406 * small enough, we could instruct the executor that it must not batch,
2407 * and then we could assume that the output inherits the outer relation's
2408 * ordering, which might save a sort step. However there is considerable
2409 * downside if our estimate of the inner relation size is badly off. For
2410 * the moment we don't risk it. (Note also that if we wanted to take this
2411 * seriously, joinpath.c would have to consider many more paths for the
2412 * outer rel than it does now.)
2413 */
2414 pathnode->jpath.path.pathkeys = NIL;
2415 pathnode->jpath.jointype = jointype;
2416 pathnode->jpath.inner_unique = extra->inner_unique;
2417 pathnode->jpath.outerjoinpath = outer_path;
2418 pathnode->jpath.innerjoinpath = inner_path;
2419 pathnode->jpath.joinrestrictinfo = restrict_clauses;
2420 pathnode->path_hashclauses = hashclauses;
2421 /* final_cost_hashjoin will fill in pathnode->num_batches */
2422
2423 final_cost_hashjoin(root, pathnode, workspace, extra);
2424
2425 return pathnode;
2426 }
2427
2428 /*
2429 * create_projection_path
2430 * Creates a pathnode that represents performing a projection.
2431 *
2432 * 'rel' is the parent relation associated with the result
2433 * 'subpath' is the path representing the source of data
2434 * 'target' is the PathTarget to be computed
2435 */
2436 ProjectionPath *
create_projection_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target)2437 create_projection_path(PlannerInfo *root,
2438 RelOptInfo *rel,
2439 Path *subpath,
2440 PathTarget *target)
2441 {
2442 ProjectionPath *pathnode = makeNode(ProjectionPath);
2443 PathTarget *oldtarget;
2444
2445 /*
2446 * We mustn't put a ProjectionPath directly above another; it's useless
2447 * and will confuse create_projection_plan. Rather than making sure all
2448 * callers handle that, let's implement it here, by stripping off any
2449 * ProjectionPath in what we're given. Given this rule, there won't be
2450 * more than one.
2451 */
2452 if (IsA(subpath, ProjectionPath))
2453 {
2454 ProjectionPath *subpp = (ProjectionPath *) subpath;
2455
2456 Assert(subpp->path.parent == rel);
2457 subpath = subpp->subpath;
2458 Assert(!IsA(subpath, ProjectionPath));
2459 }
2460
2461 pathnode->path.pathtype = T_Result;
2462 pathnode->path.parent = rel;
2463 pathnode->path.pathtarget = target;
2464 /* For now, assume we are above any joins, so no parameterization */
2465 pathnode->path.param_info = NULL;
2466 pathnode->path.parallel_aware = false;
2467 pathnode->path.parallel_safe = rel->consider_parallel &&
2468 subpath->parallel_safe &&
2469 is_parallel_safe(root, (Node *) target->exprs);
2470 pathnode->path.parallel_workers = subpath->parallel_workers;
2471 /* Projection does not change the sort order */
2472 pathnode->path.pathkeys = subpath->pathkeys;
2473
2474 pathnode->subpath = subpath;
2475
2476 /*
2477 * We might not need a separate Result node. If the input plan node type
2478 * can project, we can just tell it to project something else. Or, if it
2479 * can't project but the desired target has the same expression list as
2480 * what the input will produce anyway, we can still give it the desired
2481 * tlist (possibly changing its ressortgroupref labels, but nothing else).
2482 * Note: in the latter case, create_projection_plan has to recheck our
2483 * conclusion; see comments therein.
2484 */
2485 oldtarget = subpath->pathtarget;
2486 if (is_projection_capable_path(subpath) ||
2487 equal(oldtarget->exprs, target->exprs))
2488 {
2489 /* No separate Result node needed */
2490 pathnode->dummypp = true;
2491
2492 /*
2493 * Set cost of plan as subpath's cost, adjusted for tlist replacement.
2494 */
2495 pathnode->path.rows = subpath->rows;
2496 pathnode->path.startup_cost = subpath->startup_cost +
2497 (target->cost.startup - oldtarget->cost.startup);
2498 pathnode->path.total_cost = subpath->total_cost +
2499 (target->cost.startup - oldtarget->cost.startup) +
2500 (target->cost.per_tuple - oldtarget->cost.per_tuple) * subpath->rows;
2501 }
2502 else
2503 {
2504 /* We really do need the Result node */
2505 pathnode->dummypp = false;
2506
2507 /*
2508 * The Result node's cost is cpu_tuple_cost per row, plus the cost of
2509 * evaluating the tlist. There is no qual to worry about.
2510 */
2511 pathnode->path.rows = subpath->rows;
2512 pathnode->path.startup_cost = subpath->startup_cost +
2513 target->cost.startup;
2514 pathnode->path.total_cost = subpath->total_cost +
2515 target->cost.startup +
2516 (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows;
2517 }
2518
2519 return pathnode;
2520 }
2521
2522 /*
2523 * apply_projection_to_path
2524 * Add a projection step, or just apply the target directly to given path.
2525 *
2526 * This has the same net effect as create_projection_path(), except that if
2527 * a separate Result plan node isn't needed, we just replace the given path's
2528 * pathtarget with the desired one. This must be used only when the caller
2529 * knows that the given path isn't referenced elsewhere and so can be modified
2530 * in-place.
2531 *
2532 * If the input path is a GatherPath or GatherMergePath, we try to push the
2533 * new target down to its input as well; this is a yet more invasive
2534 * modification of the input path, which create_projection_path() can't do.
2535 *
2536 * Note that we mustn't change the source path's parent link; so when it is
2537 * add_path'd to "rel" things will be a bit inconsistent. So far that has
2538 * not caused any trouble.
2539 *
2540 * 'rel' is the parent relation associated with the result
2541 * 'path' is the path representing the source of data
2542 * 'target' is the PathTarget to be computed
2543 */
2544 Path *
apply_projection_to_path(PlannerInfo * root,RelOptInfo * rel,Path * path,PathTarget * target)2545 apply_projection_to_path(PlannerInfo *root,
2546 RelOptInfo *rel,
2547 Path *path,
2548 PathTarget *target)
2549 {
2550 QualCost oldcost;
2551
2552 /*
2553 * If given path can't project, we might need a Result node, so make a
2554 * separate ProjectionPath.
2555 */
2556 if (!is_projection_capable_path(path))
2557 return (Path *) create_projection_path(root, rel, path, target);
2558
2559 /*
2560 * We can just jam the desired tlist into the existing path, being sure to
2561 * update its cost estimates appropriately.
2562 */
2563 oldcost = path->pathtarget->cost;
2564 path->pathtarget = target;
2565
2566 path->startup_cost += target->cost.startup - oldcost.startup;
2567 path->total_cost += target->cost.startup - oldcost.startup +
2568 (target->cost.per_tuple - oldcost.per_tuple) * path->rows;
2569
2570 /*
2571 * If the path happens to be a Gather or GatherMerge path, we'd like to
2572 * arrange for the subpath to return the required target list so that
2573 * workers can help project. But if there is something that is not
2574 * parallel-safe in the target expressions, then we can't.
2575 */
2576 if ((IsA(path, GatherPath) ||IsA(path, GatherMergePath)) &&
2577 is_parallel_safe(root, (Node *) target->exprs))
2578 {
2579 /*
2580 * We always use create_projection_path here, even if the subpath is
2581 * projection-capable, so as to avoid modifying the subpath in place.
2582 * It seems unlikely at present that there could be any other
2583 * references to the subpath, but better safe than sorry.
2584 *
2585 * Note that we don't change the parallel path's cost estimates; it
2586 * might be appropriate to do so, to reflect the fact that the bulk of
2587 * the target evaluation will happen in workers.
2588 */
2589 if (IsA(path, GatherPath))
2590 {
2591 GatherPath *gpath = (GatherPath *) path;
2592
2593 gpath->subpath = (Path *)
2594 create_projection_path(root,
2595 gpath->subpath->parent,
2596 gpath->subpath,
2597 target);
2598 }
2599 else
2600 {
2601 GatherMergePath *gmpath = (GatherMergePath *) path;
2602
2603 gmpath->subpath = (Path *)
2604 create_projection_path(root,
2605 gmpath->subpath->parent,
2606 gmpath->subpath,
2607 target);
2608 }
2609 }
2610 else if (path->parallel_safe &&
2611 !is_parallel_safe(root, (Node *) target->exprs))
2612 {
2613 /*
2614 * We're inserting a parallel-restricted target list into a path
2615 * currently marked parallel-safe, so we have to mark it as no longer
2616 * safe.
2617 */
2618 path->parallel_safe = false;
2619 }
2620
2621 return path;
2622 }
2623
2624 /*
2625 * create_set_projection_path
2626 * Creates a pathnode that represents performing a projection that
2627 * includes set-returning functions.
2628 *
2629 * 'rel' is the parent relation associated with the result
2630 * 'subpath' is the path representing the source of data
2631 * 'target' is the PathTarget to be computed
2632 */
2633 ProjectSetPath *
create_set_projection_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target)2634 create_set_projection_path(PlannerInfo *root,
2635 RelOptInfo *rel,
2636 Path *subpath,
2637 PathTarget *target)
2638 {
2639 ProjectSetPath *pathnode = makeNode(ProjectSetPath);
2640 double tlist_rows;
2641 ListCell *lc;
2642
2643 pathnode->path.pathtype = T_ProjectSet;
2644 pathnode->path.parent = rel;
2645 pathnode->path.pathtarget = target;
2646 /* For now, assume we are above any joins, so no parameterization */
2647 pathnode->path.param_info = NULL;
2648 pathnode->path.parallel_aware = false;
2649 pathnode->path.parallel_safe = rel->consider_parallel &&
2650 subpath->parallel_safe &&
2651 is_parallel_safe(root, (Node *) target->exprs);
2652 pathnode->path.parallel_workers = subpath->parallel_workers;
2653 /* Projection does not change the sort order XXX? */
2654 pathnode->path.pathkeys = subpath->pathkeys;
2655
2656 pathnode->subpath = subpath;
2657
2658 /*
2659 * Estimate number of rows produced by SRFs for each row of input; if
2660 * there's more than one in this node, use the maximum.
2661 */
2662 tlist_rows = 1;
2663 foreach(lc, target->exprs)
2664 {
2665 Node *node = (Node *) lfirst(lc);
2666 double itemrows;
2667
2668 itemrows = expression_returns_set_rows(node);
2669 if (tlist_rows < itemrows)
2670 tlist_rows = itemrows;
2671 }
2672
2673 /*
2674 * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost
2675 * per input row, and half of cpu_tuple_cost for each added output row.
2676 * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit
2677 * this estimate later.
2678 */
2679 pathnode->path.rows = subpath->rows * tlist_rows;
2680 pathnode->path.startup_cost = subpath->startup_cost +
2681 target->cost.startup;
2682 pathnode->path.total_cost = subpath->total_cost +
2683 target->cost.startup +
2684 (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows +
2685 (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2;
2686
2687 return pathnode;
2688 }
2689
2690 /*
2691 * create_sort_path
2692 * Creates a pathnode that represents performing an explicit sort.
2693 *
2694 * 'rel' is the parent relation associated with the result
2695 * 'subpath' is the path representing the source of data
2696 * 'pathkeys' represents the desired sort order
2697 * 'limit_tuples' is the estimated bound on the number of output tuples,
2698 * or -1 if no LIMIT or couldn't estimate
2699 */
2700 SortPath *
create_sort_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * pathkeys,double limit_tuples)2701 create_sort_path(PlannerInfo *root,
2702 RelOptInfo *rel,
2703 Path *subpath,
2704 List *pathkeys,
2705 double limit_tuples)
2706 {
2707 SortPath *pathnode = makeNode(SortPath);
2708
2709 pathnode->path.pathtype = T_Sort;
2710 pathnode->path.parent = rel;
2711 /* Sort doesn't project, so use source path's pathtarget */
2712 pathnode->path.pathtarget = subpath->pathtarget;
2713 /* For now, assume we are above any joins, so no parameterization */
2714 pathnode->path.param_info = NULL;
2715 pathnode->path.parallel_aware = false;
2716 pathnode->path.parallel_safe = rel->consider_parallel &&
2717 subpath->parallel_safe;
2718 pathnode->path.parallel_workers = subpath->parallel_workers;
2719 pathnode->path.pathkeys = pathkeys;
2720
2721 pathnode->subpath = subpath;
2722
2723 cost_sort(&pathnode->path, root, pathkeys,
2724 subpath->total_cost,
2725 subpath->rows,
2726 subpath->pathtarget->width,
2727 0.0, /* XXX comparison_cost shouldn't be 0? */
2728 work_mem, limit_tuples);
2729
2730 return pathnode;
2731 }
2732
2733 /*
2734 * create_group_path
2735 * Creates a pathnode that represents performing grouping of presorted input
2736 *
2737 * 'rel' is the parent relation associated with the result
2738 * 'subpath' is the path representing the source of data
2739 * 'target' is the PathTarget to be computed
2740 * 'groupClause' is a list of SortGroupClause's representing the grouping
2741 * 'qual' is the HAVING quals if any
2742 * 'numGroups' is the estimated number of groups
2743 */
2744 GroupPath *
create_group_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * groupClause,List * qual,double numGroups)2745 create_group_path(PlannerInfo *root,
2746 RelOptInfo *rel,
2747 Path *subpath,
2748 List *groupClause,
2749 List *qual,
2750 double numGroups)
2751 {
2752 GroupPath *pathnode = makeNode(GroupPath);
2753 PathTarget *target = rel->reltarget;
2754
2755 pathnode->path.pathtype = T_Group;
2756 pathnode->path.parent = rel;
2757 pathnode->path.pathtarget = target;
2758 /* For now, assume we are above any joins, so no parameterization */
2759 pathnode->path.param_info = NULL;
2760 pathnode->path.parallel_aware = false;
2761 pathnode->path.parallel_safe = rel->consider_parallel &&
2762 subpath->parallel_safe;
2763 pathnode->path.parallel_workers = subpath->parallel_workers;
2764 /* Group doesn't change sort ordering */
2765 pathnode->path.pathkeys = subpath->pathkeys;
2766
2767 pathnode->subpath = subpath;
2768
2769 pathnode->groupClause = groupClause;
2770 pathnode->qual = qual;
2771
2772 cost_group(&pathnode->path, root,
2773 list_length(groupClause),
2774 numGroups,
2775 qual,
2776 subpath->startup_cost, subpath->total_cost,
2777 subpath->rows);
2778
2779 /* add tlist eval cost for each output row */
2780 pathnode->path.startup_cost += target->cost.startup;
2781 pathnode->path.total_cost += target->cost.startup +
2782 target->cost.per_tuple * pathnode->path.rows;
2783
2784 return pathnode;
2785 }
2786
2787 /*
2788 * create_upper_unique_path
2789 * Creates a pathnode that represents performing an explicit Unique step
2790 * on presorted input.
2791 *
2792 * This produces a Unique plan node, but the use-case is so different from
2793 * create_unique_path that it doesn't seem worth trying to merge the two.
2794 *
2795 * 'rel' is the parent relation associated with the result
2796 * 'subpath' is the path representing the source of data
2797 * 'numCols' is the number of grouping columns
2798 * 'numGroups' is the estimated number of groups
2799 *
2800 * The input path must be sorted on the grouping columns, plus possibly
2801 * additional columns; so the first numCols pathkeys are the grouping columns
2802 */
2803 UpperUniquePath *
create_upper_unique_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,int numCols,double numGroups)2804 create_upper_unique_path(PlannerInfo *root,
2805 RelOptInfo *rel,
2806 Path *subpath,
2807 int numCols,
2808 double numGroups)
2809 {
2810 UpperUniquePath *pathnode = makeNode(UpperUniquePath);
2811
2812 pathnode->path.pathtype = T_Unique;
2813 pathnode->path.parent = rel;
2814 /* Unique doesn't project, so use source path's pathtarget */
2815 pathnode->path.pathtarget = subpath->pathtarget;
2816 /* For now, assume we are above any joins, so no parameterization */
2817 pathnode->path.param_info = NULL;
2818 pathnode->path.parallel_aware = false;
2819 pathnode->path.parallel_safe = rel->consider_parallel &&
2820 subpath->parallel_safe;
2821 pathnode->path.parallel_workers = subpath->parallel_workers;
2822 /* Unique doesn't change the input ordering */
2823 pathnode->path.pathkeys = subpath->pathkeys;
2824
2825 pathnode->subpath = subpath;
2826 pathnode->numkeys = numCols;
2827
2828 /*
2829 * Charge one cpu_operator_cost per comparison per input tuple. We assume
2830 * all columns get compared at most of the tuples. (XXX probably this is
2831 * an overestimate.)
2832 */
2833 pathnode->path.startup_cost = subpath->startup_cost;
2834 pathnode->path.total_cost = subpath->total_cost +
2835 cpu_operator_cost * subpath->rows * numCols;
2836 pathnode->path.rows = numGroups;
2837
2838 return pathnode;
2839 }
2840
2841 /*
2842 * create_agg_path
2843 * Creates a pathnode that represents performing aggregation/grouping
2844 *
2845 * 'rel' is the parent relation associated with the result
2846 * 'subpath' is the path representing the source of data
2847 * 'target' is the PathTarget to be computed
2848 * 'aggstrategy' is the Agg node's basic implementation strategy
2849 * 'aggsplit' is the Agg node's aggregate-splitting mode
2850 * 'groupClause' is a list of SortGroupClause's representing the grouping
2851 * 'qual' is the HAVING quals if any
2852 * 'aggcosts' contains cost info about the aggregate functions to be computed
2853 * 'numGroups' is the estimated number of groups (1 if not grouping)
2854 */
2855 AggPath *
create_agg_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,AggStrategy aggstrategy,AggSplit aggsplit,List * groupClause,List * qual,const AggClauseCosts * aggcosts,double numGroups)2856 create_agg_path(PlannerInfo *root,
2857 RelOptInfo *rel,
2858 Path *subpath,
2859 PathTarget *target,
2860 AggStrategy aggstrategy,
2861 AggSplit aggsplit,
2862 List *groupClause,
2863 List *qual,
2864 const AggClauseCosts *aggcosts,
2865 double numGroups)
2866 {
2867 AggPath *pathnode = makeNode(AggPath);
2868
2869 pathnode->path.pathtype = T_Agg;
2870 pathnode->path.parent = rel;
2871 pathnode->path.pathtarget = target;
2872 /* For now, assume we are above any joins, so no parameterization */
2873 pathnode->path.param_info = NULL;
2874 pathnode->path.parallel_aware = false;
2875 pathnode->path.parallel_safe = rel->consider_parallel &&
2876 subpath->parallel_safe;
2877 pathnode->path.parallel_workers = subpath->parallel_workers;
2878 if (aggstrategy == AGG_SORTED)
2879 pathnode->path.pathkeys = subpath->pathkeys; /* preserves order */
2880 else
2881 pathnode->path.pathkeys = NIL; /* output is unordered */
2882 pathnode->subpath = subpath;
2883
2884 pathnode->aggstrategy = aggstrategy;
2885 pathnode->aggsplit = aggsplit;
2886 pathnode->numGroups = numGroups;
2887 pathnode->groupClause = groupClause;
2888 pathnode->qual = qual;
2889
2890 cost_agg(&pathnode->path, root,
2891 aggstrategy, aggcosts,
2892 list_length(groupClause), numGroups,
2893 qual,
2894 subpath->startup_cost, subpath->total_cost,
2895 subpath->rows);
2896
2897 /* add tlist eval cost for each output row */
2898 pathnode->path.startup_cost += target->cost.startup;
2899 pathnode->path.total_cost += target->cost.startup +
2900 target->cost.per_tuple * pathnode->path.rows;
2901
2902 return pathnode;
2903 }
2904
/*
 * create_groupingsets_path
 *	  Creates a pathnode that represents performing GROUPING SETS aggregation
 *
 * GroupingSetsPath represents sorted grouping with one or more grouping sets.
 * The input path's result must be sorted to match the last entry in
 * rollup_groupclauses.
 *
 * 'rel' is the parent relation associated with the result
 * 'subpath' is the path representing the source of data
 * 'target' is the PathTarget to be computed
 * 'having_qual' is the HAVING quals if any
 * 'rollups' is a list of RollupData nodes
 * 'agg_costs' contains cost info about the aggregate functions to be computed
 * 'numGroups' is the estimated total number of groups
 */
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
						 RelOptInfo *rel,
						 Path *subpath,
						 List *having_qual,
						 AggStrategy aggstrategy,
						 List *rollups,
						 const AggClauseCosts *agg_costs,
						 double numGroups)
{
	GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
	PathTarget *target = rel->reltarget;
	ListCell   *lc;
	bool		is_first = true;	/* first rollup charges the input cost */
	bool		is_first_sort = true;	/* first non-hashed rollup reuses the
										 * presorted input, so no extra sort */

	/* The topmost generated Plan node will be an Agg */
	pathnode->path.pathtype = T_Agg;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = target;
	pathnode->path.param_info = subpath->param_info;
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel &&
		subpath->parallel_safe;
	pathnode->path.parallel_workers = subpath->parallel_workers;
	pathnode->subpath = subpath;

	/*
	 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
	 * to AGG_HASHED, here if possible.
	 */
	if (aggstrategy == AGG_SORTED &&
		list_length(rollups) == 1 &&
		((RollupData *) linitial(rollups))->groupClause == NIL)
		aggstrategy = AGG_PLAIN;

	if (aggstrategy == AGG_MIXED &&
		list_length(rollups) == 1)
		aggstrategy = AGG_HASHED;

	/*
	 * Output will be in sorted order by group_pathkeys if, and only if, there
	 * is a single rollup operation on a non-empty list of grouping
	 * expressions.
	 */
	if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
		pathnode->path.pathkeys = root->group_pathkeys;
	else
		pathnode->path.pathkeys = NIL;

	pathnode->aggstrategy = aggstrategy;
	pathnode->rollups = rollups;
	pathnode->qual = having_qual;

	Assert(rollups != NIL);
	Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
	Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);

	foreach(lc, rollups)
	{
		RollupData *rollup = lfirst(lc);
		List	   *gsets = rollup->gsets;
		int			numGroupCols = list_length(linitial(gsets));

		/*
		 * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
		 * (already-sorted) input, and following ones do their own sort.
		 *
		 * In AGG_HASHED mode, there is one rollup for each grouping set.
		 *
		 * In AGG_MIXED mode, the first rollups are hashed, the first
		 * non-hashed one takes the (already-sorted) input, and following ones
		 * do their own sort.
		 */
		if (is_first)
		{
			/* First rollup: cost_agg fills in the path's costs directly,
			 * including the subpath's input cost. */
			cost_agg(&pathnode->path, root,
					 aggstrategy,
					 agg_costs,
					 numGroupCols,
					 rollup->numGroups,
					 having_qual,
					 subpath->startup_cost,
					 subpath->total_cost,
					 subpath->rows);
			is_first = false;
			if (!rollup->is_hashed)
				is_first_sort = false;
		}
		else
		{
			Path		sort_path;	/* dummy for result of cost_sort */
			Path		agg_path;	/* dummy for result of cost_agg */

			if (rollup->is_hashed || is_first_sort)
			{
				/*
				 * Account for cost of aggregation, but don't charge input
				 * cost again
				 */
				cost_agg(&agg_path, root,
						 rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
						 agg_costs,
						 numGroupCols,
						 rollup->numGroups,
						 having_qual,
						 0.0, 0.0,
						 subpath->rows);
				if (!rollup->is_hashed)
					is_first_sort = false;
			}
			else
			{
				/* Account for cost of sort, but don't charge input cost again */
				cost_sort(&sort_path, root, NIL,
						  0.0,
						  subpath->rows,
						  subpath->pathtarget->width,
						  0.0,
						  work_mem,
						  -1.0);

				/* Account for cost of aggregation */

				cost_agg(&agg_path, root,
						 AGG_SORTED,
						 agg_costs,
						 numGroupCols,
						 rollup->numGroups,
						 having_qual,
						 sort_path.startup_cost,
						 sort_path.total_cost,
						 sort_path.rows);
			}

			/* fold this rollup's cost and rowcount into the running totals */
			pathnode->path.total_cost += agg_path.total_cost;
			pathnode->path.rows += agg_path.rows;
		}
	}

	/* add tlist eval cost for each output row */
	pathnode->path.startup_cost += target->cost.startup;
	pathnode->path.total_cost += target->cost.startup +
		target->cost.per_tuple * pathnode->path.rows;

	return pathnode;
}
3068
3069 /*
3070 * create_minmaxagg_path
3071 * Creates a pathnode that represents computation of MIN/MAX aggregates
3072 *
3073 * 'rel' is the parent relation associated with the result
3074 * 'target' is the PathTarget to be computed
3075 * 'mmaggregates' is a list of MinMaxAggInfo structs
3076 * 'quals' is the HAVING quals if any
3077 */
3078 MinMaxAggPath *
create_minmaxagg_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,List * mmaggregates,List * quals)3079 create_minmaxagg_path(PlannerInfo *root,
3080 RelOptInfo *rel,
3081 PathTarget *target,
3082 List *mmaggregates,
3083 List *quals)
3084 {
3085 MinMaxAggPath *pathnode = makeNode(MinMaxAggPath);
3086 Cost initplan_cost;
3087 ListCell *lc;
3088
3089 /* The topmost generated Plan node will be a Result */
3090 pathnode->path.pathtype = T_Result;
3091 pathnode->path.parent = rel;
3092 pathnode->path.pathtarget = target;
3093 /* For now, assume we are above any joins, so no parameterization */
3094 pathnode->path.param_info = NULL;
3095 pathnode->path.parallel_aware = false;
3096 /* A MinMaxAggPath implies use of subplans, so cannot be parallel-safe */
3097 pathnode->path.parallel_safe = false;
3098 pathnode->path.parallel_workers = 0;
3099 /* Result is one unordered row */
3100 pathnode->path.rows = 1;
3101 pathnode->path.pathkeys = NIL;
3102
3103 pathnode->mmaggregates = mmaggregates;
3104 pathnode->quals = quals;
3105
3106 /* Calculate cost of all the initplans ... */
3107 initplan_cost = 0;
3108 foreach(lc, mmaggregates)
3109 {
3110 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
3111
3112 initplan_cost += mminfo->pathcost;
3113 }
3114
3115 /* add tlist eval cost for each output row, plus cpu_tuple_cost */
3116 pathnode->path.startup_cost = initplan_cost + target->cost.startup;
3117 pathnode->path.total_cost = initplan_cost + target->cost.startup +
3118 target->cost.per_tuple + cpu_tuple_cost;
3119
3120 /*
3121 * Add cost of qual, if any --- but we ignore its selectivity, since our
3122 * rowcount estimate should be 1 no matter what the qual is.
3123 */
3124 if (quals)
3125 {
3126 QualCost qual_cost;
3127
3128 cost_qual_eval(&qual_cost, quals, root);
3129 pathnode->path.startup_cost += qual_cost.startup;
3130 pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
3131 }
3132
3133 return pathnode;
3134 }
3135
3136 /*
3137 * create_windowagg_path
3138 * Creates a pathnode that represents computation of window functions
3139 *
3140 * 'rel' is the parent relation associated with the result
3141 * 'subpath' is the path representing the source of data
3142 * 'target' is the PathTarget to be computed
3143 * 'windowFuncs' is a list of WindowFunc structs
3144 * 'winclause' is a WindowClause that is common to all the WindowFuncs
3145 *
3146 * The input must be sorted according to the WindowClause's PARTITION keys
3147 * plus ORDER BY keys.
3148 */
3149 WindowAggPath *
create_windowagg_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,List * windowFuncs,WindowClause * winclause)3150 create_windowagg_path(PlannerInfo *root,
3151 RelOptInfo *rel,
3152 Path *subpath,
3153 PathTarget *target,
3154 List *windowFuncs,
3155 WindowClause *winclause)
3156 {
3157 WindowAggPath *pathnode = makeNode(WindowAggPath);
3158
3159 pathnode->path.pathtype = T_WindowAgg;
3160 pathnode->path.parent = rel;
3161 pathnode->path.pathtarget = target;
3162 /* For now, assume we are above any joins, so no parameterization */
3163 pathnode->path.param_info = NULL;
3164 pathnode->path.parallel_aware = false;
3165 pathnode->path.parallel_safe = rel->consider_parallel &&
3166 subpath->parallel_safe;
3167 pathnode->path.parallel_workers = subpath->parallel_workers;
3168 /* WindowAgg preserves the input sort order */
3169 pathnode->path.pathkeys = subpath->pathkeys;
3170
3171 pathnode->subpath = subpath;
3172 pathnode->winclause = winclause;
3173
3174 /*
3175 * For costing purposes, assume that there are no redundant partitioning
3176 * or ordering columns; it's not worth the trouble to deal with that
3177 * corner case here. So we just pass the unmodified list lengths to
3178 * cost_windowagg.
3179 */
3180 cost_windowagg(&pathnode->path, root,
3181 windowFuncs,
3182 list_length(winclause->partitionClause),
3183 list_length(winclause->orderClause),
3184 subpath->startup_cost,
3185 subpath->total_cost,
3186 subpath->rows);
3187
3188 /* add tlist eval cost for each output row */
3189 pathnode->path.startup_cost += target->cost.startup;
3190 pathnode->path.total_cost += target->cost.startup +
3191 target->cost.per_tuple * pathnode->path.rows;
3192
3193 return pathnode;
3194 }
3195
3196 /*
3197 * create_setop_path
3198 * Creates a pathnode that represents computation of INTERSECT or EXCEPT
3199 *
3200 * 'rel' is the parent relation associated with the result
3201 * 'subpath' is the path representing the source of data
3202 * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
3203 * 'strategy' is the implementation strategy (sorted or hashed)
3204 * 'distinctList' is a list of SortGroupClause's representing the grouping
3205 * 'flagColIdx' is the column number where the flag column will be, if any
3206 * 'firstFlag' is the flag value for the first input relation when hashing;
3207 * or -1 when sorting
3208 * 'numGroups' is the estimated number of distinct groups
3209 * 'outputRows' is the estimated number of output rows
3210 */
3211 SetOpPath *
create_setop_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,SetOpCmd cmd,SetOpStrategy strategy,List * distinctList,AttrNumber flagColIdx,int firstFlag,double numGroups,double outputRows)3212 create_setop_path(PlannerInfo *root,
3213 RelOptInfo *rel,
3214 Path *subpath,
3215 SetOpCmd cmd,
3216 SetOpStrategy strategy,
3217 List *distinctList,
3218 AttrNumber flagColIdx,
3219 int firstFlag,
3220 double numGroups,
3221 double outputRows)
3222 {
3223 SetOpPath *pathnode = makeNode(SetOpPath);
3224
3225 pathnode->path.pathtype = T_SetOp;
3226 pathnode->path.parent = rel;
3227 /* SetOp doesn't project, so use source path's pathtarget */
3228 pathnode->path.pathtarget = subpath->pathtarget;
3229 /* For now, assume we are above any joins, so no parameterization */
3230 pathnode->path.param_info = NULL;
3231 pathnode->path.parallel_aware = false;
3232 pathnode->path.parallel_safe = rel->consider_parallel &&
3233 subpath->parallel_safe;
3234 pathnode->path.parallel_workers = subpath->parallel_workers;
3235 /* SetOp preserves the input sort order if in sort mode */
3236 pathnode->path.pathkeys =
3237 (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
3238
3239 pathnode->subpath = subpath;
3240 pathnode->cmd = cmd;
3241 pathnode->strategy = strategy;
3242 pathnode->distinctList = distinctList;
3243 pathnode->flagColIdx = flagColIdx;
3244 pathnode->firstFlag = firstFlag;
3245 pathnode->numGroups = numGroups;
3246
3247 /*
3248 * Charge one cpu_operator_cost per comparison per input tuple. We assume
3249 * all columns get compared at most of the tuples.
3250 */
3251 pathnode->path.startup_cost = subpath->startup_cost;
3252 pathnode->path.total_cost = subpath->total_cost +
3253 cpu_operator_cost * subpath->rows * list_length(distinctList);
3254 pathnode->path.rows = outputRows;
3255
3256 return pathnode;
3257 }
3258
3259 /*
3260 * create_recursiveunion_path
3261 * Creates a pathnode that represents a recursive UNION node
3262 *
3263 * 'rel' is the parent relation associated with the result
3264 * 'leftpath' is the source of data for the non-recursive term
3265 * 'rightpath' is the source of data for the recursive term
3266 * 'target' is the PathTarget to be computed
3267 * 'distinctList' is a list of SortGroupClause's representing the grouping
3268 * 'wtParam' is the ID of Param representing work table
3269 * 'numGroups' is the estimated number of groups
3270 *
3271 * For recursive UNION ALL, distinctList is empty and numGroups is zero
3272 */
3273 RecursiveUnionPath *
create_recursiveunion_path(PlannerInfo * root,RelOptInfo * rel,Path * leftpath,Path * rightpath,PathTarget * target,List * distinctList,int wtParam,double numGroups)3274 create_recursiveunion_path(PlannerInfo *root,
3275 RelOptInfo *rel,
3276 Path *leftpath,
3277 Path *rightpath,
3278 PathTarget *target,
3279 List *distinctList,
3280 int wtParam,
3281 double numGroups)
3282 {
3283 RecursiveUnionPath *pathnode = makeNode(RecursiveUnionPath);
3284
3285 pathnode->path.pathtype = T_RecursiveUnion;
3286 pathnode->path.parent = rel;
3287 pathnode->path.pathtarget = target;
3288 /* For now, assume we are above any joins, so no parameterization */
3289 pathnode->path.param_info = NULL;
3290 pathnode->path.parallel_aware = false;
3291 pathnode->path.parallel_safe = rel->consider_parallel &&
3292 leftpath->parallel_safe && rightpath->parallel_safe;
3293 /* Foolish, but we'll do it like joins for now: */
3294 pathnode->path.parallel_workers = leftpath->parallel_workers;
3295 /* RecursiveUnion result is always unsorted */
3296 pathnode->path.pathkeys = NIL;
3297
3298 pathnode->leftpath = leftpath;
3299 pathnode->rightpath = rightpath;
3300 pathnode->distinctList = distinctList;
3301 pathnode->wtParam = wtParam;
3302 pathnode->numGroups = numGroups;
3303
3304 cost_recursive_union(&pathnode->path, leftpath, rightpath);
3305
3306 return pathnode;
3307 }
3308
3309 /*
3310 * create_lockrows_path
3311 * Creates a pathnode that represents acquiring row locks
3312 *
3313 * 'rel' is the parent relation associated with the result
3314 * 'subpath' is the path representing the source of data
3315 * 'rowMarks' is a list of PlanRowMark's
3316 * 'epqParam' is the ID of Param for EvalPlanQual re-eval
3317 */
3318 LockRowsPath *
create_lockrows_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * rowMarks,int epqParam)3319 create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
3320 Path *subpath, List *rowMarks, int epqParam)
3321 {
3322 LockRowsPath *pathnode = makeNode(LockRowsPath);
3323
3324 pathnode->path.pathtype = T_LockRows;
3325 pathnode->path.parent = rel;
3326 /* LockRows doesn't project, so use source path's pathtarget */
3327 pathnode->path.pathtarget = subpath->pathtarget;
3328 /* For now, assume we are above any joins, so no parameterization */
3329 pathnode->path.param_info = NULL;
3330 pathnode->path.parallel_aware = false;
3331 pathnode->path.parallel_safe = false;
3332 pathnode->path.parallel_workers = 0;
3333 pathnode->path.rows = subpath->rows;
3334
3335 /*
3336 * The result cannot be assumed sorted, since locking might cause the sort
3337 * key columns to be replaced with new values.
3338 */
3339 pathnode->path.pathkeys = NIL;
3340
3341 pathnode->subpath = subpath;
3342 pathnode->rowMarks = rowMarks;
3343 pathnode->epqParam = epqParam;
3344
3345 /*
3346 * We should charge something extra for the costs of row locking and
3347 * possible refetches, but it's hard to say how much. For now, use
3348 * cpu_tuple_cost per row.
3349 */
3350 pathnode->path.startup_cost = subpath->startup_cost;
3351 pathnode->path.total_cost = subpath->total_cost +
3352 cpu_tuple_cost * subpath->rows;
3353
3354 return pathnode;
3355 }
3356
/*
 * create_modifytable_path
 *	  Creates a pathnode that represents performing INSERT/UPDATE/DELETE mods
 *
 * 'rel' is the parent relation associated with the result
 * 'operation' is the operation type
 * 'canSetTag' is true if we set the command tag/es_processed
 * 'nominalRelation' is the parent RT index for use of EXPLAIN
 * 'partitioned_rels' is an integer list of RT indexes of non-leaf tables in
 *		the partition tree, if this is an UPDATE/DELETE to a partitioned table.
 *		Otherwise NIL.
 * 'partColsUpdated' is true if any partitioning columns are being updated,
 *		either from the target relation or a descendent partitioned table.
 * 'resultRelations' is an integer list of actual RT indexes of target rel(s)
 * 'subpaths' is a list of Path(s) producing source data (one per rel)
 * 'subroots' is a list of PlannerInfo structs (one per rel)
 * 'withCheckOptionLists' is a list of WCO lists (one per rel)
 * 'returningLists' is a list of RETURNING tlists (one per rel)
 * 'rowMarks' is a list of PlanRowMarks (non-locking only)
 * 'onconflict' is the ON CONFLICT clause, or NULL
 * 'epqParam' is the ID of Param for EvalPlanQual re-eval
 */
ModifyTablePath *
create_modifytable_path(PlannerInfo *root, RelOptInfo *rel,
						CmdType operation, bool canSetTag,
						Index nominalRelation, List *partitioned_rels,
						bool partColsUpdated,
						List *resultRelations, List *subpaths,
						List *subroots,
						List *withCheckOptionLists, List *returningLists,
						List *rowMarks, OnConflictExpr *onconflict,
						int epqParam)
{
	ModifyTablePath *pathnode = makeNode(ModifyTablePath);
	double		total_size;
	ListCell   *lc;

	/* The per-target-relation lists must all be parallel */
	Assert(list_length(resultRelations) == list_length(subpaths));
	Assert(list_length(resultRelations) == list_length(subroots));
	Assert(withCheckOptionLists == NIL ||
		   list_length(resultRelations) == list_length(withCheckOptionLists));
	Assert(returningLists == NIL ||
		   list_length(resultRelations) == list_length(returningLists));

	pathnode->path.pathtype = T_ModifyTable;
	pathnode->path.parent = rel;
	/* pathtarget is not interesting, just make it minimally valid */
	pathnode->path.pathtarget = rel->reltarget;
	/* For now, assume we are above any joins, so no parameterization */
	pathnode->path.param_info = NULL;
	pathnode->path.parallel_aware = false;
	/* table modification is never parallelized */
	pathnode->path.parallel_safe = false;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = NIL;

	/*
	 * Compute cost & rowcount as sum of subpath costs & rowcounts.
	 *
	 * Currently, we don't charge anything extra for the actual table
	 * modification work, nor for the WITH CHECK OPTIONS or RETURNING
	 * expressions if any.  It would only be window dressing, since
	 * ModifyTable is always a top-level node and there is no way for the
	 * costs to change any higher-level planning choices.  But we might want
	 * to make it look better sometime.
	 */
	pathnode->path.startup_cost = 0;
	pathnode->path.total_cost = 0;
	pathnode->path.rows = 0;
	total_size = 0;
	foreach(lc, subpaths)
	{
		Path	   *subpath = (Path *) lfirst(lc);

		if (lc == list_head(subpaths))	/* first node? */
			pathnode->path.startup_cost = subpath->startup_cost;
		pathnode->path.total_cost += subpath->total_cost;
		pathnode->path.rows += subpath->rows;
		/* accumulate total output volume so we can average the width below */
		total_size += subpath->pathtarget->width * subpath->rows;
	}

	/*
	 * Set width to the average width of the subpath outputs.  XXX this is
	 * totally wrong: we should report zero if no RETURNING, else an average
	 * of the RETURNING tlist widths.  But it's what happened historically,
	 * and improving it is a task for another day.
	 */
	if (pathnode->path.rows > 0)
		total_size /= pathnode->path.rows;
	pathnode->path.pathtarget->width = rint(total_size);

	pathnode->operation = operation;
	pathnode->canSetTag = canSetTag;
	pathnode->nominalRelation = nominalRelation;
	pathnode->partitioned_rels = list_copy(partitioned_rels);
	pathnode->partColsUpdated = partColsUpdated;
	pathnode->resultRelations = resultRelations;
	pathnode->subpaths = subpaths;
	pathnode->subroots = subroots;
	pathnode->withCheckOptionLists = withCheckOptionLists;
	pathnode->returningLists = returningLists;
	pathnode->rowMarks = rowMarks;
	pathnode->onconflict = onconflict;
	pathnode->epqParam = epqParam;

	return pathnode;
}
3463
/*
 * create_limit_path
 *	  Creates a pathnode that represents performing LIMIT/OFFSET
 *
 * In addition to providing the actual OFFSET and LIMIT expressions,
 * the caller must provide estimates of their values for costing purposes.
 * The estimates are as computed by preprocess_limit(), ie, 0 represents
 * the clause not being present, and -1 means it's present but we could
 * not estimate its value.
 *
 * 'rel' is the parent relation associated with the result
 * 'subpath' is the path representing the source of data
 * 'limitOffset' is the actual OFFSET expression, or NULL
 * 'limitCount' is the actual LIMIT expression, or NULL
 * 'offset_est' is the estimated value of the OFFSET expression
 * 'count_est' is the estimated value of the LIMIT expression
 */
3481 LimitPath *
create_limit_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,Node * limitOffset,Node * limitCount,int64 offset_est,int64 count_est)3482 create_limit_path(PlannerInfo *root, RelOptInfo *rel,
3483 Path *subpath,
3484 Node *limitOffset, Node *limitCount,
3485 int64 offset_est, int64 count_est)
3486 {
3487 LimitPath *pathnode = makeNode(LimitPath);
3488
3489 pathnode->path.pathtype = T_Limit;
3490 pathnode->path.parent = rel;
3491 /* Limit doesn't project, so use source path's pathtarget */
3492 pathnode->path.pathtarget = subpath->pathtarget;
3493 /* For now, assume we are above any joins, so no parameterization */
3494 pathnode->path.param_info = NULL;
3495 pathnode->path.parallel_aware = false;
3496 pathnode->path.parallel_safe = rel->consider_parallel &&
3497 subpath->parallel_safe;
3498 pathnode->path.parallel_workers = subpath->parallel_workers;
3499 pathnode->path.rows = subpath->rows;
3500 pathnode->path.startup_cost = subpath->startup_cost;
3501 pathnode->path.total_cost = subpath->total_cost;
3502 pathnode->path.pathkeys = subpath->pathkeys;
3503 pathnode->subpath = subpath;
3504 pathnode->limitOffset = limitOffset;
3505 pathnode->limitCount = limitCount;
3506
3507 /*
3508 * Adjust the output rows count and costs according to the offset/limit.
3509 * This is only a cosmetic issue if we are at top level, but if we are
3510 * building a subquery then it's important to report correct info to the
3511 * outer planner.
3512 *
3513 * When the offset or count couldn't be estimated, use 10% of the
3514 * estimated number of rows emitted from the subpath.
3515 *
3516 * XXX we don't bother to add eval costs of the offset/limit expressions
3517 * themselves to the path costs. In theory we should, but in most cases
3518 * those expressions are trivial and it's just not worth the trouble.
3519 */
3520 if (offset_est != 0)
3521 {
3522 double offset_rows;
3523
3524 if (offset_est > 0)
3525 offset_rows = (double) offset_est;
3526 else
3527 offset_rows = clamp_row_est(subpath->rows * 0.10);
3528 if (offset_rows > pathnode->path.rows)
3529 offset_rows = pathnode->path.rows;
3530 if (subpath->rows > 0)
3531 pathnode->path.startup_cost +=
3532 (subpath->total_cost - subpath->startup_cost)
3533 * offset_rows / subpath->rows;
3534 pathnode->path.rows -= offset_rows;
3535 if (pathnode->path.rows < 1)
3536 pathnode->path.rows = 1;
3537 }
3538
3539 if (count_est != 0)
3540 {
3541 double count_rows;
3542
3543 if (count_est > 0)
3544 count_rows = (double) count_est;
3545 else
3546 count_rows = clamp_row_est(subpath->rows * 0.10);
3547 if (count_rows > pathnode->path.rows)
3548 count_rows = pathnode->path.rows;
3549 if (subpath->rows > 0)
3550 pathnode->path.total_cost = pathnode->path.startup_cost +
3551 (subpath->total_cost - subpath->startup_cost)
3552 * count_rows / subpath->rows;
3553 pathnode->path.rows = count_rows;
3554 if (pathnode->path.rows < 1)
3555 pathnode->path.rows = 1;
3556 }
3557
3558 return pathnode;
3559 }
3560
3561
/*
 * reparameterize_path
 *	  Attempt to modify a Path to have greater parameterization
 *
 * We use this to attempt to bring all child paths of an appendrel to the
 * same parameterization level, ensuring that they all enforce the same set
 * of join quals (and thus that that parameterization can be attributed to
 * an append path built from such paths).  Currently, only a few path types
 * are supported here, though more could be added at need.  We return NULL
 * if we can't reparameterize the given path.
 *
 * Note: we intentionally do not pass created paths to add_path(); it would
 * possibly try to delete them on the grounds of being cost-inferior to the
 * paths they were made from, and we don't want that.  Paths made here are
 * not necessarily of general-purpose usefulness, but they can be useful
 * as members of an append path.
 */
3579 Path *
reparameterize_path(PlannerInfo * root,Path * path,Relids required_outer,double loop_count)3580 reparameterize_path(PlannerInfo *root, Path *path,
3581 Relids required_outer,
3582 double loop_count)
3583 {
3584 RelOptInfo *rel = path->parent;
3585
3586 /* Can only increase, not decrease, path's parameterization */
3587 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
3588 return NULL;
3589 switch (path->pathtype)
3590 {
3591 case T_SeqScan:
3592 return create_seqscan_path(root, rel, required_outer, 0);
3593 case T_SampleScan:
3594 return (Path *) create_samplescan_path(root, rel, required_outer);
3595 case T_IndexScan:
3596 case T_IndexOnlyScan:
3597 {
3598 IndexPath *ipath = (IndexPath *) path;
3599 IndexPath *newpath = makeNode(IndexPath);
3600
3601 /*
3602 * We can't use create_index_path directly, and would not want
3603 * to because it would re-compute the indexqual conditions
3604 * which is wasted effort. Instead we hack things a bit:
3605 * flat-copy the path node, revise its param_info, and redo
3606 * the cost estimate.
3607 */
3608 memcpy(newpath, ipath, sizeof(IndexPath));
3609 newpath->path.param_info =
3610 get_baserel_parampathinfo(root, rel, required_outer);
3611 cost_index(newpath, root, loop_count, false);
3612 return (Path *) newpath;
3613 }
3614 case T_BitmapHeapScan:
3615 {
3616 BitmapHeapPath *bpath = (BitmapHeapPath *) path;
3617
3618 return (Path *) create_bitmap_heap_path(root,
3619 rel,
3620 bpath->bitmapqual,
3621 required_outer,
3622 loop_count, 0);
3623 }
3624 case T_SubqueryScan:
3625 {
3626 SubqueryScanPath *spath = (SubqueryScanPath *) path;
3627
3628 return (Path *) create_subqueryscan_path(root,
3629 rel,
3630 spath->subpath,
3631 spath->path.pathkeys,
3632 required_outer);
3633 }
3634 case T_Append:
3635 {
3636 AppendPath *apath = (AppendPath *) path;
3637 List *childpaths = NIL;
3638 List *partialpaths = NIL;
3639 int i;
3640 ListCell *lc;
3641
3642 /* Reparameterize the children */
3643 i = 0;
3644 foreach(lc, apath->subpaths)
3645 {
3646 Path *spath = (Path *) lfirst(lc);
3647
3648 spath = reparameterize_path(root, spath,
3649 required_outer,
3650 loop_count);
3651 if (spath == NULL)
3652 return NULL;
3653 /* We have to re-split the regular and partial paths */
3654 if (i < apath->first_partial_path)
3655 childpaths = lappend(childpaths, spath);
3656 else
3657 partialpaths = lappend(partialpaths, spath);
3658 i++;
3659 }
3660 return (Path *)
3661 create_append_path(root, rel, childpaths, partialpaths,
3662 required_outer,
3663 apath->path.parallel_workers,
3664 apath->path.parallel_aware,
3665 apath->partitioned_rels,
3666 -1);
3667 }
3668 default:
3669 break;
3670 }
3671 return NULL;
3672 }
3673
/*
 * reparameterize_path_by_child
 *	  Given a path parameterized by the parent of the given child relation,
 *	  translate the path to be parameterized by the given child relation.
 *
 * The function creates a new path of the same type as the given path, but
 * parameterized by the given child relation.  Most fields from the original
 * path can simply be flat-copied, but any expressions must be adjusted to
 * refer to the correct varnos, and any paths must be recursively
 * reparameterized.  Other fields that refer to specific relids also need
 * adjustment.
 *
 * The cost, number of rows, width and parallel path properties depend upon
 * path->parent, which does not change during the translation.  Hence those
 * members are copied as they are.
 *
 * If the given path can not be reparameterized, the function returns NULL.
 */
Path *
reparameterize_path_by_child(PlannerInfo *root, Path *path,
							 RelOptInfo *child_rel)
{

/*
 * Allocate a new node of the given type and flat-copy the old node into it;
 * fields that need deeper translation are fixed up afterwards.
 */
#define FLAT_COPY_PATH(newnode, node, nodetype)  \
	( (newnode) = makeNode(nodetype), \
	  memcpy((newnode), (node), sizeof(nodetype)) )

/*
 * Translate an expression list so that references to the (possibly
 * multi-level) parent rel become references to child_rel.
 */
#define ADJUST_CHILD_ATTRS(node) \
	((node) = \
	 (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
												child_rel->relids, \
												child_rel->top_parent_relids))

/*
 * Recursively reparameterize a sub-path.  NB: contains a hidden early
 * "return NULL" from the enclosing function on failure.
 */
#define REPARAMETERIZE_CHILD_PATH(path) \
do { \
	(path) = reparameterize_path_by_child(root, (path), child_rel); \
	if ((path) == NULL) \
		return NULL; \
} while(0)

/*
 * Recursively reparameterize a list of sub-paths.  Like the macro above,
 * this returns NULL from the enclosing function if any member fails.
 */
#define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \
do { \
	if ((pathlist) != NIL) \
	{ \
		(pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \
													  child_rel); \
		if ((pathlist) == NIL) \
			return NULL; \
	} \
} while(0)

	Path	   *new_path;
	ParamPathInfo *new_ppi;
	ParamPathInfo *old_ppi;
	Relids		required_outer;

	/*
	 * If the path is not parameterized by parent of the given relation, it
	 * doesn't need reparameterization.
	 */
	if (!path->param_info ||
		!bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids))
		return path;

	/*
	 * If possible, reparameterize the given path, making a copy.
	 *
	 * This function is currently only applied to the inner side of a nestloop
	 * join that is being partitioned by the partitionwise-join code.  Hence,
	 * we need only support path types that plausibly arise in that context.
	 * (In particular, supporting sorted path types would be a waste of code
	 * and cycles: even if we translated them here, they'd just lose in
	 * subsequent cost comparisons.)  If we do see an unsupported path type,
	 * that just means we won't be able to generate a partitionwise-join plan
	 * using that path type.
	 */
	switch (nodeTag(path))
	{
		case T_Path:
			/* Plain scan path: only param_info needs fixing, done below */
			FLAT_COPY_PATH(new_path, path, Path);
			break;

		case T_IndexPath:
			{
				IndexPath  *ipath;

				/* Index clauses/quals reference parent Vars; translate them */
				FLAT_COPY_PATH(ipath, path, IndexPath);
				ADJUST_CHILD_ATTRS(ipath->indexclauses);
				ADJUST_CHILD_ATTRS(ipath->indexquals);
				new_path = (Path *) ipath;
			}
			break;

		case T_BitmapHeapPath:
			{
				BitmapHeapPath *bhpath;

				FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
				REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
				new_path = (Path *) bhpath;
			}
			break;

		case T_BitmapAndPath:
			{
				BitmapAndPath *bapath;

				FLAT_COPY_PATH(bapath, path, BitmapAndPath);
				REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals);
				new_path = (Path *) bapath;
			}
			break;

		case T_BitmapOrPath:
			{
				BitmapOrPath *bopath;

				FLAT_COPY_PATH(bopath, path, BitmapOrPath);
				REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals);
				new_path = (Path *) bopath;
			}
			break;

		case T_ForeignPath:
			{
				ForeignPath *fpath;
				ReparameterizeForeignPathByChild_function rfpc_func;

				FLAT_COPY_PATH(fpath, path, ForeignPath);
				if (fpath->fdw_outerpath)
					REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);

				/* Hand over to FDW if needed. */
				rfpc_func =
					path->parent->fdwroutine->ReparameterizeForeignPathByChild;
				if (rfpc_func)
					fpath->fdw_private = rfpc_func(root, fpath->fdw_private,
												   child_rel);
				new_path = (Path *) fpath;
			}
			break;

		case T_CustomPath:
			{
				CustomPath *cpath;

				FLAT_COPY_PATH(cpath, path, CustomPath);
				REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
				/* Let the custom-path provider translate its private data */
				if (cpath->methods &&
					cpath->methods->ReparameterizeCustomPathByChild)
					cpath->custom_private =
						cpath->methods->ReparameterizeCustomPathByChild(root,
																		cpath->custom_private,
																		child_rel);
				new_path = (Path *) cpath;
			}
			break;

		case T_NestPath:
			{
				JoinPath   *jpath;

				FLAT_COPY_PATH(jpath, path, NestPath);

				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
				new_path = (Path *) jpath;
			}
			break;

		case T_MergePath:
			{
				JoinPath   *jpath;
				MergePath  *mpath;

				FLAT_COPY_PATH(mpath, path, MergePath);

				jpath = (JoinPath *) mpath;
				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
				ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
				new_path = (Path *) mpath;
			}
			break;

		case T_HashPath:
			{
				JoinPath   *jpath;
				HashPath   *hpath;

				FLAT_COPY_PATH(hpath, path, HashPath);

				jpath = (JoinPath *) hpath;
				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
				ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
				new_path = (Path *) hpath;
			}
			break;

		case T_AppendPath:
			{
				AppendPath *apath;

				FLAT_COPY_PATH(apath, path, AppendPath);
				REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths);
				new_path = (Path *) apath;
			}
			break;

		case T_GatherPath:
			{
				GatherPath *gpath;

				FLAT_COPY_PATH(gpath, path, GatherPath);
				REPARAMETERIZE_CHILD_PATH(gpath->subpath);
				new_path = (Path *) gpath;
			}
			break;

		default:

			/* We don't know how to reparameterize this path. */
			return NULL;
	}

	/*
	 * Adjust the parameterization information, which refers to the topmost
	 * parent.  The topmost parent can be multiple levels away from the given
	 * child, hence use multi-level expression adjustment routines.
	 */
	old_ppi = new_path->param_info;
	required_outer =
		adjust_child_relids_multilevel(root, old_ppi->ppi_req_outer,
									   child_rel->relids,
									   child_rel->top_parent_relids);

	/* If we already have a PPI for this parameterization, just return it */
	new_ppi = find_param_path_info(new_path->parent, required_outer);

	/*
	 * If not, build a new one and link it to the list of PPIs.  For the same
	 * reason as explained in mark_dummy_rel(), allocate new PPI in the same
	 * context the given RelOptInfo is in.
	 */
	if (new_ppi == NULL)
	{
		MemoryContext oldcontext;
		RelOptInfo *rel = path->parent;

		oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));

		new_ppi = makeNode(ParamPathInfo);
		/* bms_copy so the PPI doesn't share the (freed-below) relid set */
		new_ppi->ppi_req_outer = bms_copy(required_outer);
		new_ppi->ppi_rows = old_ppi->ppi_rows;
		new_ppi->ppi_clauses = old_ppi->ppi_clauses;
		ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
		rel->ppilist = lappend(rel->ppilist, new_ppi);

		MemoryContextSwitchTo(oldcontext);
	}
	/* Done with the working relid set (the PPI holds its own copy) */
	bms_free(required_outer);

	new_path->param_info = new_ppi;

	/*
	 * Adjust the path target if the parent of the outer relation is
	 * referenced in the targetlist.  This can happen when only the parent of
	 * outer relation is laterally referenced in this relation.
	 */
	if (bms_overlap(path->parent->lateral_relids,
					child_rel->top_parent_relids))
	{
		new_path->pathtarget = copy_pathtarget(new_path->pathtarget);
		ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs);
	}

	return new_path;
}
3956
/*
 * reparameterize_pathlist_by_child
 *	  Helper function to reparameterize a list of paths by given child rel.
 */
3961 static List *
reparameterize_pathlist_by_child(PlannerInfo * root,List * pathlist,RelOptInfo * child_rel)3962 reparameterize_pathlist_by_child(PlannerInfo *root,
3963 List *pathlist,
3964 RelOptInfo *child_rel)
3965 {
3966 ListCell *lc;
3967 List *result = NIL;
3968
3969 foreach(lc, pathlist)
3970 {
3971 Path *path = reparameterize_path_by_child(root, lfirst(lc),
3972 child_rel);
3973
3974 if (path == NULL)
3975 {
3976 list_free(result);
3977 return NIL;
3978 }
3979
3980 result = lappend(result, path);
3981 }
3982
3983 return result;
3984 }
3985