1 /*-------------------------------------------------------------------------
2 *
3 * pathnode.c
4 * Routines to manipulate pathlists and create path nodes
5 *
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/util/pathnode.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "foreign/fdwapi.h"
20 #include "miscadmin.h"
21 #include "nodes/extensible.h"
22 #include "nodes/nodeFuncs.h"
23 #include "optimizer/appendinfo.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/cost.h"
26 #include "optimizer/optimizer.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/paths.h"
29 #include "optimizer/planmain.h"
30 #include "optimizer/prep.h"
31 #include "optimizer/restrictinfo.h"
32 #include "optimizer/tlist.h"
33 #include "parser/parsetree.h"
34 #include "utils/lsyscache.h"
35 #include "utils/memutils.h"
36 #include "utils/selfuncs.h"
37
38 typedef enum
39 {
40 COSTS_EQUAL, /* path costs are fuzzily equal */
41 COSTS_BETTER1, /* first path is cheaper than second */
42 COSTS_BETTER2, /* second path is cheaper than first */
43 COSTS_DIFFERENT /* neither path dominates the other on cost */
44 } PathCostComparison;
45
46 /*
47 * STD_FUZZ_FACTOR is the normal fuzz factor for compare_path_costs_fuzzily.
48 * XXX is it worth making this user-controllable? It provides a tradeoff
49 * between planner runtime and the accuracy of path cost comparisons.
50 */
51 #define STD_FUZZ_FACTOR 1.01
52
53 static List *translate_sub_tlist(List *tlist, int relid);
54 static int append_total_cost_compare(const ListCell *a, const ListCell *b);
55 static int append_startup_cost_compare(const ListCell *a, const ListCell *b);
56 static List *reparameterize_pathlist_by_child(PlannerInfo *root,
57 List *pathlist,
58 RelOptInfo *child_rel);
59
60
61 /*****************************************************************************
62 * MISC. PATH UTILITIES
63 *****************************************************************************/
64
65 /*
66 * compare_path_costs
67 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
68 * or more expensive than path2 for the specified criterion.
69 */
70 int
compare_path_costs(Path * path1,Path * path2,CostSelector criterion)71 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
72 {
73 if (criterion == STARTUP_COST)
74 {
75 if (path1->startup_cost < path2->startup_cost)
76 return -1;
77 if (path1->startup_cost > path2->startup_cost)
78 return +1;
79
80 /*
81 * If paths have the same startup cost (not at all unlikely), order
82 * them by total cost.
83 */
84 if (path1->total_cost < path2->total_cost)
85 return -1;
86 if (path1->total_cost > path2->total_cost)
87 return +1;
88 }
89 else
90 {
91 if (path1->total_cost < path2->total_cost)
92 return -1;
93 if (path1->total_cost > path2->total_cost)
94 return +1;
95
96 /*
97 * If paths have the same total cost, order them by startup cost.
98 */
99 if (path1->startup_cost < path2->startup_cost)
100 return -1;
101 if (path1->startup_cost > path2->startup_cost)
102 return +1;
103 }
104 return 0;
105 }
106
107 /*
108 * compare_path_fractional_costs
109 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
110 * or more expensive than path2 for fetching the specified fraction
111 * of the total tuples.
112 *
113 * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
114 * path with the cheaper total_cost.
115 */
116 int
compare_fractional_path_costs(Path * path1,Path * path2,double fraction)117 compare_fractional_path_costs(Path *path1, Path *path2,
118 double fraction)
119 {
120 Cost cost1,
121 cost2;
122
123 if (fraction <= 0.0 || fraction >= 1.0)
124 return compare_path_costs(path1, path2, TOTAL_COST);
125 cost1 = path1->startup_cost +
126 fraction * (path1->total_cost - path1->startup_cost);
127 cost2 = path2->startup_cost +
128 fraction * (path2->total_cost - path2->startup_cost);
129 if (cost1 < cost2)
130 return -1;
131 if (cost1 > cost2)
132 return +1;
133 return 0;
134 }
135
136 /*
137 * compare_path_costs_fuzzily
138 * Compare the costs of two paths to see if either can be said to
139 * dominate the other.
140 *
141 * We use fuzzy comparisons so that add_path() can avoid keeping both of
142 * a pair of paths that really have insignificantly different cost.
143 *
144 * The fuzz_factor argument must be 1.0 plus delta, where delta is the
145 * fraction of the smaller cost that is considered to be a significant
146 * difference. For example, fuzz_factor = 1.01 makes the fuzziness limit
147 * be 1% of the smaller cost.
148 *
149 * The two paths are said to have "equal" costs if both startup and total
150 * costs are fuzzily the same. Path1 is said to be better than path2 if
151 * it has fuzzily better startup cost and fuzzily no worse total cost,
152 * or if it has fuzzily better total cost and fuzzily no worse startup cost.
153 * Path2 is better than path1 if the reverse holds. Finally, if one path
154 * is fuzzily better than the other on startup cost and fuzzily worse on
155 * total cost, we just say that their costs are "different", since neither
156 * dominates the other across the whole performance spectrum.
157 *
158 * This function also enforces a policy rule that paths for which the relevant
159 * one of parent->consider_startup and parent->consider_param_startup is false
160 * cannot survive comparisons solely on the grounds of good startup cost, so
161 * we never return COSTS_DIFFERENT when that is true for the total-cost loser.
162 * (But if total costs are fuzzily equal, we compare startup costs anyway,
163 * in hopes of eliminating one path or the other.)
164 */
165 static PathCostComparison
compare_path_costs_fuzzily(Path * path1,Path * path2,double fuzz_factor)166 compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
167 {
168 #define CONSIDER_PATH_STARTUP_COST(p) \
169 ((p)->param_info == NULL ? (p)->parent->consider_startup : (p)->parent->consider_param_startup)
170
171 /*
172 * Check total cost first since it's more likely to be different; many
173 * paths have zero startup cost.
174 */
175 if (path1->total_cost > path2->total_cost * fuzz_factor)
176 {
177 /* path1 fuzzily worse on total cost */
178 if (CONSIDER_PATH_STARTUP_COST(path1) &&
179 path2->startup_cost > path1->startup_cost * fuzz_factor)
180 {
181 /* ... but path2 fuzzily worse on startup, so DIFFERENT */
182 return COSTS_DIFFERENT;
183 }
184 /* else path2 dominates */
185 return COSTS_BETTER2;
186 }
187 if (path2->total_cost > path1->total_cost * fuzz_factor)
188 {
189 /* path2 fuzzily worse on total cost */
190 if (CONSIDER_PATH_STARTUP_COST(path2) &&
191 path1->startup_cost > path2->startup_cost * fuzz_factor)
192 {
193 /* ... but path1 fuzzily worse on startup, so DIFFERENT */
194 return COSTS_DIFFERENT;
195 }
196 /* else path1 dominates */
197 return COSTS_BETTER1;
198 }
199 /* fuzzily the same on total cost ... */
200 if (path1->startup_cost > path2->startup_cost * fuzz_factor)
201 {
202 /* ... but path1 fuzzily worse on startup, so path2 wins */
203 return COSTS_BETTER2;
204 }
205 if (path2->startup_cost > path1->startup_cost * fuzz_factor)
206 {
207 /* ... but path2 fuzzily worse on startup, so path1 wins */
208 return COSTS_BETTER1;
209 }
210 /* fuzzily the same on both costs */
211 return COSTS_EQUAL;
212
213 #undef CONSIDER_PATH_STARTUP_COST
214 }
215
216 /*
217 * set_cheapest
218 * Find the minimum-cost paths from among a relation's paths,
219 * and save them in the rel's cheapest-path fields.
220 *
221 * cheapest_total_path is normally the cheapest-total-cost unparameterized
222 * path; but if there are no unparameterized paths, we assign it to be the
223 * best (cheapest least-parameterized) parameterized path. However, only
224 * unparameterized paths are considered candidates for cheapest_startup_path,
225 * so that will be NULL if there are no unparameterized paths.
226 *
227 * The cheapest_parameterized_paths list collects all parameterized paths
228 * that have survived the add_path() tournament for this relation. (Since
229 * add_path ignores pathkeys for a parameterized path, these will be paths
230 * that have best cost or best row count for their parameterization. We
231 * may also have both a parallel-safe and a non-parallel-safe path in some
232 * cases for the same parameterization in some cases, but this should be
233 * relatively rare since, most typically, all paths for the same relation
234 * will be parallel-safe or none of them will.)
235 *
236 * cheapest_parameterized_paths always includes the cheapest-total
237 * unparameterized path, too, if there is one; the users of that list find
238 * it more convenient if that's included.
239 *
240 * This is normally called only after we've finished constructing the path
241 * list for the rel node.
242 */
243 void
set_cheapest(RelOptInfo * parent_rel)244 set_cheapest(RelOptInfo *parent_rel)
245 {
246 Path *cheapest_startup_path;
247 Path *cheapest_total_path;
248 Path *best_param_path;
249 List *parameterized_paths;
250 ListCell *p;
251
252 Assert(IsA(parent_rel, RelOptInfo));
253
254 if (parent_rel->pathlist == NIL)
255 elog(ERROR, "could not devise a query plan for the given query");
256
257 cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
258 parameterized_paths = NIL;
259
260 foreach(p, parent_rel->pathlist)
261 {
262 Path *path = (Path *) lfirst(p);
263 int cmp;
264
265 if (path->param_info)
266 {
267 /* Parameterized path, so add it to parameterized_paths */
268 parameterized_paths = lappend(parameterized_paths, path);
269
270 /*
271 * If we have an unparameterized cheapest-total, we no longer care
272 * about finding the best parameterized path, so move on.
273 */
274 if (cheapest_total_path)
275 continue;
276
277 /*
278 * Otherwise, track the best parameterized path, which is the one
279 * with least total cost among those of the minimum
280 * parameterization.
281 */
282 if (best_param_path == NULL)
283 best_param_path = path;
284 else
285 {
286 switch (bms_subset_compare(PATH_REQ_OUTER(path),
287 PATH_REQ_OUTER(best_param_path)))
288 {
289 case BMS_EQUAL:
290 /* keep the cheaper one */
291 if (compare_path_costs(path, best_param_path,
292 TOTAL_COST) < 0)
293 best_param_path = path;
294 break;
295 case BMS_SUBSET1:
296 /* new path is less-parameterized */
297 best_param_path = path;
298 break;
299 case BMS_SUBSET2:
300 /* old path is less-parameterized, keep it */
301 break;
302 case BMS_DIFFERENT:
303
304 /*
305 * This means that neither path has the least possible
306 * parameterization for the rel. We'll sit on the old
307 * path until something better comes along.
308 */
309 break;
310 }
311 }
312 }
313 else
314 {
315 /* Unparameterized path, so consider it for cheapest slots */
316 if (cheapest_total_path == NULL)
317 {
318 cheapest_startup_path = cheapest_total_path = path;
319 continue;
320 }
321
322 /*
323 * If we find two paths of identical costs, try to keep the
324 * better-sorted one. The paths might have unrelated sort
325 * orderings, in which case we can only guess which might be
326 * better to keep, but if one is superior then we definitely
327 * should keep that one.
328 */
329 cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
330 if (cmp > 0 ||
331 (cmp == 0 &&
332 compare_pathkeys(cheapest_startup_path->pathkeys,
333 path->pathkeys) == PATHKEYS_BETTER2))
334 cheapest_startup_path = path;
335
336 cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
337 if (cmp > 0 ||
338 (cmp == 0 &&
339 compare_pathkeys(cheapest_total_path->pathkeys,
340 path->pathkeys) == PATHKEYS_BETTER2))
341 cheapest_total_path = path;
342 }
343 }
344
345 /* Add cheapest unparameterized path, if any, to parameterized_paths */
346 if (cheapest_total_path)
347 parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
348
349 /*
350 * If there is no unparameterized path, use the best parameterized path as
351 * cheapest_total_path (but not as cheapest_startup_path).
352 */
353 if (cheapest_total_path == NULL)
354 cheapest_total_path = best_param_path;
355 Assert(cheapest_total_path != NULL);
356
357 parent_rel->cheapest_startup_path = cheapest_startup_path;
358 parent_rel->cheapest_total_path = cheapest_total_path;
359 parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
360 parent_rel->cheapest_parameterized_paths = parameterized_paths;
361 }
362
363 /*
364 * add_path
365 * Consider a potential implementation path for the specified parent rel,
366 * and add it to the rel's pathlist if it is worthy of consideration.
367 * A path is worthy if it has a better sort order (better pathkeys) or
368 * cheaper cost (on either dimension), or generates fewer rows, than any
369 * existing path that has the same or superset parameterization rels.
370 * We also consider parallel-safe paths more worthy than others.
371 *
372 * We also remove from the rel's pathlist any old paths that are dominated
373 * by new_path --- that is, new_path is cheaper, at least as well ordered,
374 * generates no more rows, requires no outer rels not required by the old
375 * path, and is no less parallel-safe.
376 *
377 * In most cases, a path with a superset parameterization will generate
378 * fewer rows (since it has more join clauses to apply), so that those two
379 * figures of merit move in opposite directions; this means that a path of
380 * one parameterization can seldom dominate a path of another. But such
381 * cases do arise, so we make the full set of checks anyway.
382 *
383 * There are two policy decisions embedded in this function, along with
384 * its sibling add_path_precheck. First, we treat all parameterized paths
385 * as having NIL pathkeys, so that they cannot win comparisons on the
386 * basis of sort order. This is to reduce the number of parameterized
387 * paths that are kept; see discussion in src/backend/optimizer/README.
388 *
389 * Second, we only consider cheap startup cost to be interesting if
390 * parent_rel->consider_startup is true for an unparameterized path, or
391 * parent_rel->consider_param_startup is true for a parameterized one.
392 * Again, this allows discarding useless paths sooner.
393 *
394 * The pathlist is kept sorted by total_cost, with cheaper paths
395 * at the front. Within this routine, that's simply a speed hack:
396 * doing it that way makes it more likely that we will reject an inferior
397 * path after a few comparisons, rather than many comparisons.
398 * However, add_path_precheck relies on this ordering to exit early
399 * when possible.
400 *
401 * NOTE: discarded Path objects are immediately pfree'd to reduce planner
402 * memory consumption. We dare not try to free the substructure of a Path,
403 * since much of it may be shared with other Paths or the query tree itself;
404 * but just recycling discarded Path nodes is a very useful savings in
405 * a large join tree. We can recycle the List nodes of pathlist, too.
406 *
407 * As noted in optimizer/README, deleting a previously-accepted Path is
408 * safe because we know that Paths of this rel cannot yet be referenced
409 * from any other rel, such as a higher-level join. However, in some cases
410 * it is possible that a Path is referenced by another Path for its own
411 * rel; we must not delete such a Path, even if it is dominated by the new
412 * Path. Currently this occurs only for IndexPath objects, which may be
413 * referenced as children of BitmapHeapPaths as well as being paths in
414 * their own right. Hence, we don't pfree IndexPaths when rejecting them.
415 *
416 * 'parent_rel' is the relation entry to which the path corresponds.
417 * 'new_path' is a potential path for parent_rel.
418 *
419 * Returns nothing, but modifies parent_rel->pathlist.
420 */
421 void
add_path(RelOptInfo * parent_rel,Path * new_path)422 add_path(RelOptInfo *parent_rel, Path *new_path)
423 {
424 bool accept_new = true; /* unless we find a superior old path */
425 int insert_at = 0; /* where to insert new item */
426 List *new_path_pathkeys;
427 ListCell *p1;
428
429 /*
430 * This is a convenient place to check for query cancel --- no part of the
431 * planner goes very long without calling add_path().
432 */
433 CHECK_FOR_INTERRUPTS();
434
435 /* Pretend parameterized paths have no pathkeys, per comment above */
436 new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys;
437
438 /*
439 * Loop to check proposed new path against old paths. Note it is possible
440 * for more than one old path to be tossed out because new_path dominates
441 * it.
442 */
443 foreach(p1, parent_rel->pathlist)
444 {
445 Path *old_path = (Path *) lfirst(p1);
446 bool remove_old = false; /* unless new proves superior */
447 PathCostComparison costcmp;
448 PathKeysComparison keyscmp;
449 BMS_Comparison outercmp;
450
451 /*
452 * Do a fuzzy cost comparison with standard fuzziness limit.
453 */
454 costcmp = compare_path_costs_fuzzily(new_path, old_path,
455 STD_FUZZ_FACTOR);
456
457 /*
458 * If the two paths compare differently for startup and total cost,
459 * then we want to keep both, and we can skip comparing pathkeys and
460 * required_outer rels. If they compare the same, proceed with the
461 * other comparisons. Row count is checked last. (We make the tests
462 * in this order because the cost comparison is most likely to turn
463 * out "different", and the pathkeys comparison next most likely. As
464 * explained above, row count very seldom makes a difference, so even
465 * though it's cheap to compare there's not much point in checking it
466 * earlier.)
467 */
468 if (costcmp != COSTS_DIFFERENT)
469 {
470 /* Similarly check to see if either dominates on pathkeys */
471 List *old_path_pathkeys;
472
473 old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
474 keyscmp = compare_pathkeys(new_path_pathkeys,
475 old_path_pathkeys);
476 if (keyscmp != PATHKEYS_DIFFERENT)
477 {
478 switch (costcmp)
479 {
480 case COSTS_EQUAL:
481 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
482 PATH_REQ_OUTER(old_path));
483 if (keyscmp == PATHKEYS_BETTER1)
484 {
485 if ((outercmp == BMS_EQUAL ||
486 outercmp == BMS_SUBSET1) &&
487 new_path->rows <= old_path->rows &&
488 new_path->parallel_safe >= old_path->parallel_safe)
489 remove_old = true; /* new dominates old */
490 }
491 else if (keyscmp == PATHKEYS_BETTER2)
492 {
493 if ((outercmp == BMS_EQUAL ||
494 outercmp == BMS_SUBSET2) &&
495 new_path->rows >= old_path->rows &&
496 new_path->parallel_safe <= old_path->parallel_safe)
497 accept_new = false; /* old dominates new */
498 }
499 else /* keyscmp == PATHKEYS_EQUAL */
500 {
501 if (outercmp == BMS_EQUAL)
502 {
503 /*
504 * Same pathkeys and outer rels, and fuzzily
505 * the same cost, so keep just one; to decide
506 * which, first check parallel-safety, then
507 * rows, then do a fuzzy cost comparison with
508 * very small fuzz limit. (We used to do an
509 * exact cost comparison, but that results in
510 * annoying platform-specific plan variations
511 * due to roundoff in the cost estimates.) If
512 * things are still tied, arbitrarily keep
513 * only the old path. Notice that we will
514 * keep only the old path even if the
515 * less-fuzzy comparison decides the startup
516 * and total costs compare differently.
517 */
518 if (new_path->parallel_safe >
519 old_path->parallel_safe)
520 remove_old = true; /* new dominates old */
521 else if (new_path->parallel_safe <
522 old_path->parallel_safe)
523 accept_new = false; /* old dominates new */
524 else if (new_path->rows < old_path->rows)
525 remove_old = true; /* new dominates old */
526 else if (new_path->rows > old_path->rows)
527 accept_new = false; /* old dominates new */
528 else if (compare_path_costs_fuzzily(new_path,
529 old_path,
530 1.0000000001) == COSTS_BETTER1)
531 remove_old = true; /* new dominates old */
532 else
533 accept_new = false; /* old equals or
534 * dominates new */
535 }
536 else if (outercmp == BMS_SUBSET1 &&
537 new_path->rows <= old_path->rows &&
538 new_path->parallel_safe >= old_path->parallel_safe)
539 remove_old = true; /* new dominates old */
540 else if (outercmp == BMS_SUBSET2 &&
541 new_path->rows >= old_path->rows &&
542 new_path->parallel_safe <= old_path->parallel_safe)
543 accept_new = false; /* old dominates new */
544 /* else different parameterizations, keep both */
545 }
546 break;
547 case COSTS_BETTER1:
548 if (keyscmp != PATHKEYS_BETTER2)
549 {
550 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
551 PATH_REQ_OUTER(old_path));
552 if ((outercmp == BMS_EQUAL ||
553 outercmp == BMS_SUBSET1) &&
554 new_path->rows <= old_path->rows &&
555 new_path->parallel_safe >= old_path->parallel_safe)
556 remove_old = true; /* new dominates old */
557 }
558 break;
559 case COSTS_BETTER2:
560 if (keyscmp != PATHKEYS_BETTER1)
561 {
562 outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
563 PATH_REQ_OUTER(old_path));
564 if ((outercmp == BMS_EQUAL ||
565 outercmp == BMS_SUBSET2) &&
566 new_path->rows >= old_path->rows &&
567 new_path->parallel_safe <= old_path->parallel_safe)
568 accept_new = false; /* old dominates new */
569 }
570 break;
571 case COSTS_DIFFERENT:
572
573 /*
574 * can't get here, but keep this case to keep compiler
575 * quiet
576 */
577 break;
578 }
579 }
580 }
581
582 /*
583 * Remove current element from pathlist if dominated by new.
584 */
585 if (remove_old)
586 {
587 parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist,
588 p1);
589
590 /*
591 * Delete the data pointed-to by the deleted cell, if possible
592 */
593 if (!IsA(old_path, IndexPath))
594 pfree(old_path);
595 }
596 else
597 {
598 /* new belongs after this old path if it has cost >= old's */
599 if (new_path->total_cost >= old_path->total_cost)
600 insert_at = foreach_current_index(p1) + 1;
601 }
602
603 /*
604 * If we found an old path that dominates new_path, we can quit
605 * scanning the pathlist; we will not add new_path, and we assume
606 * new_path cannot dominate any other elements of the pathlist.
607 */
608 if (!accept_new)
609 break;
610 }
611
612 if (accept_new)
613 {
614 /* Accept the new path: insert it at proper place in pathlist */
615 parent_rel->pathlist =
616 list_insert_nth(parent_rel->pathlist, insert_at, new_path);
617 }
618 else
619 {
620 /* Reject and recycle the new path */
621 if (!IsA(new_path, IndexPath))
622 pfree(new_path);
623 }
624 }
625
626 /*
627 * add_path_precheck
628 * Check whether a proposed new path could possibly get accepted.
629 * We assume we know the path's pathkeys and parameterization accurately,
630 * and have lower bounds for its costs.
631 *
632 * Note that we do not know the path's rowcount, since getting an estimate for
633 * that is too expensive to do before prechecking. We assume here that paths
634 * of a superset parameterization will generate fewer rows; if that holds,
635 * then paths with different parameterizations cannot dominate each other
636 * and so we can simply ignore existing paths of another parameterization.
637 * (In the infrequent cases where that rule of thumb fails, add_path will
638 * get rid of the inferior path.)
639 *
640 * At the time this is called, we haven't actually built a Path structure,
641 * so the required information has to be passed piecemeal.
642 */
643 bool
add_path_precheck(RelOptInfo * parent_rel,Cost startup_cost,Cost total_cost,List * pathkeys,Relids required_outer)644 add_path_precheck(RelOptInfo *parent_rel,
645 Cost startup_cost, Cost total_cost,
646 List *pathkeys, Relids required_outer)
647 {
648 List *new_path_pathkeys;
649 bool consider_startup;
650 ListCell *p1;
651
652 /* Pretend parameterized paths have no pathkeys, per add_path policy */
653 new_path_pathkeys = required_outer ? NIL : pathkeys;
654
655 /* Decide whether new path's startup cost is interesting */
656 consider_startup = required_outer ? parent_rel->consider_param_startup : parent_rel->consider_startup;
657
658 foreach(p1, parent_rel->pathlist)
659 {
660 Path *old_path = (Path *) lfirst(p1);
661 PathKeysComparison keyscmp;
662
663 /*
664 * We are looking for an old_path with the same parameterization (and
665 * by assumption the same rowcount) that dominates the new path on
666 * pathkeys as well as both cost metrics. If we find one, we can
667 * reject the new path.
668 *
669 * Cost comparisons here should match compare_path_costs_fuzzily.
670 */
671 if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
672 {
673 /* new path can win on startup cost only if consider_startup */
674 if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR ||
675 !consider_startup)
676 {
677 /* new path loses on cost, so check pathkeys... */
678 List *old_path_pathkeys;
679
680 old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
681 keyscmp = compare_pathkeys(new_path_pathkeys,
682 old_path_pathkeys);
683 if (keyscmp == PATHKEYS_EQUAL ||
684 keyscmp == PATHKEYS_BETTER2)
685 {
686 /* new path does not win on pathkeys... */
687 if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
688 {
689 /* Found an old path that dominates the new one */
690 return false;
691 }
692 }
693 }
694 }
695 else
696 {
697 /*
698 * Since the pathlist is sorted by total_cost, we can stop looking
699 * once we reach a path with a total_cost larger than the new
700 * path's.
701 */
702 break;
703 }
704 }
705
706 return true;
707 }
708
709 /*
710 * add_partial_path
711 * Like add_path, our goal here is to consider whether a path is worthy
712 * of being kept around, but the considerations here are a bit different.
713 * A partial path is one which can be executed in any number of workers in
714 * parallel such that each worker will generate a subset of the path's
715 * overall result.
716 *
717 * As in add_path, the partial_pathlist is kept sorted with the cheapest
718 * total path in front. This is depended on by multiple places, which
719 * just take the front entry as the cheapest path without searching.
720 *
721 * We don't generate parameterized partial paths for several reasons. Most
722 * importantly, they're not safe to execute, because there's nothing to
723 * make sure that a parallel scan within the parameterized portion of the
724 * plan is running with the same value in every worker at the same time.
725 * Fortunately, it seems unlikely to be worthwhile anyway, because having
726 * each worker scan the entire outer relation and a subset of the inner
727 * relation will generally be a terrible plan. The inner (parameterized)
728 * side of the plan will be small anyway. There could be rare cases where
729 * this wins big - e.g. if join order constraints put a 1-row relation on
730 * the outer side of the topmost join with a parameterized plan on the inner
731 * side - but we'll have to be content not to handle such cases until
732 * somebody builds an executor infrastructure that can cope with them.
733 *
734 * Because we don't consider parameterized paths here, we also don't
735 * need to consider the row counts as a measure of quality: every path will
736 * produce the same number of rows. Neither do we need to consider startup
737 * costs: parallelism is only used for plans that will be run to completion.
738 * Therefore, this routine is much simpler than add_path: it needs to
739 * consider only pathkeys and total cost.
740 *
741 * As with add_path, we pfree paths that are found to be dominated by
742 * another partial path; this requires that there be no other references to
743 * such paths yet. Hence, GatherPaths must not be created for a rel until
744 * we're done creating all partial paths for it. Unlike add_path, we don't
745 * take an exception for IndexPaths as partial index paths won't be
746 * referenced by partial BitmapHeapPaths.
747 */
748 void
add_partial_path(RelOptInfo * parent_rel,Path * new_path)749 add_partial_path(RelOptInfo *parent_rel, Path *new_path)
750 {
751 bool accept_new = true; /* unless we find a superior old path */
752 int insert_at = 0; /* where to insert new item */
753 ListCell *p1;
754
755 /* Check for query cancel. */
756 CHECK_FOR_INTERRUPTS();
757
758 /* Path to be added must be parallel safe. */
759 Assert(new_path->parallel_safe);
760
761 /* Relation should be OK for parallelism, too. */
762 Assert(parent_rel->consider_parallel);
763
764 /*
765 * As in add_path, throw out any paths which are dominated by the new
766 * path, but throw out the new path if some existing path dominates it.
767 */
768 foreach(p1, parent_rel->partial_pathlist)
769 {
770 Path *old_path = (Path *) lfirst(p1);
771 bool remove_old = false; /* unless new proves superior */
772 PathKeysComparison keyscmp;
773
774 /* Compare pathkeys. */
775 keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
776
777 /* Unless pathkeys are incompatible, keep just one of the two paths. */
778 if (keyscmp != PATHKEYS_DIFFERENT)
779 {
780 if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
781 {
782 /* New path costs more; keep it only if pathkeys are better. */
783 if (keyscmp != PATHKEYS_BETTER1)
784 accept_new = false;
785 }
786 else if (old_path->total_cost > new_path->total_cost
787 * STD_FUZZ_FACTOR)
788 {
789 /* Old path costs more; keep it only if pathkeys are better. */
790 if (keyscmp != PATHKEYS_BETTER2)
791 remove_old = true;
792 }
793 else if (keyscmp == PATHKEYS_BETTER1)
794 {
795 /* Costs are about the same, new path has better pathkeys. */
796 remove_old = true;
797 }
798 else if (keyscmp == PATHKEYS_BETTER2)
799 {
800 /* Costs are about the same, old path has better pathkeys. */
801 accept_new = false;
802 }
803 else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
804 {
805 /* Pathkeys are the same, and the old path costs more. */
806 remove_old = true;
807 }
808 else
809 {
810 /*
811 * Pathkeys are the same, and new path isn't materially
812 * cheaper.
813 */
814 accept_new = false;
815 }
816 }
817
818 /*
819 * Remove current element from partial_pathlist if dominated by new.
820 */
821 if (remove_old)
822 {
823 parent_rel->partial_pathlist =
824 foreach_delete_current(parent_rel->partial_pathlist, p1);
825 pfree(old_path);
826 }
827 else
828 {
829 /* new belongs after this old path if it has cost >= old's */
830 if (new_path->total_cost >= old_path->total_cost)
831 insert_at = foreach_current_index(p1) + 1;
832 }
833
834 /*
835 * If we found an old path that dominates new_path, we can quit
836 * scanning the partial_pathlist; we will not add new_path, and we
837 * assume new_path cannot dominate any later path.
838 */
839 if (!accept_new)
840 break;
841 }
842
843 if (accept_new)
844 {
845 /* Accept the new path: insert it at proper place */
846 parent_rel->partial_pathlist =
847 list_insert_nth(parent_rel->partial_pathlist, insert_at, new_path);
848 }
849 else
850 {
851 /* Reject and recycle the new path */
852 pfree(new_path);
853 }
854 }
855
856 /*
857 * add_partial_path_precheck
858 * Check whether a proposed new partial path could possibly get accepted.
859 *
860 * Unlike add_path_precheck, we can ignore startup cost and parameterization,
861 * since they don't matter for partial paths (see add_partial_path). But
862 * we do want to make sure we don't add a partial path if there's already
863 * a complete path that dominates it, since in that case the proposed path
864 * is surely a loser.
865 */
866 bool
add_partial_path_precheck(RelOptInfo * parent_rel,Cost total_cost,List * pathkeys)867 add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost,
868 List *pathkeys)
869 {
870 ListCell *p1;
871
872 /*
873 * Our goal here is twofold. First, we want to find out whether this path
874 * is clearly inferior to some existing partial path. If so, we want to
875 * reject it immediately. Second, we want to find out whether this path
876 * is clearly superior to some existing partial path -- at least, modulo
877 * final cost computations. If so, we definitely want to consider it.
878 *
879 * Unlike add_path(), we always compare pathkeys here. This is because we
880 * expect partial_pathlist to be very short, and getting a definitive
881 * answer at this stage avoids the need to call add_path_precheck.
882 */
883 foreach(p1, parent_rel->partial_pathlist)
884 {
885 Path *old_path = (Path *) lfirst(p1);
886 PathKeysComparison keyscmp;
887
888 keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
889 if (keyscmp != PATHKEYS_DIFFERENT)
890 {
891 if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
892 keyscmp != PATHKEYS_BETTER1)
893 return false;
894 if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
895 keyscmp != PATHKEYS_BETTER2)
896 return true;
897 }
898 }
899
900 /*
901 * This path is neither clearly inferior to an existing partial path nor
902 * clearly good enough that it might replace one. Compare it to
903 * non-parallel plans. If it loses even before accounting for the cost of
904 * the Gather node, we should definitely reject it.
905 *
906 * Note that we pass the total_cost to add_path_precheck twice. This is
907 * because it's never advantageous to consider the startup cost of a
908 * partial path; the resulting plans, if run in parallel, will be run to
909 * completion.
910 */
911 if (!add_path_precheck(parent_rel, total_cost, total_cost, pathkeys,
912 NULL))
913 return false;
914
915 return true;
916 }
917
918
919 /*****************************************************************************
920 * PATH NODE CREATION ROUTINES
921 *****************************************************************************/
922
923 /*
924 * create_seqscan_path
925 * Creates a path corresponding to a sequential scan, returning the
926 * pathnode.
927 */
928 Path *
create_seqscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer,int parallel_workers)929 create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
930 Relids required_outer, int parallel_workers)
931 {
932 Path *pathnode = makeNode(Path);
933
934 pathnode->pathtype = T_SeqScan;
935 pathnode->parent = rel;
936 pathnode->pathtarget = rel->reltarget;
937 pathnode->param_info = get_baserel_parampathinfo(root, rel,
938 required_outer);
939 pathnode->parallel_aware = parallel_workers > 0 ? true : false;
940 pathnode->parallel_safe = rel->consider_parallel;
941 pathnode->parallel_workers = parallel_workers;
942 pathnode->pathkeys = NIL; /* seqscan has unordered result */
943
944 cost_seqscan(pathnode, root, rel, pathnode->param_info);
945
946 return pathnode;
947 }
948
949 /*
950 * create_samplescan_path
951 * Creates a path node for a sampled table scan.
952 */
953 Path *
create_samplescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)954 create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
955 {
956 Path *pathnode = makeNode(Path);
957
958 pathnode->pathtype = T_SampleScan;
959 pathnode->parent = rel;
960 pathnode->pathtarget = rel->reltarget;
961 pathnode->param_info = get_baserel_parampathinfo(root, rel,
962 required_outer);
963 pathnode->parallel_aware = false;
964 pathnode->parallel_safe = rel->consider_parallel;
965 pathnode->parallel_workers = 0;
966 pathnode->pathkeys = NIL; /* samplescan has unordered result */
967
968 cost_samplescan(pathnode, root, rel, pathnode->param_info);
969
970 return pathnode;
971 }
972
973 /*
974 * create_index_path
975 * Creates a path node for an index scan.
976 *
977 * 'index' is a usable index.
978 * 'indexclauses' is a list of IndexClause nodes representing clauses
979 * to be enforced as qual conditions in the scan.
980 * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
981 * to be used as index ordering operators in the scan.
982 * 'indexorderbycols' is an integer list of index column numbers (zero based)
983 * the ordering operators can be used with.
984 * 'pathkeys' describes the ordering of the path.
985 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
986 * for an ordered index, or NoMovementScanDirection for
987 * an unordered index.
988 * 'indexonly' is true if an index-only scan is wanted.
989 * 'required_outer' is the set of outer relids for a parameterized path.
990 * 'loop_count' is the number of repetitions of the indexscan to factor into
991 * estimates of caching behavior.
992 * 'partial_path' is true if constructing a parallel index scan path.
993 *
994 * Returns the new path node.
995 */
996 IndexPath *
create_index_path(PlannerInfo * root,IndexOptInfo * index,List * indexclauses,List * indexorderbys,List * indexorderbycols,List * pathkeys,ScanDirection indexscandir,bool indexonly,Relids required_outer,double loop_count,bool partial_path)997 create_index_path(PlannerInfo *root,
998 IndexOptInfo *index,
999 List *indexclauses,
1000 List *indexorderbys,
1001 List *indexorderbycols,
1002 List *pathkeys,
1003 ScanDirection indexscandir,
1004 bool indexonly,
1005 Relids required_outer,
1006 double loop_count,
1007 bool partial_path)
1008 {
1009 IndexPath *pathnode = makeNode(IndexPath);
1010 RelOptInfo *rel = index->rel;
1011
1012 pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
1013 pathnode->path.parent = rel;
1014 pathnode->path.pathtarget = rel->reltarget;
1015 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1016 required_outer);
1017 pathnode->path.parallel_aware = false;
1018 pathnode->path.parallel_safe = rel->consider_parallel;
1019 pathnode->path.parallel_workers = 0;
1020 pathnode->path.pathkeys = pathkeys;
1021
1022 pathnode->indexinfo = index;
1023 pathnode->indexclauses = indexclauses;
1024 pathnode->indexorderbys = indexorderbys;
1025 pathnode->indexorderbycols = indexorderbycols;
1026 pathnode->indexscandir = indexscandir;
1027
1028 cost_index(pathnode, root, loop_count, partial_path);
1029
1030 return pathnode;
1031 }
1032
1033 /*
1034 * create_bitmap_heap_path
1035 * Creates a path node for a bitmap scan.
1036 *
1037 * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
1038 * 'required_outer' is the set of outer relids for a parameterized path.
1039 * 'loop_count' is the number of repetitions of the indexscan to factor into
1040 * estimates of caching behavior.
1041 *
1042 * loop_count should match the value used when creating the component
1043 * IndexPaths.
1044 */
1045 BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo * root,RelOptInfo * rel,Path * bitmapqual,Relids required_outer,double loop_count,int parallel_degree)1046 create_bitmap_heap_path(PlannerInfo *root,
1047 RelOptInfo *rel,
1048 Path *bitmapqual,
1049 Relids required_outer,
1050 double loop_count,
1051 int parallel_degree)
1052 {
1053 BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
1054
1055 pathnode->path.pathtype = T_BitmapHeapScan;
1056 pathnode->path.parent = rel;
1057 pathnode->path.pathtarget = rel->reltarget;
1058 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1059 required_outer);
1060 pathnode->path.parallel_aware = parallel_degree > 0 ? true : false;
1061 pathnode->path.parallel_safe = rel->consider_parallel;
1062 pathnode->path.parallel_workers = parallel_degree;
1063 pathnode->path.pathkeys = NIL; /* always unordered */
1064
1065 pathnode->bitmapqual = bitmapqual;
1066
1067 cost_bitmap_heap_scan(&pathnode->path, root, rel,
1068 pathnode->path.param_info,
1069 bitmapqual, loop_count);
1070
1071 return pathnode;
1072 }
1073
1074 /*
1075 * create_bitmap_and_path
1076 * Creates a path node representing a BitmapAnd.
1077 */
1078 BitmapAndPath *
create_bitmap_and_path(PlannerInfo * root,RelOptInfo * rel,List * bitmapquals)1079 create_bitmap_and_path(PlannerInfo *root,
1080 RelOptInfo *rel,
1081 List *bitmapquals)
1082 {
1083 BitmapAndPath *pathnode = makeNode(BitmapAndPath);
1084 Relids required_outer = NULL;
1085 ListCell *lc;
1086
1087 pathnode->path.pathtype = T_BitmapAnd;
1088 pathnode->path.parent = rel;
1089 pathnode->path.pathtarget = rel->reltarget;
1090
1091 /*
1092 * Identify the required outer rels as the union of what the child paths
1093 * depend on. (Alternatively, we could insist that the caller pass this
1094 * in, but it's more convenient and reliable to compute it here.)
1095 */
1096 foreach(lc, bitmapquals)
1097 {
1098 Path *bitmapqual = (Path *) lfirst(lc);
1099
1100 required_outer = bms_add_members(required_outer,
1101 PATH_REQ_OUTER(bitmapqual));
1102 }
1103 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1104 required_outer);
1105
1106 /*
1107 * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
1108 * parallel-safe if and only if rel->consider_parallel is set. So, we can
1109 * set the flag for this path based only on the relation-level flag,
1110 * without actually iterating over the list of children.
1111 */
1112 pathnode->path.parallel_aware = false;
1113 pathnode->path.parallel_safe = rel->consider_parallel;
1114 pathnode->path.parallel_workers = 0;
1115
1116 pathnode->path.pathkeys = NIL; /* always unordered */
1117
1118 pathnode->bitmapquals = bitmapquals;
1119
1120 /* this sets bitmapselectivity as well as the regular cost fields: */
1121 cost_bitmap_and_node(pathnode, root);
1122
1123 return pathnode;
1124 }
1125
1126 /*
1127 * create_bitmap_or_path
1128 * Creates a path node representing a BitmapOr.
1129 */
1130 BitmapOrPath *
create_bitmap_or_path(PlannerInfo * root,RelOptInfo * rel,List * bitmapquals)1131 create_bitmap_or_path(PlannerInfo *root,
1132 RelOptInfo *rel,
1133 List *bitmapquals)
1134 {
1135 BitmapOrPath *pathnode = makeNode(BitmapOrPath);
1136 Relids required_outer = NULL;
1137 ListCell *lc;
1138
1139 pathnode->path.pathtype = T_BitmapOr;
1140 pathnode->path.parent = rel;
1141 pathnode->path.pathtarget = rel->reltarget;
1142
1143 /*
1144 * Identify the required outer rels as the union of what the child paths
1145 * depend on. (Alternatively, we could insist that the caller pass this
1146 * in, but it's more convenient and reliable to compute it here.)
1147 */
1148 foreach(lc, bitmapquals)
1149 {
1150 Path *bitmapqual = (Path *) lfirst(lc);
1151
1152 required_outer = bms_add_members(required_outer,
1153 PATH_REQ_OUTER(bitmapqual));
1154 }
1155 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1156 required_outer);
1157
1158 /*
1159 * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
1160 * parallel-safe if and only if rel->consider_parallel is set. So, we can
1161 * set the flag for this path based only on the relation-level flag,
1162 * without actually iterating over the list of children.
1163 */
1164 pathnode->path.parallel_aware = false;
1165 pathnode->path.parallel_safe = rel->consider_parallel;
1166 pathnode->path.parallel_workers = 0;
1167
1168 pathnode->path.pathkeys = NIL; /* always unordered */
1169
1170 pathnode->bitmapquals = bitmapquals;
1171
1172 /* this sets bitmapselectivity as well as the regular cost fields: */
1173 cost_bitmap_or_node(pathnode, root);
1174
1175 return pathnode;
1176 }
1177
1178 /*
1179 * create_tidscan_path
1180 * Creates a path corresponding to a scan by TID, returning the pathnode.
1181 */
1182 TidPath *
create_tidscan_path(PlannerInfo * root,RelOptInfo * rel,List * tidquals,Relids required_outer)1183 create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
1184 Relids required_outer)
1185 {
1186 TidPath *pathnode = makeNode(TidPath);
1187
1188 pathnode->path.pathtype = T_TidScan;
1189 pathnode->path.parent = rel;
1190 pathnode->path.pathtarget = rel->reltarget;
1191 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1192 required_outer);
1193 pathnode->path.parallel_aware = false;
1194 pathnode->path.parallel_safe = rel->consider_parallel;
1195 pathnode->path.parallel_workers = 0;
1196 pathnode->path.pathkeys = NIL; /* always unordered */
1197
1198 pathnode->tidquals = tidquals;
1199
1200 cost_tidscan(&pathnode->path, root, rel, tidquals,
1201 pathnode->path.param_info);
1202
1203 return pathnode;
1204 }
1205
1206 /*
1207 * create_append_path
1208 * Creates a path corresponding to an Append plan, returning the
1209 * pathnode.
1210 *
1211 * Note that we must handle subpaths = NIL, representing a dummy access path.
1212 * Also, there are callers that pass root = NULL.
1213 */
AppendPath *
create_append_path(PlannerInfo *root,
				   RelOptInfo *rel,
				   List *subpaths, List *partial_subpaths,
				   List *pathkeys, Relids required_outer,
				   int parallel_workers, bool parallel_aware,
				   List *partitioned_rels, double rows)
{
	AppendPath *pathnode = makeNode(AppendPath);
	ListCell   *l;

	/* a parallel-aware Append must have at least one worker */
	Assert(!parallel_aware || parallel_workers > 0);

	pathnode->path.pathtype = T_Append;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;

	/*
	 * When generating an Append path for a partitioned table, there may be
	 * parameters that are useful so we can eliminate certain partitions
	 * during execution.  Here we'll go all the way and fully populate the
	 * parameter info data as we do for normal base relations.  However, we
	 * need only bother doing this for RELOPT_BASEREL rels, as
	 * RELOPT_OTHER_MEMBER_REL's Append paths are merged into the base rel's
	 * Append subpaths.  It would do no harm to do this, we just avoid it to
	 * save wasting effort.  (Note: root may be NULL here; see function
	 * header comment.)
	 */
	if (partitioned_rels != NIL && root && rel->reloptkind == RELOPT_BASEREL)
		pathnode->path.param_info = get_baserel_parampathinfo(root,
															  rel,
															  required_outer);
	else
		pathnode->path.param_info = get_appendrel_parampathinfo(rel,
																required_outer);

	pathnode->path.parallel_aware = parallel_aware;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = parallel_workers;
	pathnode->path.pathkeys = pathkeys;
	/* copy, so the caller's list is not shared with this path */
	pathnode->partitioned_rels = list_copy(partitioned_rels);

	/*
	 * For parallel append, non-partial paths are sorted by descending total
	 * costs.  That way, the total time to finish all non-partial paths is
	 * minimized.  Also, the partial paths are sorted by descending startup
	 * costs.  There may be some paths that require to do startup work by a
	 * single worker.  In such case, it's better for workers to choose the
	 * expensive ones first, whereas the leader should choose the cheapest
	 * startup plan.
	 */
	if (pathnode->path.parallel_aware)
	{
		/*
		 * We mustn't fiddle with the order of subpaths when the Append has
		 * pathkeys.  The order they're listed in is critical to keeping the
		 * pathkeys valid.
		 */
		Assert(pathkeys == NIL);

		/* note: list_sort sorts the caller-supplied lists in place */
		list_sort(subpaths, append_total_cost_compare);
		list_sort(partial_subpaths, append_startup_cost_compare);
	}
	/* must record the boundary BEFORE list_concat absorbs partial_subpaths */
	pathnode->first_partial_path = list_length(subpaths);
	pathnode->subpaths = list_concat(subpaths, partial_subpaths);

	/*
	 * Apply query-wide LIMIT if known and path is for sole base relation.
	 * (Handling this at this low level is a bit klugy.)
	 */
	if (root != NULL && bms_equal(rel->relids, root->all_baserels))
		pathnode->limit_tuples = root->limit_tuples;
	else
		pathnode->limit_tuples = -1.0;	/* -1 means "no limit known" */

	foreach(l, pathnode->subpaths)
	{
		Path	   *subpath = (Path *) lfirst(l);

		/* the Append is parallel-safe only if every child is */
		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
			subpath->parallel_safe;

		/* All child paths must have same parameterization */
		Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
	}

	Assert(!parallel_aware || pathnode->path.parallel_safe);

	/*
	 * If there's exactly one child path, the Append is a no-op and will be
	 * discarded later (in setrefs.c); therefore, we can inherit the child's
	 * size and cost, as well as its pathkeys if any (overriding whatever the
	 * caller might've said).  Otherwise, we must do the normal costsize
	 * calculation.
	 */
	if (list_length(pathnode->subpaths) == 1)
	{
		Path	   *child = (Path *) linitial(pathnode->subpaths);

		pathnode->path.rows = child->rows;
		pathnode->path.startup_cost = child->startup_cost;
		pathnode->path.total_cost = child->total_cost;
		pathnode->path.pathkeys = child->pathkeys;
	}
	else
		cost_append(pathnode);

	/* If the caller provided a row estimate, override the computed value. */
	if (rows >= 0)
		pathnode->path.rows = rows;

	return pathnode;
}
1326
1327 /*
1328 * append_total_cost_compare
1329 * list_sort comparator for sorting append child paths
1330 * by total_cost descending
1331 *
1332 * For equal total costs, we fall back to comparing startup costs; if those
1333 * are equal too, break ties using bms_compare on the paths' relids.
1334 * (This is to avoid getting unpredictable results from list_sort.)
1335 */
1336 static int
append_total_cost_compare(const ListCell * a,const ListCell * b)1337 append_total_cost_compare(const ListCell *a, const ListCell *b)
1338 {
1339 Path *path1 = (Path *) lfirst(a);
1340 Path *path2 = (Path *) lfirst(b);
1341 int cmp;
1342
1343 cmp = compare_path_costs(path1, path2, TOTAL_COST);
1344 if (cmp != 0)
1345 return -cmp;
1346 return bms_compare(path1->parent->relids, path2->parent->relids);
1347 }
1348
1349 /*
1350 * append_startup_cost_compare
1351 * list_sort comparator for sorting append child paths
1352 * by startup_cost descending
1353 *
1354 * For equal startup costs, we fall back to comparing total costs; if those
1355 * are equal too, break ties using bms_compare on the paths' relids.
1356 * (This is to avoid getting unpredictable results from list_sort.)
1357 */
1358 static int
append_startup_cost_compare(const ListCell * a,const ListCell * b)1359 append_startup_cost_compare(const ListCell *a, const ListCell *b)
1360 {
1361 Path *path1 = (Path *) lfirst(a);
1362 Path *path2 = (Path *) lfirst(b);
1363 int cmp;
1364
1365 cmp = compare_path_costs(path1, path2, STARTUP_COST);
1366 if (cmp != 0)
1367 return -cmp;
1368 return bms_compare(path1->parent->relids, path2->parent->relids);
1369 }
1370
1371 /*
1372 * create_merge_append_path
1373 * Creates a path corresponding to a MergeAppend plan, returning the
1374 * pathnode.
1375 */
MergeAppendPath *
create_merge_append_path(PlannerInfo *root,
						 RelOptInfo *rel,
						 List *subpaths,
						 List *pathkeys,
						 Relids required_outer,
						 List *partitioned_rels)
{
	MergeAppendPath *pathnode = makeNode(MergeAppendPath);
	Cost		input_startup_cost;
	Cost		input_total_cost;
	ListCell   *l;

	pathnode->path.pathtype = T_MergeAppend;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = get_appendrel_parampathinfo(rel,
															required_outer);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = pathkeys;
	/* copy, so the caller's list is not shared with this path */
	pathnode->partitioned_rels = list_copy(partitioned_rels);
	pathnode->subpaths = subpaths;

	/*
	 * Apply query-wide LIMIT if known and path is for sole base relation.
	 * (Handling this at this low level is a bit klugy.)  Unlike
	 * create_append_path, root is assumed non-NULL here.
	 */
	if (bms_equal(rel->relids, root->all_baserels))
		pathnode->limit_tuples = root->limit_tuples;
	else
		pathnode->limit_tuples = -1.0;	/* -1 means "no limit known" */

	/*
	 * Add up the sizes and costs of the input paths.
	 */
	pathnode->path.rows = 0;
	input_startup_cost = 0;
	input_total_cost = 0;
	foreach(l, subpaths)
	{
		Path	   *subpath = (Path *) lfirst(l);

		pathnode->path.rows += subpath->rows;
		/* the MergeAppend is parallel-safe only if every child is */
		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
			subpath->parallel_safe;

		if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
		{
			/* Subpath is adequately ordered, we won't need to sort it */
			input_startup_cost += subpath->startup_cost;
			input_total_cost += subpath->total_cost;
		}
		else
		{
			/* We'll need to insert a Sort node, so include cost for that */
			Path		sort_path;	/* dummy for result of cost_sort */

			cost_sort(&sort_path,
					  root,
					  pathkeys,
					  subpath->total_cost,
					  subpath->parent->tuples,
					  subpath->pathtarget->width,
					  0.0,
					  work_mem,
					  pathnode->limit_tuples);
			input_startup_cost += sort_path.startup_cost;
			input_total_cost += sort_path.total_cost;
		}

		/* All child paths must have same parameterization */
		Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
	}

	/*
	 * Now we can compute total costs of the MergeAppend.  If there's exactly
	 * one child path, the MergeAppend is a no-op and will be discarded later
	 * (in setrefs.c); otherwise we do the normal cost calculation.
	 */
	if (list_length(subpaths) == 1)
	{
		/* no-op case: just pass through the lone child's costs */
		pathnode->path.startup_cost = input_startup_cost;
		pathnode->path.total_cost = input_total_cost;
	}
	else
		cost_merge_append(&pathnode->path, root,
						  pathkeys, list_length(subpaths),
						  input_startup_cost, input_total_cost,
						  pathnode->path.rows);

	return pathnode;
}
1470
1471 /*
1472 * create_group_result_path
1473 * Creates a path representing a Result-and-nothing-else plan.
1474 *
1475 * This is only used for degenerate grouping cases, in which we know we
1476 * need to produce one result row, possibly filtered by a HAVING qual.
1477 */
1478 GroupResultPath *
create_group_result_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,List * havingqual)1479 create_group_result_path(PlannerInfo *root, RelOptInfo *rel,
1480 PathTarget *target, List *havingqual)
1481 {
1482 GroupResultPath *pathnode = makeNode(GroupResultPath);
1483
1484 pathnode->path.pathtype = T_Result;
1485 pathnode->path.parent = rel;
1486 pathnode->path.pathtarget = target;
1487 pathnode->path.param_info = NULL; /* there are no other rels... */
1488 pathnode->path.parallel_aware = false;
1489 pathnode->path.parallel_safe = rel->consider_parallel;
1490 pathnode->path.parallel_workers = 0;
1491 pathnode->path.pathkeys = NIL;
1492 pathnode->quals = havingqual;
1493
1494 /*
1495 * We can't quite use cost_resultscan() because the quals we want to
1496 * account for are not baserestrict quals of the rel. Might as well just
1497 * hack it here.
1498 */
1499 pathnode->path.rows = 1;
1500 pathnode->path.startup_cost = target->cost.startup;
1501 pathnode->path.total_cost = target->cost.startup +
1502 cpu_tuple_cost + target->cost.per_tuple;
1503
1504 /*
1505 * Add cost of qual, if any --- but we ignore its selectivity, since our
1506 * rowcount estimate should be 1 no matter what the qual is.
1507 */
1508 if (havingqual)
1509 {
1510 QualCost qual_cost;
1511
1512 cost_qual_eval(&qual_cost, havingqual, root);
1513 /* havingqual is evaluated once at startup */
1514 pathnode->path.startup_cost += qual_cost.startup + qual_cost.per_tuple;
1515 pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
1516 }
1517
1518 return pathnode;
1519 }
1520
1521 /*
1522 * create_material_path
1523 * Creates a path corresponding to a Material plan, returning the
1524 * pathnode.
1525 */
1526 MaterialPath *
create_material_path(RelOptInfo * rel,Path * subpath)1527 create_material_path(RelOptInfo *rel, Path *subpath)
1528 {
1529 MaterialPath *pathnode = makeNode(MaterialPath);
1530
1531 Assert(subpath->parent == rel);
1532
1533 pathnode->path.pathtype = T_Material;
1534 pathnode->path.parent = rel;
1535 pathnode->path.pathtarget = rel->reltarget;
1536 pathnode->path.param_info = subpath->param_info;
1537 pathnode->path.parallel_aware = false;
1538 pathnode->path.parallel_safe = rel->consider_parallel &&
1539 subpath->parallel_safe;
1540 pathnode->path.parallel_workers = subpath->parallel_workers;
1541 pathnode->path.pathkeys = subpath->pathkeys;
1542
1543 pathnode->subpath = subpath;
1544
1545 cost_material(&pathnode->path,
1546 subpath->startup_cost,
1547 subpath->total_cost,
1548 subpath->rows,
1549 subpath->pathtarget->width);
1550
1551 return pathnode;
1552 }
1553
1554 /*
1555 * create_unique_path
1556 * Creates a path representing elimination of distinct rows from the
1557 * input data. Distinct-ness is defined according to the needs of the
1558 * semijoin represented by sjinfo. If it is not possible to identify
1559 * how to make the data unique, NULL is returned.
1560 *
1561 * If used at all, this is likely to be called repeatedly on the same rel;
1562 * and the input subpath should always be the same (the cheapest_total path
1563 * for the rel). So we cache the result.
1564 */
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
				   SpecialJoinInfo *sjinfo)
{
	UniquePath *pathnode;
	Path		sort_path;		/* dummy for result of cost_sort */
	Path		agg_path;		/* dummy for result of cost_agg */
	MemoryContext oldcontext;
	int			numCols;

	/* Caller made a mistake if subpath isn't cheapest_total ... */
	Assert(subpath == rel->cheapest_total_path);
	Assert(subpath->parent == rel);
	/* ... or if SpecialJoinInfo is the wrong one */
	Assert(sjinfo->jointype == JOIN_SEMI);
	Assert(bms_equal(rel->relids, sjinfo->syn_righthand));

	/* If result already cached, return it */
	if (rel->cheapest_unique_path)
		return (UniquePath *) rel->cheapest_unique_path;

	/* If it's not possible to unique-ify, return NULL */
	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
		return NULL;

	/*
	 * When called during GEQO join planning, we are in a short-lived memory
	 * context.  We must make sure that the path and any subsidiary data
	 * structures created for a baserel survive the GEQO cycle, else the
	 * baserel is trashed for future GEQO cycles.  On the other hand, when we
	 * are creating those for a joinrel during GEQO, we don't want them to
	 * clutter the main planning context.  Upshot is that the best solution is
	 * to explicitly allocate memory in the same context the given RelOptInfo
	 * is in.
	 */
	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));

	pathnode = makeNode(UniquePath);

	pathnode->path.pathtype = T_Unique;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = subpath->param_info;
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel &&
		subpath->parallel_safe;
	pathnode->path.parallel_workers = subpath->parallel_workers;

	/*
	 * Assume the output is unsorted, since we don't necessarily have pathkeys
	 * to represent it.  (This might get overridden below.)
	 */
	pathnode->path.pathkeys = NIL;

	pathnode->subpath = subpath;
	pathnode->in_operators = sjinfo->semi_operators;
	pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;

	/*
	 * If the input is a relation and it has a unique index that proves the
	 * semi_rhs_exprs are unique, then we don't need to do anything.  Note
	 * that relation_has_unique_index_for automatically considers restriction
	 * clauses for the rel, as well.
	 */
	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
		relation_has_unique_index_for(root, rel, NIL,
									  sjinfo->semi_rhs_exprs,
									  sjinfo->semi_operators))
	{
		/* no-op unique path: inherit the subpath's size, costs, and order */
		pathnode->umethod = UNIQUE_PATH_NOOP;
		pathnode->path.rows = rel->rows;
		pathnode->path.startup_cost = subpath->startup_cost;
		pathnode->path.total_cost = subpath->total_cost;
		pathnode->path.pathkeys = subpath->pathkeys;

		rel->cheapest_unique_path = (Path *) pathnode;

		MemoryContextSwitchTo(oldcontext);

		return pathnode;
	}

	/*
	 * If the input is a subquery whose output must be unique already, then we
	 * don't need to do anything.  The test for uniqueness has to consider
	 * exactly which columns we are extracting; for example "SELECT DISTINCT
	 * x,y" doesn't guarantee that x alone is distinct.  So we cannot check for
	 * this optimization unless semi_rhs_exprs consists only of simple Vars
	 * referencing subquery outputs.  (Possibly we could do something with
	 * expressions in the subquery outputs, too, but for now keep it simple.)
	 */
	if (rel->rtekind == RTE_SUBQUERY)
	{
		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);

		if (query_supports_distinctness(rte->subquery))
		{
			List	   *sub_tlist_colnos;

			/* returns NIL unless all exprs are simple Vars of this rel */
			sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
												   rel->relid);

			if (sub_tlist_colnos &&
				query_is_distinct_for(rte->subquery,
									  sub_tlist_colnos,
									  sjinfo->semi_operators))
			{
				/* no-op unique path, as in the unique-index case above */
				pathnode->umethod = UNIQUE_PATH_NOOP;
				pathnode->path.rows = rel->rows;
				pathnode->path.startup_cost = subpath->startup_cost;
				pathnode->path.total_cost = subpath->total_cost;
				pathnode->path.pathkeys = subpath->pathkeys;

				rel->cheapest_unique_path = (Path *) pathnode;

				MemoryContextSwitchTo(oldcontext);

				return pathnode;
			}
		}
	}

	/* Estimate number of output rows */
	pathnode->path.rows = estimate_num_groups(root,
											  sjinfo->semi_rhs_exprs,
											  rel->rows,
											  NULL);
	numCols = list_length(sjinfo->semi_rhs_exprs);

	if (sjinfo->semi_can_btree)
	{
		/*
		 * Estimate cost for sort+unique implementation
		 */
		cost_sort(&sort_path, root, NIL,
				  subpath->total_cost,
				  rel->rows,
				  subpath->pathtarget->width,
				  0.0,
				  work_mem,
				  -1.0);

		/*
		 * Charge one cpu_operator_cost per comparison per input tuple.  We
		 * assume all columns get compared at most of the tuples.  (XXX
		 * probably this is an overestimate.)  This should agree with
		 * create_upper_unique_path.
		 */
		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
	}

	if (sjinfo->semi_can_hash)
	{
		/*
		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
		 * planner.c).
		 */
		int			hashentrysize = subpath->pathtarget->width + 64;

		if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
		{
			/*
			 * We should not try to hash.  Hack the SpecialJoinInfo to
			 * remember this, in case we come through here again.
			 */
			sjinfo->semi_can_hash = false;
		}
		else
			cost_agg(&agg_path, root,
					 AGG_HASHED, NULL,
					 numCols, pathnode->path.rows,
					 NIL,
					 subpath->startup_cost,
					 subpath->total_cost,
					 rel->rows,
					 subpath->pathtarget->width);
	}

	/* Pick the cheaper of the viable strategies */
	if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
	{
		if (agg_path.total_cost < sort_path.total_cost)
			pathnode->umethod = UNIQUE_PATH_HASH;
		else
			pathnode->umethod = UNIQUE_PATH_SORT;
	}
	else if (sjinfo->semi_can_btree)
		pathnode->umethod = UNIQUE_PATH_SORT;
	else if (sjinfo->semi_can_hash)
		pathnode->umethod = UNIQUE_PATH_HASH;
	else
	{
		/* we can get here only if we abandoned hashing above */
		MemoryContextSwitchTo(oldcontext);
		return NULL;
	}

	if (pathnode->umethod == UNIQUE_PATH_HASH)
	{
		pathnode->path.startup_cost = agg_path.startup_cost;
		pathnode->path.total_cost = agg_path.total_cost;
	}
	else
	{
		pathnode->path.startup_cost = sort_path.startup_cost;
		pathnode->path.total_cost = sort_path.total_cost;
	}

	rel->cheapest_unique_path = (Path *) pathnode;

	MemoryContextSwitchTo(oldcontext);

	return pathnode;
}
1778
/*
 * create_gather_merge_path
 *
 *	  Creates a path corresponding to a gather merge scan, returning
 *	  the pathnode.
 *
 * Gather Merge reads sorted data from each parallel worker and merges the
 * streams, preserving the given 'pathkeys' ordering.  If the subpath is not
 * already suitably sorted, the cost of an explicit Sort step is charged.
 *
 * 'rows' may optionally be set to override row estimates from other sources
 * (it is passed through to cost_gather_merge).
 */
GatherMergePath *
create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
						 PathTarget *target, List *pathkeys,
						 Relids required_outer, double *rows)
{
	GatherMergePath *pathnode = makeNode(GatherMergePath);
	Cost		input_startup_cost = 0; /* cost of producing sorted input */
	Cost		input_total_cost = 0;

	/* Gather Merge requires a parallel-safe subpath and a sort ordering */
	Assert(subpath->parallel_safe);
	Assert(pathkeys);

	pathnode->path.pathtype = T_GatherMerge;
	pathnode->path.parent = rel;
	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
														  required_outer);
	pathnode->path.parallel_aware = false;

	pathnode->subpath = subpath;
	pathnode->num_workers = subpath->parallel_workers;
	pathnode->path.pathkeys = pathkeys;
	/* default pathtarget to the rel's if the caller didn't give one */
	pathnode->path.pathtarget = target ? target : rel->reltarget;
	/* NOTE(review): "+=" relies on makeNode() zeroing rows, so this is
	 * effectively an assignment; confirm no caller presets rows */
	pathnode->path.rows += subpath->rows;

	if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
	{
		/* Subpath is adequately ordered, we won't need to sort it */
		input_startup_cost += subpath->startup_cost;
		input_total_cost += subpath->total_cost;
	}
	else
	{
		/* We'll need to insert a Sort node, so include cost for that */
		Path		sort_path;	/* dummy for result of cost_sort */

		cost_sort(&sort_path,
				  root,
				  pathkeys,
				  subpath->total_cost,
				  subpath->rows,
				  subpath->pathtarget->width,
				  0.0,
				  work_mem,
				  -1);
		input_startup_cost += sort_path.startup_cost;
		input_total_cost += sort_path.total_cost;
	}

	/* fills in the path's rows/startup_cost/total_cost fields */
	cost_gather_merge(pathnode, root, rel, pathnode->path.param_info,
					  input_startup_cost, input_total_cost, rows);

	return pathnode;
}
1838
1839 /*
1840 * translate_sub_tlist - get subquery column numbers represented by tlist
1841 *
1842 * The given targetlist usually contains only Vars referencing the given relid.
1843 * Extract their varattnos (ie, the column numbers of the subquery) and return
1844 * as an integer List.
1845 *
1846 * If any of the tlist items is not a simple Var, we cannot determine whether
1847 * the subquery's uniqueness condition (if any) matches ours, so punt and
1848 * return NIL.
1849 */
1850 static List *
translate_sub_tlist(List * tlist,int relid)1851 translate_sub_tlist(List *tlist, int relid)
1852 {
1853 List *result = NIL;
1854 ListCell *l;
1855
1856 foreach(l, tlist)
1857 {
1858 Var *var = (Var *) lfirst(l);
1859
1860 if (!var || !IsA(var, Var) ||
1861 var->varno != relid)
1862 return NIL; /* punt */
1863
1864 result = lappend_int(result, var->varattno);
1865 }
1866 return result;
1867 }
1868
1869 /*
1870 * create_gather_path
1871 * Creates a path corresponding to a gather scan, returning the
1872 * pathnode.
1873 *
1874 * 'rows' may optionally be set to override row estimates from other sources.
1875 */
1876 GatherPath *
create_gather_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,Relids required_outer,double * rows)1877 create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
1878 PathTarget *target, Relids required_outer, double *rows)
1879 {
1880 GatherPath *pathnode = makeNode(GatherPath);
1881
1882 Assert(subpath->parallel_safe);
1883
1884 pathnode->path.pathtype = T_Gather;
1885 pathnode->path.parent = rel;
1886 pathnode->path.pathtarget = target;
1887 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1888 required_outer);
1889 pathnode->path.parallel_aware = false;
1890 pathnode->path.parallel_safe = false;
1891 pathnode->path.parallel_workers = 0;
1892 pathnode->path.pathkeys = NIL; /* Gather has unordered result */
1893
1894 pathnode->subpath = subpath;
1895 pathnode->num_workers = subpath->parallel_workers;
1896 pathnode->single_copy = false;
1897
1898 if (pathnode->num_workers == 0)
1899 {
1900 pathnode->path.pathkeys = subpath->pathkeys;
1901 pathnode->num_workers = 1;
1902 pathnode->single_copy = true;
1903 }
1904
1905 cost_gather(pathnode, root, rel, pathnode->path.param_info, rows);
1906
1907 return pathnode;
1908 }
1909
1910 /*
1911 * create_subqueryscan_path
1912 * Creates a path corresponding to a scan of a subquery,
1913 * returning the pathnode.
1914 */
1915 SubqueryScanPath *
create_subqueryscan_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * pathkeys,Relids required_outer)1916 create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
1917 List *pathkeys, Relids required_outer)
1918 {
1919 SubqueryScanPath *pathnode = makeNode(SubqueryScanPath);
1920
1921 pathnode->path.pathtype = T_SubqueryScan;
1922 pathnode->path.parent = rel;
1923 pathnode->path.pathtarget = rel->reltarget;
1924 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
1925 required_outer);
1926 pathnode->path.parallel_aware = false;
1927 pathnode->path.parallel_safe = rel->consider_parallel &&
1928 subpath->parallel_safe;
1929 pathnode->path.parallel_workers = subpath->parallel_workers;
1930 pathnode->path.pathkeys = pathkeys;
1931 pathnode->subpath = subpath;
1932
1933 cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info);
1934
1935 return pathnode;
1936 }
1937
1938 /*
1939 * create_functionscan_path
1940 * Creates a path corresponding to a sequential scan of a function,
1941 * returning the pathnode.
1942 */
1943 Path *
create_functionscan_path(PlannerInfo * root,RelOptInfo * rel,List * pathkeys,Relids required_outer)1944 create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
1945 List *pathkeys, Relids required_outer)
1946 {
1947 Path *pathnode = makeNode(Path);
1948
1949 pathnode->pathtype = T_FunctionScan;
1950 pathnode->parent = rel;
1951 pathnode->pathtarget = rel->reltarget;
1952 pathnode->param_info = get_baserel_parampathinfo(root, rel,
1953 required_outer);
1954 pathnode->parallel_aware = false;
1955 pathnode->parallel_safe = rel->consider_parallel;
1956 pathnode->parallel_workers = 0;
1957 pathnode->pathkeys = pathkeys;
1958
1959 cost_functionscan(pathnode, root, rel, pathnode->param_info);
1960
1961 return pathnode;
1962 }
1963
1964 /*
1965 * create_tablefuncscan_path
1966 * Creates a path corresponding to a sequential scan of a table function,
1967 * returning the pathnode.
1968 */
1969 Path *
create_tablefuncscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1970 create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
1971 Relids required_outer)
1972 {
1973 Path *pathnode = makeNode(Path);
1974
1975 pathnode->pathtype = T_TableFuncScan;
1976 pathnode->parent = rel;
1977 pathnode->pathtarget = rel->reltarget;
1978 pathnode->param_info = get_baserel_parampathinfo(root, rel,
1979 required_outer);
1980 pathnode->parallel_aware = false;
1981 pathnode->parallel_safe = rel->consider_parallel;
1982 pathnode->parallel_workers = 0;
1983 pathnode->pathkeys = NIL; /* result is always unordered */
1984
1985 cost_tablefuncscan(pathnode, root, rel, pathnode->param_info);
1986
1987 return pathnode;
1988 }
1989
1990 /*
1991 * create_valuesscan_path
1992 * Creates a path corresponding to a scan of a VALUES list,
1993 * returning the pathnode.
1994 */
1995 Path *
create_valuesscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1996 create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
1997 Relids required_outer)
1998 {
1999 Path *pathnode = makeNode(Path);
2000
2001 pathnode->pathtype = T_ValuesScan;
2002 pathnode->parent = rel;
2003 pathnode->pathtarget = rel->reltarget;
2004 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2005 required_outer);
2006 pathnode->parallel_aware = false;
2007 pathnode->parallel_safe = rel->consider_parallel;
2008 pathnode->parallel_workers = 0;
2009 pathnode->pathkeys = NIL; /* result is always unordered */
2010
2011 cost_valuesscan(pathnode, root, rel, pathnode->param_info);
2012
2013 return pathnode;
2014 }
2015
2016 /*
2017 * create_ctescan_path
2018 * Creates a path corresponding to a scan of a non-self-reference CTE,
2019 * returning the pathnode.
2020 */
2021 Path *
create_ctescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2022 create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
2023 {
2024 Path *pathnode = makeNode(Path);
2025
2026 pathnode->pathtype = T_CteScan;
2027 pathnode->parent = rel;
2028 pathnode->pathtarget = rel->reltarget;
2029 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2030 required_outer);
2031 pathnode->parallel_aware = false;
2032 pathnode->parallel_safe = rel->consider_parallel;
2033 pathnode->parallel_workers = 0;
2034 pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
2035
2036 cost_ctescan(pathnode, root, rel, pathnode->param_info);
2037
2038 return pathnode;
2039 }
2040
2041 /*
2042 * create_namedtuplestorescan_path
2043 * Creates a path corresponding to a scan of a named tuplestore, returning
2044 * the pathnode.
2045 */
2046 Path *
create_namedtuplestorescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2047 create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
2048 Relids required_outer)
2049 {
2050 Path *pathnode = makeNode(Path);
2051
2052 pathnode->pathtype = T_NamedTuplestoreScan;
2053 pathnode->parent = rel;
2054 pathnode->pathtarget = rel->reltarget;
2055 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2056 required_outer);
2057 pathnode->parallel_aware = false;
2058 pathnode->parallel_safe = rel->consider_parallel;
2059 pathnode->parallel_workers = 0;
2060 pathnode->pathkeys = NIL; /* result is always unordered */
2061
2062 cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info);
2063
2064 return pathnode;
2065 }
2066
2067 /*
2068 * create_resultscan_path
2069 * Creates a path corresponding to a scan of an RTE_RESULT relation,
2070 * returning the pathnode.
2071 */
2072 Path *
create_resultscan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2073 create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
2074 Relids required_outer)
2075 {
2076 Path *pathnode = makeNode(Path);
2077
2078 pathnode->pathtype = T_Result;
2079 pathnode->parent = rel;
2080 pathnode->pathtarget = rel->reltarget;
2081 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2082 required_outer);
2083 pathnode->parallel_aware = false;
2084 pathnode->parallel_safe = rel->consider_parallel;
2085 pathnode->parallel_workers = 0;
2086 pathnode->pathkeys = NIL; /* result is always unordered */
2087
2088 cost_resultscan(pathnode, root, rel, pathnode->param_info);
2089
2090 return pathnode;
2091 }
2092
2093 /*
2094 * create_worktablescan_path
2095 * Creates a path corresponding to a scan of a self-reference CTE,
2096 * returning the pathnode.
2097 */
2098 Path *
create_worktablescan_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)2099 create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
2100 Relids required_outer)
2101 {
2102 Path *pathnode = makeNode(Path);
2103
2104 pathnode->pathtype = T_WorkTableScan;
2105 pathnode->parent = rel;
2106 pathnode->pathtarget = rel->reltarget;
2107 pathnode->param_info = get_baserel_parampathinfo(root, rel,
2108 required_outer);
2109 pathnode->parallel_aware = false;
2110 pathnode->parallel_safe = rel->consider_parallel;
2111 pathnode->parallel_workers = 0;
2112 pathnode->pathkeys = NIL; /* result is always unordered */
2113
2114 /* Cost is the same as for a regular CTE scan */
2115 cost_ctescan(pathnode, root, rel, pathnode->param_info);
2116
2117 return pathnode;
2118 }
2119
2120 /*
2121 * create_foreignscan_path
2122 * Creates a path corresponding to a scan of a foreign base table,
2123 * returning the pathnode.
2124 *
2125 * This function is never called from core Postgres; rather, it's expected
2126 * to be called by the GetForeignPaths function of a foreign data wrapper.
2127 * We make the FDW supply all fields of the path, since we do not have any way
2128 * to calculate them in core. However, there is a usually-sane default for
2129 * the pathtarget (rel->reltarget), so we let a NULL for "target" select that.
2130 */
2131 ForeignPath *
create_foreignscan_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,double rows,Cost startup_cost,Cost total_cost,List * pathkeys,Relids required_outer,Path * fdw_outerpath,List * fdw_private)2132 create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
2133 PathTarget *target,
2134 double rows, Cost startup_cost, Cost total_cost,
2135 List *pathkeys,
2136 Relids required_outer,
2137 Path *fdw_outerpath,
2138 List *fdw_private)
2139 {
2140 ForeignPath *pathnode = makeNode(ForeignPath);
2141
2142 /* Historically some FDWs were confused about when to use this */
2143 Assert(IS_SIMPLE_REL(rel));
2144
2145 pathnode->path.pathtype = T_ForeignScan;
2146 pathnode->path.parent = rel;
2147 pathnode->path.pathtarget = target ? target : rel->reltarget;
2148 pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
2149 required_outer);
2150 pathnode->path.parallel_aware = false;
2151 pathnode->path.parallel_safe = rel->consider_parallel;
2152 pathnode->path.parallel_workers = 0;
2153 pathnode->path.rows = rows;
2154 pathnode->path.startup_cost = startup_cost;
2155 pathnode->path.total_cost = total_cost;
2156 pathnode->path.pathkeys = pathkeys;
2157
2158 pathnode->fdw_outerpath = fdw_outerpath;
2159 pathnode->fdw_private = fdw_private;
2160
2161 return pathnode;
2162 }
2163
2164 /*
2165 * create_foreign_join_path
2166 * Creates a path corresponding to a scan of a foreign join,
2167 * returning the pathnode.
2168 *
2169 * This function is never called from core Postgres; rather, it's expected
2170 * to be called by the GetForeignJoinPaths function of a foreign data wrapper.
2171 * We make the FDW supply all fields of the path, since we do not have any way
2172 * to calculate them in core. However, there is a usually-sane default for
2173 * the pathtarget (rel->reltarget), so we let a NULL for "target" select that.
2174 */
2175 ForeignPath *
create_foreign_join_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,double rows,Cost startup_cost,Cost total_cost,List * pathkeys,Relids required_outer,Path * fdw_outerpath,List * fdw_private)2176 create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel,
2177 PathTarget *target,
2178 double rows, Cost startup_cost, Cost total_cost,
2179 List *pathkeys,
2180 Relids required_outer,
2181 Path *fdw_outerpath,
2182 List *fdw_private)
2183 {
2184 ForeignPath *pathnode = makeNode(ForeignPath);
2185
2186 /*
2187 * We should use get_joinrel_parampathinfo to handle parameterized paths,
2188 * but the API of this function doesn't support it, and existing
2189 * extensions aren't yet trying to build such paths anyway. For the
2190 * moment just throw an error if someone tries it; eventually we should
2191 * revisit this.
2192 */
2193 if (!bms_is_empty(required_outer) || !bms_is_empty(rel->lateral_relids))
2194 elog(ERROR, "parameterized foreign joins are not supported yet");
2195
2196 pathnode->path.pathtype = T_ForeignScan;
2197 pathnode->path.parent = rel;
2198 pathnode->path.pathtarget = target ? target : rel->reltarget;
2199 pathnode->path.param_info = NULL; /* XXX see above */
2200 pathnode->path.parallel_aware = false;
2201 pathnode->path.parallel_safe = rel->consider_parallel;
2202 pathnode->path.parallel_workers = 0;
2203 pathnode->path.rows = rows;
2204 pathnode->path.startup_cost = startup_cost;
2205 pathnode->path.total_cost = total_cost;
2206 pathnode->path.pathkeys = pathkeys;
2207
2208 pathnode->fdw_outerpath = fdw_outerpath;
2209 pathnode->fdw_private = fdw_private;
2210
2211 return pathnode;
2212 }
2213
2214 /*
2215 * create_foreign_upper_path
2216 * Creates a path corresponding to an upper relation that's computed
2217 * directly by an FDW, returning the pathnode.
2218 *
2219 * This function is never called from core Postgres; rather, it's expected to
2220 * be called by the GetForeignUpperPaths function of a foreign data wrapper.
2221 * We make the FDW supply all fields of the path, since we do not have any way
2222 * to calculate them in core. However, there is a usually-sane default for
2223 * the pathtarget (rel->reltarget), so we let a NULL for "target" select that.
2224 */
2225 ForeignPath *
create_foreign_upper_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,double rows,Cost startup_cost,Cost total_cost,List * pathkeys,Path * fdw_outerpath,List * fdw_private)2226 create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel,
2227 PathTarget *target,
2228 double rows, Cost startup_cost, Cost total_cost,
2229 List *pathkeys,
2230 Path *fdw_outerpath,
2231 List *fdw_private)
2232 {
2233 ForeignPath *pathnode = makeNode(ForeignPath);
2234
2235 /*
2236 * Upper relations should never have any lateral references, since joining
2237 * is complete.
2238 */
2239 Assert(bms_is_empty(rel->lateral_relids));
2240
2241 pathnode->path.pathtype = T_ForeignScan;
2242 pathnode->path.parent = rel;
2243 pathnode->path.pathtarget = target ? target : rel->reltarget;
2244 pathnode->path.param_info = NULL;
2245 pathnode->path.parallel_aware = false;
2246 pathnode->path.parallel_safe = rel->consider_parallel;
2247 pathnode->path.parallel_workers = 0;
2248 pathnode->path.rows = rows;
2249 pathnode->path.startup_cost = startup_cost;
2250 pathnode->path.total_cost = total_cost;
2251 pathnode->path.pathkeys = pathkeys;
2252
2253 pathnode->fdw_outerpath = fdw_outerpath;
2254 pathnode->fdw_private = fdw_private;
2255
2256 return pathnode;
2257 }
2258
2259 /*
2260 * calc_nestloop_required_outer
2261 * Compute the required_outer set for a nestloop join path
2262 *
2263 * Note: result must not share storage with either input
2264 */
2265 Relids
calc_nestloop_required_outer(Relids outerrelids,Relids outer_paramrels,Relids innerrelids,Relids inner_paramrels)2266 calc_nestloop_required_outer(Relids outerrelids,
2267 Relids outer_paramrels,
2268 Relids innerrelids,
2269 Relids inner_paramrels)
2270 {
2271 Relids required_outer;
2272
2273 /* inner_path can require rels from outer path, but not vice versa */
2274 Assert(!bms_overlap(outer_paramrels, innerrelids));
2275 /* easy case if inner path is not parameterized */
2276 if (!inner_paramrels)
2277 return bms_copy(outer_paramrels);
2278 /* else, form the union ... */
2279 required_outer = bms_union(outer_paramrels, inner_paramrels);
2280 /* ... and remove any mention of now-satisfied outer rels */
2281 required_outer = bms_del_members(required_outer,
2282 outerrelids);
2283 /* maintain invariant that required_outer is exactly NULL if empty */
2284 if (bms_is_empty(required_outer))
2285 {
2286 bms_free(required_outer);
2287 required_outer = NULL;
2288 }
2289 return required_outer;
2290 }
2291
2292 /*
2293 * calc_non_nestloop_required_outer
2294 * Compute the required_outer set for a merge or hash join path
2295 *
2296 * Note: result must not share storage with either input
2297 */
2298 Relids
calc_non_nestloop_required_outer(Path * outer_path,Path * inner_path)2299 calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
2300 {
2301 Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
2302 Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
2303 Relids required_outer;
2304
2305 /* neither path can require rels from the other */
2306 Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
2307 Assert(!bms_overlap(inner_paramrels, outer_path->parent->relids));
2308 /* form the union ... */
2309 required_outer = bms_union(outer_paramrels, inner_paramrels);
2310 /* we do not need an explicit test for empty; bms_union gets it right */
2311 return required_outer;
2312 }
2313
/*
 * create_nestloop_path
 *	  Creates a pathnode corresponding to a nestloop join between two
 *	  relations.
 *
 * 'joinrel' is the join relation.
 * 'jointype' is the type of join required
 * 'workspace' is the result from initial_cost_nestloop
 * 'extra' contains various information about the join
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
 * 'pathkeys' are the path keys of the new join path
 * 'required_outer' is the set of required outer rels
 *
 * Returns the resulting path node.
 */
NestPath *
create_nestloop_path(PlannerInfo *root,
					 RelOptInfo *joinrel,
					 JoinType jointype,
					 JoinCostWorkspace *workspace,
					 JoinPathExtraData *extra,
					 Path *outer_path,
					 Path *inner_path,
					 List *restrict_clauses,
					 List *pathkeys,
					 Relids required_outer)
{
	NestPath   *pathnode = makeNode(NestPath);
	Relids		inner_req_outer = PATH_REQ_OUTER(inner_path);

	/*
	 * If the inner path is parameterized by the outer, we must drop any
	 * restrict_clauses that are due to be moved into the inner path.  We have
	 * to do this now, rather than postpone the work till createplan time,
	 * because the restrict_clauses list can affect the size and cost
	 * estimates for this path.
	 */
	if (bms_overlap(inner_req_outer, outer_path->parent->relids))
	{
		Relids		inner_and_outer = bms_union(inner_path->parent->relids,
												inner_req_outer);
		List	   *jclauses = NIL;
		ListCell   *lc;

		/* keep only the clauses that can NOT be pushed into the inner path */
		foreach(lc, restrict_clauses)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);

			if (!join_clause_is_movable_into(rinfo,
											 inner_path->parent->relids,
											 inner_and_outer))
				jclauses = lappend(jclauses, rinfo);
		}
		restrict_clauses = jclauses;
	}

	pathnode->path.pathtype = T_NestLoop;
	pathnode->path.parent = joinrel;
	pathnode->path.pathtarget = joinrel->reltarget;
	/* note: this call may further modify restrict_clauses */
	pathnode->path.param_info =
		get_joinrel_parampathinfo(root,
								  joinrel,
								  outer_path,
								  inner_path,
								  extra->sjinfo,
								  required_outer,
								  &restrict_clauses);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = joinrel->consider_parallel &&
		outer_path->parallel_safe && inner_path->parallel_safe;
	/* This is a foolish way to estimate parallel_workers, but for now... */
	pathnode->path.parallel_workers = outer_path->parallel_workers;
	pathnode->path.pathkeys = pathkeys;
	pathnode->jointype = jointype;
	pathnode->inner_unique = extra->inner_unique;
	pathnode->outerjoinpath = outer_path;
	pathnode->innerjoinpath = inner_path;
	pathnode->joinrestrictinfo = restrict_clauses;

	/* fills in the path's rows/startup_cost/total_cost fields */
	final_cost_nestloop(root, pathnode, workspace, extra);

	return pathnode;
}
2399
2400 /*
2401 * create_mergejoin_path
2402 * Creates a pathnode corresponding to a mergejoin join between
2403 * two relations
2404 *
2405 * 'joinrel' is the join relation
2406 * 'jointype' is the type of join required
2407 * 'workspace' is the result from initial_cost_mergejoin
2408 * 'extra' contains various information about the join
2409 * 'outer_path' is the outer path
2410 * 'inner_path' is the inner path
2411 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
2412 * 'pathkeys' are the path keys of the new join path
2413 * 'required_outer' is the set of required outer rels
2414 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
2415 * (this should be a subset of the restrict_clauses list)
2416 * 'outersortkeys' are the sort varkeys for the outer relation
2417 * 'innersortkeys' are the sort varkeys for the inner relation
2418 */
2419 MergePath *
create_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,JoinCostWorkspace * workspace,JoinPathExtraData * extra,Path * outer_path,Path * inner_path,List * restrict_clauses,List * pathkeys,Relids required_outer,List * mergeclauses,List * outersortkeys,List * innersortkeys)2420 create_mergejoin_path(PlannerInfo *root,
2421 RelOptInfo *joinrel,
2422 JoinType jointype,
2423 JoinCostWorkspace *workspace,
2424 JoinPathExtraData *extra,
2425 Path *outer_path,
2426 Path *inner_path,
2427 List *restrict_clauses,
2428 List *pathkeys,
2429 Relids required_outer,
2430 List *mergeclauses,
2431 List *outersortkeys,
2432 List *innersortkeys)
2433 {
2434 MergePath *pathnode = makeNode(MergePath);
2435
2436 pathnode->jpath.path.pathtype = T_MergeJoin;
2437 pathnode->jpath.path.parent = joinrel;
2438 pathnode->jpath.path.pathtarget = joinrel->reltarget;
2439 pathnode->jpath.path.param_info =
2440 get_joinrel_parampathinfo(root,
2441 joinrel,
2442 outer_path,
2443 inner_path,
2444 extra->sjinfo,
2445 required_outer,
2446 &restrict_clauses);
2447 pathnode->jpath.path.parallel_aware = false;
2448 pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
2449 outer_path->parallel_safe && inner_path->parallel_safe;
2450 /* This is a foolish way to estimate parallel_workers, but for now... */
2451 pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
2452 pathnode->jpath.path.pathkeys = pathkeys;
2453 pathnode->jpath.jointype = jointype;
2454 pathnode->jpath.inner_unique = extra->inner_unique;
2455 pathnode->jpath.outerjoinpath = outer_path;
2456 pathnode->jpath.innerjoinpath = inner_path;
2457 pathnode->jpath.joinrestrictinfo = restrict_clauses;
2458 pathnode->path_mergeclauses = mergeclauses;
2459 pathnode->outersortkeys = outersortkeys;
2460 pathnode->innersortkeys = innersortkeys;
2461 /* pathnode->skip_mark_restore will be set by final_cost_mergejoin */
2462 /* pathnode->materialize_inner will be set by final_cost_mergejoin */
2463
2464 final_cost_mergejoin(root, pathnode, workspace, extra);
2465
2466 return pathnode;
2467 }
2468
2469 /*
2470 * create_hashjoin_path
2471 * Creates a pathnode corresponding to a hash join between two relations.
2472 *
2473 * 'joinrel' is the join relation
2474 * 'jointype' is the type of join required
2475 * 'workspace' is the result from initial_cost_hashjoin
2476 * 'extra' contains various information about the join
2477 * 'outer_path' is the cheapest outer path
2478 * 'inner_path' is the cheapest inner path
2479 * 'parallel_hash' to select Parallel Hash of inner path (shared hash table)
2480 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
2481 * 'required_outer' is the set of required outer rels
2482 * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
2483 * (this should be a subset of the restrict_clauses list)
2484 */
2485 HashPath *
create_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,JoinCostWorkspace * workspace,JoinPathExtraData * extra,Path * outer_path,Path * inner_path,bool parallel_hash,List * restrict_clauses,Relids required_outer,List * hashclauses)2486 create_hashjoin_path(PlannerInfo *root,
2487 RelOptInfo *joinrel,
2488 JoinType jointype,
2489 JoinCostWorkspace *workspace,
2490 JoinPathExtraData *extra,
2491 Path *outer_path,
2492 Path *inner_path,
2493 bool parallel_hash,
2494 List *restrict_clauses,
2495 Relids required_outer,
2496 List *hashclauses)
2497 {
2498 HashPath *pathnode = makeNode(HashPath);
2499
2500 pathnode->jpath.path.pathtype = T_HashJoin;
2501 pathnode->jpath.path.parent = joinrel;
2502 pathnode->jpath.path.pathtarget = joinrel->reltarget;
2503 pathnode->jpath.path.param_info =
2504 get_joinrel_parampathinfo(root,
2505 joinrel,
2506 outer_path,
2507 inner_path,
2508 extra->sjinfo,
2509 required_outer,
2510 &restrict_clauses);
2511 pathnode->jpath.path.parallel_aware =
2512 joinrel->consider_parallel && parallel_hash;
2513 pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
2514 outer_path->parallel_safe && inner_path->parallel_safe;
2515 /* This is a foolish way to estimate parallel_workers, but for now... */
2516 pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
2517
2518 /*
2519 * A hashjoin never has pathkeys, since its output ordering is
2520 * unpredictable due to possible batching. XXX If the inner relation is
2521 * small enough, we could instruct the executor that it must not batch,
2522 * and then we could assume that the output inherits the outer relation's
2523 * ordering, which might save a sort step. However there is considerable
2524 * downside if our estimate of the inner relation size is badly off. For
2525 * the moment we don't risk it. (Note also that if we wanted to take this
2526 * seriously, joinpath.c would have to consider many more paths for the
2527 * outer rel than it does now.)
2528 */
2529 pathnode->jpath.path.pathkeys = NIL;
2530 pathnode->jpath.jointype = jointype;
2531 pathnode->jpath.inner_unique = extra->inner_unique;
2532 pathnode->jpath.outerjoinpath = outer_path;
2533 pathnode->jpath.innerjoinpath = inner_path;
2534 pathnode->jpath.joinrestrictinfo = restrict_clauses;
2535 pathnode->path_hashclauses = hashclauses;
2536 /* final_cost_hashjoin will fill in pathnode->num_batches */
2537
2538 final_cost_hashjoin(root, pathnode, workspace, extra);
2539
2540 return pathnode;
2541 }
2542
/*
 * create_projection_path
 *	  Creates a pathnode that represents performing a projection.
 *
 * 'rel' is the parent relation associated with the result
 * 'subpath' is the path representing the source of data
 * 'target' is the PathTarget to be computed
 *
 * The resulting path's "dummypp" flag records whether a separate Result
 * plan node will actually be needed; the cost fields are set accordingly.
 */
ProjectionPath *
create_projection_path(PlannerInfo *root,
					   RelOptInfo *rel,
					   Path *subpath,
					   PathTarget *target)
{
	ProjectionPath *pathnode = makeNode(ProjectionPath);
	PathTarget *oldtarget;		/* the target subpath would compute itself */

	/*
	 * We mustn't put a ProjectionPath directly above another; it's useless
	 * and will confuse create_projection_plan.  Rather than making sure all
	 * callers handle that, let's implement it here, by stripping off any
	 * ProjectionPath in what we're given.  Given this rule, there won't be
	 * more than one.
	 */
	if (IsA(subpath, ProjectionPath))
	{
		ProjectionPath *subpp = (ProjectionPath *) subpath;

		Assert(subpp->path.parent == rel);
		subpath = subpp->subpath;
		Assert(!IsA(subpath, ProjectionPath));
	}

	pathnode->path.pathtype = T_Result;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = target;
	/* For now, assume we are above any joins, so no parameterization */
	pathnode->path.param_info = NULL;
	pathnode->path.parallel_aware = false;
	/* the target's expressions must themselves be parallel-safe */
	pathnode->path.parallel_safe = rel->consider_parallel &&
		subpath->parallel_safe &&
		is_parallel_safe(root, (Node *) target->exprs);
	pathnode->path.parallel_workers = subpath->parallel_workers;
	/* Projection does not change the sort order */
	pathnode->path.pathkeys = subpath->pathkeys;

	pathnode->subpath = subpath;

	/*
	 * We might not need a separate Result node.  If the input plan node type
	 * can project, we can just tell it to project something else.  Or, if it
	 * can't project but the desired target has the same expression list as
	 * what the input will produce anyway, we can still give it the desired
	 * tlist (possibly changing its ressortgroupref labels, but nothing else).
	 * Note: in the latter case, create_projection_plan has to recheck our
	 * conclusion; see comments therein.
	 */
	oldtarget = subpath->pathtarget;
	if (is_projection_capable_path(subpath) ||
		equal(oldtarget->exprs, target->exprs))
	{
		/* No separate Result node needed */
		pathnode->dummypp = true;

		/*
		 * Set cost of plan as subpath's cost, adjusted for tlist replacement:
		 * swap out the old target's eval cost for the new target's.
		 */
		pathnode->path.rows = subpath->rows;
		pathnode->path.startup_cost = subpath->startup_cost +
			(target->cost.startup - oldtarget->cost.startup);
		pathnode->path.total_cost = subpath->total_cost +
			(target->cost.startup - oldtarget->cost.startup) +
			(target->cost.per_tuple - oldtarget->cost.per_tuple) * subpath->rows;
	}
	else
	{
		/* We really do need the Result node */
		pathnode->dummypp = false;

		/*
		 * The Result node's cost is cpu_tuple_cost per row, plus the cost of
		 * evaluating the tlist.  There is no qual to worry about.
		 */
		pathnode->path.rows = subpath->rows;
		pathnode->path.startup_cost = subpath->startup_cost +
			target->cost.startup;
		pathnode->path.total_cost = subpath->total_cost +
			target->cost.startup +
			(cpu_tuple_cost + target->cost.per_tuple) * subpath->rows;
	}

	return pathnode;
}
2636
2637 /*
2638 * apply_projection_to_path
2639 * Add a projection step, or just apply the target directly to given path.
2640 *
2641 * This has the same net effect as create_projection_path(), except that if
2642 * a separate Result plan node isn't needed, we just replace the given path's
2643 * pathtarget with the desired one. This must be used only when the caller
2644 * knows that the given path isn't referenced elsewhere and so can be modified
2645 * in-place.
2646 *
2647 * If the input path is a GatherPath or GatherMergePath, we try to push the
2648 * new target down to its input as well; this is a yet more invasive
2649 * modification of the input path, which create_projection_path() can't do.
2650 *
2651 * Note that we mustn't change the source path's parent link; so when it is
2652 * add_path'd to "rel" things will be a bit inconsistent. So far that has
2653 * not caused any trouble.
2654 *
2655 * 'rel' is the parent relation associated with the result
2656 * 'path' is the path representing the source of data
2657 * 'target' is the PathTarget to be computed
2658 */
2659 Path *
apply_projection_to_path(PlannerInfo * root,RelOptInfo * rel,Path * path,PathTarget * target)2660 apply_projection_to_path(PlannerInfo *root,
2661 RelOptInfo *rel,
2662 Path *path,
2663 PathTarget *target)
2664 {
2665 QualCost oldcost;
2666
2667 /*
2668 * If given path can't project, we might need a Result node, so make a
2669 * separate ProjectionPath.
2670 */
2671 if (!is_projection_capable_path(path))
2672 return (Path *) create_projection_path(root, rel, path, target);
2673
2674 /*
2675 * We can just jam the desired tlist into the existing path, being sure to
2676 * update its cost estimates appropriately.
2677 */
2678 oldcost = path->pathtarget->cost;
2679 path->pathtarget = target;
2680
2681 path->startup_cost += target->cost.startup - oldcost.startup;
2682 path->total_cost += target->cost.startup - oldcost.startup +
2683 (target->cost.per_tuple - oldcost.per_tuple) * path->rows;
2684
2685 /*
2686 * If the path happens to be a Gather or GatherMerge path, we'd like to
2687 * arrange for the subpath to return the required target list so that
2688 * workers can help project. But if there is something that is not
2689 * parallel-safe in the target expressions, then we can't.
2690 */
2691 if ((IsA(path, GatherPath) || IsA(path, GatherMergePath)) &&
2692 is_parallel_safe(root, (Node *) target->exprs))
2693 {
2694 /*
2695 * We always use create_projection_path here, even if the subpath is
2696 * projection-capable, so as to avoid modifying the subpath in place.
2697 * It seems unlikely at present that there could be any other
2698 * references to the subpath, but better safe than sorry.
2699 *
2700 * Note that we don't change the parallel path's cost estimates; it
2701 * might be appropriate to do so, to reflect the fact that the bulk of
2702 * the target evaluation will happen in workers.
2703 */
2704 if (IsA(path, GatherPath))
2705 {
2706 GatherPath *gpath = (GatherPath *) path;
2707
2708 gpath->subpath = (Path *)
2709 create_projection_path(root,
2710 gpath->subpath->parent,
2711 gpath->subpath,
2712 target);
2713 }
2714 else
2715 {
2716 GatherMergePath *gmpath = (GatherMergePath *) path;
2717
2718 gmpath->subpath = (Path *)
2719 create_projection_path(root,
2720 gmpath->subpath->parent,
2721 gmpath->subpath,
2722 target);
2723 }
2724 }
2725 else if (path->parallel_safe &&
2726 !is_parallel_safe(root, (Node *) target->exprs))
2727 {
2728 /*
2729 * We're inserting a parallel-restricted target list into a path
2730 * currently marked parallel-safe, so we have to mark it as no longer
2731 * safe.
2732 */
2733 path->parallel_safe = false;
2734 }
2735
2736 return path;
2737 }
2738
2739 /*
2740 * create_set_projection_path
2741 * Creates a pathnode that represents performing a projection that
2742 * includes set-returning functions.
2743 *
2744 * 'rel' is the parent relation associated with the result
2745 * 'subpath' is the path representing the source of data
2746 * 'target' is the PathTarget to be computed
2747 */
2748 ProjectSetPath *
create_set_projection_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target)2749 create_set_projection_path(PlannerInfo *root,
2750 RelOptInfo *rel,
2751 Path *subpath,
2752 PathTarget *target)
2753 {
2754 ProjectSetPath *pathnode = makeNode(ProjectSetPath);
2755 double tlist_rows;
2756 ListCell *lc;
2757
2758 pathnode->path.pathtype = T_ProjectSet;
2759 pathnode->path.parent = rel;
2760 pathnode->path.pathtarget = target;
2761 /* For now, assume we are above any joins, so no parameterization */
2762 pathnode->path.param_info = NULL;
2763 pathnode->path.parallel_aware = false;
2764 pathnode->path.parallel_safe = rel->consider_parallel &&
2765 subpath->parallel_safe &&
2766 is_parallel_safe(root, (Node *) target->exprs);
2767 pathnode->path.parallel_workers = subpath->parallel_workers;
2768 /* Projection does not change the sort order XXX? */
2769 pathnode->path.pathkeys = subpath->pathkeys;
2770
2771 pathnode->subpath = subpath;
2772
2773 /*
2774 * Estimate number of rows produced by SRFs for each row of input; if
2775 * there's more than one in this node, use the maximum.
2776 */
2777 tlist_rows = 1;
2778 foreach(lc, target->exprs)
2779 {
2780 Node *node = (Node *) lfirst(lc);
2781 double itemrows;
2782
2783 itemrows = expression_returns_set_rows(root, node);
2784 if (tlist_rows < itemrows)
2785 tlist_rows = itemrows;
2786 }
2787
2788 /*
2789 * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost
2790 * per input row, and half of cpu_tuple_cost for each added output row.
2791 * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit
2792 * this estimate later.
2793 */
2794 pathnode->path.rows = subpath->rows * tlist_rows;
2795 pathnode->path.startup_cost = subpath->startup_cost +
2796 target->cost.startup;
2797 pathnode->path.total_cost = subpath->total_cost +
2798 target->cost.startup +
2799 (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows +
2800 (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2;
2801
2802 return pathnode;
2803 }
2804
2805 /*
2806 * create_incremental_sort_path
2807 * Creates a pathnode that represents performing an incremental sort.
2808 *
2809 * 'rel' is the parent relation associated with the result
2810 * 'subpath' is the path representing the source of data
2811 * 'pathkeys' represents the desired sort order
2812 * 'presorted_keys' is the number of keys by which the input path is
2813 * already sorted
2814 * 'limit_tuples' is the estimated bound on the number of output tuples,
2815 * or -1 if no LIMIT or couldn't estimate
2816 */
2817 IncrementalSortPath *
create_incremental_sort_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * pathkeys,int presorted_keys,double limit_tuples)2818 create_incremental_sort_path(PlannerInfo *root,
2819 RelOptInfo *rel,
2820 Path *subpath,
2821 List *pathkeys,
2822 int presorted_keys,
2823 double limit_tuples)
2824 {
2825 IncrementalSortPath *sort = makeNode(IncrementalSortPath);
2826 SortPath *pathnode = &sort->spath;
2827
2828 pathnode->path.pathtype = T_IncrementalSort;
2829 pathnode->path.parent = rel;
2830 /* Sort doesn't project, so use source path's pathtarget */
2831 pathnode->path.pathtarget = subpath->pathtarget;
2832 /* For now, assume we are above any joins, so no parameterization */
2833 pathnode->path.param_info = NULL;
2834 pathnode->path.parallel_aware = false;
2835 pathnode->path.parallel_safe = rel->consider_parallel &&
2836 subpath->parallel_safe;
2837 pathnode->path.parallel_workers = subpath->parallel_workers;
2838 pathnode->path.pathkeys = pathkeys;
2839
2840 pathnode->subpath = subpath;
2841
2842 cost_incremental_sort(&pathnode->path,
2843 root, pathkeys, presorted_keys,
2844 subpath->startup_cost,
2845 subpath->total_cost,
2846 subpath->rows,
2847 subpath->pathtarget->width,
2848 0.0, /* XXX comparison_cost shouldn't be 0? */
2849 work_mem, limit_tuples);
2850
2851 sort->nPresortedCols = presorted_keys;
2852
2853 return sort;
2854 }
2855
2856 /*
2857 * create_sort_path
2858 * Creates a pathnode that represents performing an explicit sort.
2859 *
2860 * 'rel' is the parent relation associated with the result
2861 * 'subpath' is the path representing the source of data
2862 * 'pathkeys' represents the desired sort order
2863 * 'limit_tuples' is the estimated bound on the number of output tuples,
2864 * or -1 if no LIMIT or couldn't estimate
2865 */
2866 SortPath *
create_sort_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * pathkeys,double limit_tuples)2867 create_sort_path(PlannerInfo *root,
2868 RelOptInfo *rel,
2869 Path *subpath,
2870 List *pathkeys,
2871 double limit_tuples)
2872 {
2873 SortPath *pathnode = makeNode(SortPath);
2874
2875 pathnode->path.pathtype = T_Sort;
2876 pathnode->path.parent = rel;
2877 /* Sort doesn't project, so use source path's pathtarget */
2878 pathnode->path.pathtarget = subpath->pathtarget;
2879 /* For now, assume we are above any joins, so no parameterization */
2880 pathnode->path.param_info = NULL;
2881 pathnode->path.parallel_aware = false;
2882 pathnode->path.parallel_safe = rel->consider_parallel &&
2883 subpath->parallel_safe;
2884 pathnode->path.parallel_workers = subpath->parallel_workers;
2885 pathnode->path.pathkeys = pathkeys;
2886
2887 pathnode->subpath = subpath;
2888
2889 cost_sort(&pathnode->path, root, pathkeys,
2890 subpath->total_cost,
2891 subpath->rows,
2892 subpath->pathtarget->width,
2893 0.0, /* XXX comparison_cost shouldn't be 0? */
2894 work_mem, limit_tuples);
2895
2896 return pathnode;
2897 }
2898
2899 /*
2900 * create_group_path
2901 * Creates a pathnode that represents performing grouping of presorted input
2902 *
2903 * 'rel' is the parent relation associated with the result
2904 * 'subpath' is the path representing the source of data
2905 * 'target' is the PathTarget to be computed
2906 * 'groupClause' is a list of SortGroupClause's representing the grouping
2907 * 'qual' is the HAVING quals if any
2908 * 'numGroups' is the estimated number of groups
2909 */
2910 GroupPath *
create_group_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * groupClause,List * qual,double numGroups)2911 create_group_path(PlannerInfo *root,
2912 RelOptInfo *rel,
2913 Path *subpath,
2914 List *groupClause,
2915 List *qual,
2916 double numGroups)
2917 {
2918 GroupPath *pathnode = makeNode(GroupPath);
2919 PathTarget *target = rel->reltarget;
2920
2921 pathnode->path.pathtype = T_Group;
2922 pathnode->path.parent = rel;
2923 pathnode->path.pathtarget = target;
2924 /* For now, assume we are above any joins, so no parameterization */
2925 pathnode->path.param_info = NULL;
2926 pathnode->path.parallel_aware = false;
2927 pathnode->path.parallel_safe = rel->consider_parallel &&
2928 subpath->parallel_safe;
2929 pathnode->path.parallel_workers = subpath->parallel_workers;
2930 /* Group doesn't change sort ordering */
2931 pathnode->path.pathkeys = subpath->pathkeys;
2932
2933 pathnode->subpath = subpath;
2934
2935 pathnode->groupClause = groupClause;
2936 pathnode->qual = qual;
2937
2938 cost_group(&pathnode->path, root,
2939 list_length(groupClause),
2940 numGroups,
2941 qual,
2942 subpath->startup_cost, subpath->total_cost,
2943 subpath->rows);
2944
2945 /* add tlist eval cost for each output row */
2946 pathnode->path.startup_cost += target->cost.startup;
2947 pathnode->path.total_cost += target->cost.startup +
2948 target->cost.per_tuple * pathnode->path.rows;
2949
2950 return pathnode;
2951 }
2952
2953 /*
2954 * create_upper_unique_path
2955 * Creates a pathnode that represents performing an explicit Unique step
2956 * on presorted input.
2957 *
2958 * This produces a Unique plan node, but the use-case is so different from
2959 * create_unique_path that it doesn't seem worth trying to merge the two.
2960 *
2961 * 'rel' is the parent relation associated with the result
2962 * 'subpath' is the path representing the source of data
2963 * 'numCols' is the number of grouping columns
2964 * 'numGroups' is the estimated number of groups
2965 *
2966 * The input path must be sorted on the grouping columns, plus possibly
2967 * additional columns; so the first numCols pathkeys are the grouping columns
2968 */
2969 UpperUniquePath *
create_upper_unique_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,int numCols,double numGroups)2970 create_upper_unique_path(PlannerInfo *root,
2971 RelOptInfo *rel,
2972 Path *subpath,
2973 int numCols,
2974 double numGroups)
2975 {
2976 UpperUniquePath *pathnode = makeNode(UpperUniquePath);
2977
2978 pathnode->path.pathtype = T_Unique;
2979 pathnode->path.parent = rel;
2980 /* Unique doesn't project, so use source path's pathtarget */
2981 pathnode->path.pathtarget = subpath->pathtarget;
2982 /* For now, assume we are above any joins, so no parameterization */
2983 pathnode->path.param_info = NULL;
2984 pathnode->path.parallel_aware = false;
2985 pathnode->path.parallel_safe = rel->consider_parallel &&
2986 subpath->parallel_safe;
2987 pathnode->path.parallel_workers = subpath->parallel_workers;
2988 /* Unique doesn't change the input ordering */
2989 pathnode->path.pathkeys = subpath->pathkeys;
2990
2991 pathnode->subpath = subpath;
2992 pathnode->numkeys = numCols;
2993
2994 /*
2995 * Charge one cpu_operator_cost per comparison per input tuple. We assume
2996 * all columns get compared at most of the tuples. (XXX probably this is
2997 * an overestimate.)
2998 */
2999 pathnode->path.startup_cost = subpath->startup_cost;
3000 pathnode->path.total_cost = subpath->total_cost +
3001 cpu_operator_cost * subpath->rows * numCols;
3002 pathnode->path.rows = numGroups;
3003
3004 return pathnode;
3005 }
3006
3007 /*
3008 * create_agg_path
3009 * Creates a pathnode that represents performing aggregation/grouping
3010 *
3011 * 'rel' is the parent relation associated with the result
3012 * 'subpath' is the path representing the source of data
3013 * 'target' is the PathTarget to be computed
3014 * 'aggstrategy' is the Agg node's basic implementation strategy
3015 * 'aggsplit' is the Agg node's aggregate-splitting mode
3016 * 'groupClause' is a list of SortGroupClause's representing the grouping
3017 * 'qual' is the HAVING quals if any
3018 * 'aggcosts' contains cost info about the aggregate functions to be computed
3019 * 'numGroups' is the estimated number of groups (1 if not grouping)
3020 */
3021 AggPath *
create_agg_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,AggStrategy aggstrategy,AggSplit aggsplit,List * groupClause,List * qual,const AggClauseCosts * aggcosts,double numGroups)3022 create_agg_path(PlannerInfo *root,
3023 RelOptInfo *rel,
3024 Path *subpath,
3025 PathTarget *target,
3026 AggStrategy aggstrategy,
3027 AggSplit aggsplit,
3028 List *groupClause,
3029 List *qual,
3030 const AggClauseCosts *aggcosts,
3031 double numGroups)
3032 {
3033 AggPath *pathnode = makeNode(AggPath);
3034
3035 pathnode->path.pathtype = T_Agg;
3036 pathnode->path.parent = rel;
3037 pathnode->path.pathtarget = target;
3038 /* For now, assume we are above any joins, so no parameterization */
3039 pathnode->path.param_info = NULL;
3040 pathnode->path.parallel_aware = false;
3041 pathnode->path.parallel_safe = rel->consider_parallel &&
3042 subpath->parallel_safe;
3043 pathnode->path.parallel_workers = subpath->parallel_workers;
3044 if (aggstrategy == AGG_SORTED)
3045 pathnode->path.pathkeys = subpath->pathkeys; /* preserves order */
3046 else
3047 pathnode->path.pathkeys = NIL; /* output is unordered */
3048 pathnode->subpath = subpath;
3049
3050 pathnode->aggstrategy = aggstrategy;
3051 pathnode->aggsplit = aggsplit;
3052 pathnode->numGroups = numGroups;
3053 pathnode->transitionSpace = aggcosts ? aggcosts->transitionSpace : 0;
3054 pathnode->groupClause = groupClause;
3055 pathnode->qual = qual;
3056
3057 cost_agg(&pathnode->path, root,
3058 aggstrategy, aggcosts,
3059 list_length(groupClause), numGroups,
3060 qual,
3061 subpath->startup_cost, subpath->total_cost,
3062 subpath->rows, subpath->pathtarget->width);
3063
3064 /* add tlist eval cost for each output row */
3065 pathnode->path.startup_cost += target->cost.startup;
3066 pathnode->path.total_cost += target->cost.startup +
3067 target->cost.per_tuple * pathnode->path.rows;
3068
3069 return pathnode;
3070 }
3071
/*
 * create_groupingsets_path
 *	  Creates a pathnode that represents performing GROUPING SETS aggregation
 *
 * GroupingSetsPath represents sorted grouping with one or more grouping sets.
 * The input path's result must be sorted to match the last entry in
 * rollup_groupclauses.
 *
 * 'rel' is the parent relation associated with the result (its reltarget
 *		supplies the PathTarget to be computed)
 * 'subpath' is the path representing the source of data
 * 'having_qual' is the HAVING quals if any
 * 'aggstrategy' is the Agg node's basic implementation strategy
 * 'rollups' is a list of RollupData nodes
 * 'agg_costs' contains cost info about the aggregate functions to be computed
 * 'numGroups' is the estimated total number of groups
 */
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
						 RelOptInfo *rel,
						 Path *subpath,
						 List *having_qual,
						 AggStrategy aggstrategy,
						 List *rollups,
						 const AggClauseCosts *agg_costs,
						 double numGroups)
{
	GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
	PathTarget *target = rel->reltarget;
	ListCell   *lc;
	bool		is_first = true;
	bool		is_first_sort = true;

	/* The topmost generated Plan node will be an Agg */
	pathnode->path.pathtype = T_Agg;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = target;
	pathnode->path.param_info = subpath->param_info;
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel &&
		subpath->parallel_safe;
	pathnode->path.parallel_workers = subpath->parallel_workers;
	pathnode->subpath = subpath;

	/*
	 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
	 * to AGG_HASHED, here if possible.
	 */
	if (aggstrategy == AGG_SORTED &&
		list_length(rollups) == 1 &&
		((RollupData *) linitial(rollups))->groupClause == NIL)
		aggstrategy = AGG_PLAIN;

	if (aggstrategy == AGG_MIXED &&
		list_length(rollups) == 1)
		aggstrategy = AGG_HASHED;

	/*
	 * Output will be in sorted order by group_pathkeys if, and only if, there
	 * is a single rollup operation on a non-empty list of grouping
	 * expressions.
	 */
	if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
		pathnode->path.pathkeys = root->group_pathkeys;
	else
		pathnode->path.pathkeys = NIL;

	pathnode->aggstrategy = aggstrategy;
	pathnode->rollups = rollups;
	pathnode->qual = having_qual;
	pathnode->transitionSpace = agg_costs ? agg_costs->transitionSpace : 0;

	Assert(rollups != NIL);
	Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
	Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);

	foreach(lc, rollups)
	{
		RollupData *rollup = lfirst(lc);
		List	   *gsets = rollup->gsets;
		int			numGroupCols = list_length(linitial(gsets));

		/*
		 * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
		 * (already-sorted) input, and following ones do their own sort.
		 *
		 * In AGG_HASHED mode, there is one rollup for each grouping set.
		 *
		 * In AGG_MIXED mode, the first rollups are hashed, the first
		 * non-hashed one takes the (already-sorted) input, and following ones
		 * do their own sort.
		 */
		if (is_first)
		{
			/* First rollup: charge the input path's cost into pathnode */
			cost_agg(&pathnode->path, root,
					 aggstrategy,
					 agg_costs,
					 numGroupCols,
					 rollup->numGroups,
					 having_qual,
					 subpath->startup_cost,
					 subpath->total_cost,
					 subpath->rows,
					 subpath->pathtarget->width);
			is_first = false;
			if (!rollup->is_hashed)
				is_first_sort = false;
		}
		else
		{
			Path		sort_path;	/* dummy for result of cost_sort */
			Path		agg_path;	/* dummy for result of cost_agg */

			if (rollup->is_hashed || is_first_sort)
			{
				/*
				 * Account for cost of aggregation, but don't charge input
				 * cost again
				 */
				cost_agg(&agg_path, root,
						 rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
						 agg_costs,
						 numGroupCols,
						 rollup->numGroups,
						 having_qual,
						 0.0, 0.0,
						 subpath->rows,
						 subpath->pathtarget->width);
				if (!rollup->is_hashed)
					is_first_sort = false;
			}
			else
			{
				/* Account for cost of sort, but don't charge input cost again */
				cost_sort(&sort_path, root, NIL,
						  0.0,
						  subpath->rows,
						  subpath->pathtarget->width,
						  0.0,
						  work_mem,
						  -1.0);

				/* Account for cost of aggregation */

				cost_agg(&agg_path, root,
						 AGG_SORTED,
						 agg_costs,
						 numGroupCols,
						 rollup->numGroups,
						 having_qual,
						 sort_path.startup_cost,
						 sort_path.total_cost,
						 sort_path.rows,
						 subpath->pathtarget->width);
			}

			/* Fold this rollup's cost and row estimate into the totals */
			pathnode->path.total_cost += agg_path.total_cost;
			pathnode->path.rows += agg_path.rows;
		}
	}

	/* add tlist eval cost for each output row */
	pathnode->path.startup_cost += target->cost.startup;
	pathnode->path.total_cost += target->cost.startup +
		target->cost.per_tuple * pathnode->path.rows;

	return pathnode;
}
3239
3240 /*
3241 * create_minmaxagg_path
3242 * Creates a pathnode that represents computation of MIN/MAX aggregates
3243 *
3244 * 'rel' is the parent relation associated with the result
3245 * 'target' is the PathTarget to be computed
3246 * 'mmaggregates' is a list of MinMaxAggInfo structs
3247 * 'quals' is the HAVING quals if any
3248 */
3249 MinMaxAggPath *
create_minmaxagg_path(PlannerInfo * root,RelOptInfo * rel,PathTarget * target,List * mmaggregates,List * quals)3250 create_minmaxagg_path(PlannerInfo *root,
3251 RelOptInfo *rel,
3252 PathTarget *target,
3253 List *mmaggregates,
3254 List *quals)
3255 {
3256 MinMaxAggPath *pathnode = makeNode(MinMaxAggPath);
3257 Cost initplan_cost;
3258 ListCell *lc;
3259
3260 /* The topmost generated Plan node will be a Result */
3261 pathnode->path.pathtype = T_Result;
3262 pathnode->path.parent = rel;
3263 pathnode->path.pathtarget = target;
3264 /* For now, assume we are above any joins, so no parameterization */
3265 pathnode->path.param_info = NULL;
3266 pathnode->path.parallel_aware = false;
3267 /* A MinMaxAggPath implies use of subplans, so cannot be parallel-safe */
3268 pathnode->path.parallel_safe = false;
3269 pathnode->path.parallel_workers = 0;
3270 /* Result is one unordered row */
3271 pathnode->path.rows = 1;
3272 pathnode->path.pathkeys = NIL;
3273
3274 pathnode->mmaggregates = mmaggregates;
3275 pathnode->quals = quals;
3276
3277 /* Calculate cost of all the initplans ... */
3278 initplan_cost = 0;
3279 foreach(lc, mmaggregates)
3280 {
3281 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
3282
3283 initplan_cost += mminfo->pathcost;
3284 }
3285
3286 /* add tlist eval cost for each output row, plus cpu_tuple_cost */
3287 pathnode->path.startup_cost = initplan_cost + target->cost.startup;
3288 pathnode->path.total_cost = initplan_cost + target->cost.startup +
3289 target->cost.per_tuple + cpu_tuple_cost;
3290
3291 /*
3292 * Add cost of qual, if any --- but we ignore its selectivity, since our
3293 * rowcount estimate should be 1 no matter what the qual is.
3294 */
3295 if (quals)
3296 {
3297 QualCost qual_cost;
3298
3299 cost_qual_eval(&qual_cost, quals, root);
3300 pathnode->path.startup_cost += qual_cost.startup;
3301 pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
3302 }
3303
3304 return pathnode;
3305 }
3306
3307 /*
3308 * create_windowagg_path
3309 * Creates a pathnode that represents computation of window functions
3310 *
3311 * 'rel' is the parent relation associated with the result
3312 * 'subpath' is the path representing the source of data
3313 * 'target' is the PathTarget to be computed
3314 * 'windowFuncs' is a list of WindowFunc structs
3315 * 'winclause' is a WindowClause that is common to all the WindowFuncs
3316 *
3317 * The input must be sorted according to the WindowClause's PARTITION keys
3318 * plus ORDER BY keys.
3319 */
3320 WindowAggPath *
create_windowagg_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,PathTarget * target,List * windowFuncs,WindowClause * winclause)3321 create_windowagg_path(PlannerInfo *root,
3322 RelOptInfo *rel,
3323 Path *subpath,
3324 PathTarget *target,
3325 List *windowFuncs,
3326 WindowClause *winclause)
3327 {
3328 WindowAggPath *pathnode = makeNode(WindowAggPath);
3329
3330 pathnode->path.pathtype = T_WindowAgg;
3331 pathnode->path.parent = rel;
3332 pathnode->path.pathtarget = target;
3333 /* For now, assume we are above any joins, so no parameterization */
3334 pathnode->path.param_info = NULL;
3335 pathnode->path.parallel_aware = false;
3336 pathnode->path.parallel_safe = rel->consider_parallel &&
3337 subpath->parallel_safe;
3338 pathnode->path.parallel_workers = subpath->parallel_workers;
3339 /* WindowAgg preserves the input sort order */
3340 pathnode->path.pathkeys = subpath->pathkeys;
3341
3342 pathnode->subpath = subpath;
3343 pathnode->winclause = winclause;
3344
3345 /*
3346 * For costing purposes, assume that there are no redundant partitioning
3347 * or ordering columns; it's not worth the trouble to deal with that
3348 * corner case here. So we just pass the unmodified list lengths to
3349 * cost_windowagg.
3350 */
3351 cost_windowagg(&pathnode->path, root,
3352 windowFuncs,
3353 list_length(winclause->partitionClause),
3354 list_length(winclause->orderClause),
3355 subpath->startup_cost,
3356 subpath->total_cost,
3357 subpath->rows);
3358
3359 /* add tlist eval cost for each output row */
3360 pathnode->path.startup_cost += target->cost.startup;
3361 pathnode->path.total_cost += target->cost.startup +
3362 target->cost.per_tuple * pathnode->path.rows;
3363
3364 return pathnode;
3365 }
3366
3367 /*
3368 * create_setop_path
3369 * Creates a pathnode that represents computation of INTERSECT or EXCEPT
3370 *
3371 * 'rel' is the parent relation associated with the result
3372 * 'subpath' is the path representing the source of data
3373 * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
3374 * 'strategy' is the implementation strategy (sorted or hashed)
3375 * 'distinctList' is a list of SortGroupClause's representing the grouping
3376 * 'flagColIdx' is the column number where the flag column will be, if any
3377 * 'firstFlag' is the flag value for the first input relation when hashing;
3378 * or -1 when sorting
3379 * 'numGroups' is the estimated number of distinct groups
3380 * 'outputRows' is the estimated number of output rows
3381 */
3382 SetOpPath *
create_setop_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,SetOpCmd cmd,SetOpStrategy strategy,List * distinctList,AttrNumber flagColIdx,int firstFlag,double numGroups,double outputRows)3383 create_setop_path(PlannerInfo *root,
3384 RelOptInfo *rel,
3385 Path *subpath,
3386 SetOpCmd cmd,
3387 SetOpStrategy strategy,
3388 List *distinctList,
3389 AttrNumber flagColIdx,
3390 int firstFlag,
3391 double numGroups,
3392 double outputRows)
3393 {
3394 SetOpPath *pathnode = makeNode(SetOpPath);
3395
3396 pathnode->path.pathtype = T_SetOp;
3397 pathnode->path.parent = rel;
3398 /* SetOp doesn't project, so use source path's pathtarget */
3399 pathnode->path.pathtarget = subpath->pathtarget;
3400 /* For now, assume we are above any joins, so no parameterization */
3401 pathnode->path.param_info = NULL;
3402 pathnode->path.parallel_aware = false;
3403 pathnode->path.parallel_safe = rel->consider_parallel &&
3404 subpath->parallel_safe;
3405 pathnode->path.parallel_workers = subpath->parallel_workers;
3406 /* SetOp preserves the input sort order if in sort mode */
3407 pathnode->path.pathkeys =
3408 (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
3409
3410 pathnode->subpath = subpath;
3411 pathnode->cmd = cmd;
3412 pathnode->strategy = strategy;
3413 pathnode->distinctList = distinctList;
3414 pathnode->flagColIdx = flagColIdx;
3415 pathnode->firstFlag = firstFlag;
3416 pathnode->numGroups = numGroups;
3417
3418 /*
3419 * Charge one cpu_operator_cost per comparison per input tuple. We assume
3420 * all columns get compared at most of the tuples.
3421 */
3422 pathnode->path.startup_cost = subpath->startup_cost;
3423 pathnode->path.total_cost = subpath->total_cost +
3424 cpu_operator_cost * subpath->rows * list_length(distinctList);
3425 pathnode->path.rows = outputRows;
3426
3427 return pathnode;
3428 }
3429
3430 /*
3431 * create_recursiveunion_path
3432 * Creates a pathnode that represents a recursive UNION node
3433 *
3434 * 'rel' is the parent relation associated with the result
3435 * 'leftpath' is the source of data for the non-recursive term
3436 * 'rightpath' is the source of data for the recursive term
3437 * 'target' is the PathTarget to be computed
3438 * 'distinctList' is a list of SortGroupClause's representing the grouping
3439 * 'wtParam' is the ID of Param representing work table
3440 * 'numGroups' is the estimated number of groups
3441 *
3442 * For recursive UNION ALL, distinctList is empty and numGroups is zero
3443 */
3444 RecursiveUnionPath *
create_recursiveunion_path(PlannerInfo * root,RelOptInfo * rel,Path * leftpath,Path * rightpath,PathTarget * target,List * distinctList,int wtParam,double numGroups)3445 create_recursiveunion_path(PlannerInfo *root,
3446 RelOptInfo *rel,
3447 Path *leftpath,
3448 Path *rightpath,
3449 PathTarget *target,
3450 List *distinctList,
3451 int wtParam,
3452 double numGroups)
3453 {
3454 RecursiveUnionPath *pathnode = makeNode(RecursiveUnionPath);
3455
3456 pathnode->path.pathtype = T_RecursiveUnion;
3457 pathnode->path.parent = rel;
3458 pathnode->path.pathtarget = target;
3459 /* For now, assume we are above any joins, so no parameterization */
3460 pathnode->path.param_info = NULL;
3461 pathnode->path.parallel_aware = false;
3462 pathnode->path.parallel_safe = rel->consider_parallel &&
3463 leftpath->parallel_safe && rightpath->parallel_safe;
3464 /* Foolish, but we'll do it like joins for now: */
3465 pathnode->path.parallel_workers = leftpath->parallel_workers;
3466 /* RecursiveUnion result is always unsorted */
3467 pathnode->path.pathkeys = NIL;
3468
3469 pathnode->leftpath = leftpath;
3470 pathnode->rightpath = rightpath;
3471 pathnode->distinctList = distinctList;
3472 pathnode->wtParam = wtParam;
3473 pathnode->numGroups = numGroups;
3474
3475 cost_recursive_union(&pathnode->path, leftpath, rightpath);
3476
3477 return pathnode;
3478 }
3479
3480 /*
3481 * create_lockrows_path
3482 * Creates a pathnode that represents acquiring row locks
3483 *
3484 * 'rel' is the parent relation associated with the result
3485 * 'subpath' is the path representing the source of data
3486 * 'rowMarks' is a list of PlanRowMark's
3487 * 'epqParam' is the ID of Param for EvalPlanQual re-eval
3488 */
3489 LockRowsPath *
create_lockrows_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,List * rowMarks,int epqParam)3490 create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
3491 Path *subpath, List *rowMarks, int epqParam)
3492 {
3493 LockRowsPath *pathnode = makeNode(LockRowsPath);
3494
3495 pathnode->path.pathtype = T_LockRows;
3496 pathnode->path.parent = rel;
3497 /* LockRows doesn't project, so use source path's pathtarget */
3498 pathnode->path.pathtarget = subpath->pathtarget;
3499 /* For now, assume we are above any joins, so no parameterization */
3500 pathnode->path.param_info = NULL;
3501 pathnode->path.parallel_aware = false;
3502 pathnode->path.parallel_safe = false;
3503 pathnode->path.parallel_workers = 0;
3504 pathnode->path.rows = subpath->rows;
3505
3506 /*
3507 * The result cannot be assumed sorted, since locking might cause the sort
3508 * key columns to be replaced with new values.
3509 */
3510 pathnode->path.pathkeys = NIL;
3511
3512 pathnode->subpath = subpath;
3513 pathnode->rowMarks = rowMarks;
3514 pathnode->epqParam = epqParam;
3515
3516 /*
3517 * We should charge something extra for the costs of row locking and
3518 * possible refetches, but it's hard to say how much. For now, use
3519 * cpu_tuple_cost per row.
3520 */
3521 pathnode->path.startup_cost = subpath->startup_cost;
3522 pathnode->path.total_cost = subpath->total_cost +
3523 cpu_tuple_cost * subpath->rows;
3524
3525 return pathnode;
3526 }
3527
3528 /*
3529 * create_modifytable_path
3530 * Creates a pathnode that represents performing INSERT/UPDATE/DELETE mods
3531 *
3532 * 'rel' is the parent relation associated with the result
3533 * 'operation' is the operation type
3534 * 'canSetTag' is true if we set the command tag/es_processed
3535 * 'nominalRelation' is the parent RT index for use of EXPLAIN
3536 * 'rootRelation' is the partitioned table root RT index, or 0 if none
3537 * 'partColsUpdated' is true if any partitioning columns are being updated,
3538 * either from the target relation or a descendent partitioned table.
3539 * 'resultRelations' is an integer list of actual RT indexes of target rel(s)
3540 * 'subpaths' is a list of Path(s) producing source data (one per rel)
3541 * 'subroots' is a list of PlannerInfo structs (one per rel)
3542 * 'withCheckOptionLists' is a list of WCO lists (one per rel)
3543 * 'returningLists' is a list of RETURNING tlists (one per rel)
3544 * 'rowMarks' is a list of PlanRowMarks (non-locking only)
3545 * 'onconflict' is the ON CONFLICT clause, or NULL
3546 * 'epqParam' is the ID of Param for EvalPlanQual re-eval
3547 */
3548 ModifyTablePath *
create_modifytable_path(PlannerInfo * root,RelOptInfo * rel,CmdType operation,bool canSetTag,Index nominalRelation,Index rootRelation,bool partColsUpdated,List * resultRelations,List * subpaths,List * subroots,List * withCheckOptionLists,List * returningLists,List * rowMarks,OnConflictExpr * onconflict,int epqParam)3549 create_modifytable_path(PlannerInfo *root, RelOptInfo *rel,
3550 CmdType operation, bool canSetTag,
3551 Index nominalRelation, Index rootRelation,
3552 bool partColsUpdated,
3553 List *resultRelations, List *subpaths,
3554 List *subroots,
3555 List *withCheckOptionLists, List *returningLists,
3556 List *rowMarks, OnConflictExpr *onconflict,
3557 int epqParam)
3558 {
3559 ModifyTablePath *pathnode = makeNode(ModifyTablePath);
3560 double total_size;
3561 ListCell *lc;
3562
3563 Assert(list_length(resultRelations) == list_length(subpaths));
3564 Assert(list_length(resultRelations) == list_length(subroots));
3565 Assert(withCheckOptionLists == NIL ||
3566 list_length(resultRelations) == list_length(withCheckOptionLists));
3567 Assert(returningLists == NIL ||
3568 list_length(resultRelations) == list_length(returningLists));
3569
3570 pathnode->path.pathtype = T_ModifyTable;
3571 pathnode->path.parent = rel;
3572 /* pathtarget is not interesting, just make it minimally valid */
3573 pathnode->path.pathtarget = rel->reltarget;
3574 /* For now, assume we are above any joins, so no parameterization */
3575 pathnode->path.param_info = NULL;
3576 pathnode->path.parallel_aware = false;
3577 pathnode->path.parallel_safe = false;
3578 pathnode->path.parallel_workers = 0;
3579 pathnode->path.pathkeys = NIL;
3580
3581 /*
3582 * Compute cost & rowcount as sum of subpath costs & rowcounts.
3583 *
3584 * Currently, we don't charge anything extra for the actual table
3585 * modification work, nor for the WITH CHECK OPTIONS or RETURNING
3586 * expressions if any. It would only be window dressing, since
3587 * ModifyTable is always a top-level node and there is no way for the
3588 * costs to change any higher-level planning choices. But we might want
3589 * to make it look better sometime.
3590 */
3591 pathnode->path.startup_cost = 0;
3592 pathnode->path.total_cost = 0;
3593 pathnode->path.rows = 0;
3594 total_size = 0;
3595 foreach(lc, subpaths)
3596 {
3597 Path *subpath = (Path *) lfirst(lc);
3598
3599 if (lc == list_head(subpaths)) /* first node? */
3600 pathnode->path.startup_cost = subpath->startup_cost;
3601 pathnode->path.total_cost += subpath->total_cost;
3602 pathnode->path.rows += subpath->rows;
3603 total_size += subpath->pathtarget->width * subpath->rows;
3604 }
3605
3606 /*
3607 * Set width to the average width of the subpath outputs. XXX this is
3608 * totally wrong: we should report zero if no RETURNING, else an average
3609 * of the RETURNING tlist widths. But it's what happened historically,
3610 * and improving it is a task for another day.
3611 */
3612 if (pathnode->path.rows > 0)
3613 total_size /= pathnode->path.rows;
3614 pathnode->path.pathtarget->width = rint(total_size);
3615
3616 pathnode->operation = operation;
3617 pathnode->canSetTag = canSetTag;
3618 pathnode->nominalRelation = nominalRelation;
3619 pathnode->rootRelation = rootRelation;
3620 pathnode->partColsUpdated = partColsUpdated;
3621 pathnode->resultRelations = resultRelations;
3622 pathnode->subpaths = subpaths;
3623 pathnode->subroots = subroots;
3624 pathnode->withCheckOptionLists = withCheckOptionLists;
3625 pathnode->returningLists = returningLists;
3626 pathnode->rowMarks = rowMarks;
3627 pathnode->onconflict = onconflict;
3628 pathnode->epqParam = epqParam;
3629
3630 return pathnode;
3631 }
3632
3633 /*
3634 * create_limit_path
3635 * Creates a pathnode that represents performing LIMIT/OFFSET
3636 *
3637 * In addition to providing the actual OFFSET and LIMIT expressions,
3638 * the caller must provide estimates of their values for costing purposes.
3639 * The estimates are as computed by preprocess_limit(), ie, 0 represents
3640 * the clause not being present, and -1 means it's present but we could
3641 * not estimate its value.
3642 *
3643 * 'rel' is the parent relation associated with the result
3644 * 'subpath' is the path representing the source of data
3645 * 'limitOffset' is the actual OFFSET expression, or NULL
3646 * 'limitCount' is the actual LIMIT expression, or NULL
3647 * 'offset_est' is the estimated value of the OFFSET expression
3648 * 'count_est' is the estimated value of the LIMIT expression
3649 */
3650 LimitPath *
create_limit_path(PlannerInfo * root,RelOptInfo * rel,Path * subpath,Node * limitOffset,Node * limitCount,LimitOption limitOption,int64 offset_est,int64 count_est)3651 create_limit_path(PlannerInfo *root, RelOptInfo *rel,
3652 Path *subpath,
3653 Node *limitOffset, Node *limitCount,
3654 LimitOption limitOption,
3655 int64 offset_est, int64 count_est)
3656 {
3657 LimitPath *pathnode = makeNode(LimitPath);
3658
3659 pathnode->path.pathtype = T_Limit;
3660 pathnode->path.parent = rel;
3661 /* Limit doesn't project, so use source path's pathtarget */
3662 pathnode->path.pathtarget = subpath->pathtarget;
3663 /* For now, assume we are above any joins, so no parameterization */
3664 pathnode->path.param_info = NULL;
3665 pathnode->path.parallel_aware = false;
3666 pathnode->path.parallel_safe = rel->consider_parallel &&
3667 subpath->parallel_safe;
3668 pathnode->path.parallel_workers = subpath->parallel_workers;
3669 pathnode->path.rows = subpath->rows;
3670 pathnode->path.startup_cost = subpath->startup_cost;
3671 pathnode->path.total_cost = subpath->total_cost;
3672 pathnode->path.pathkeys = subpath->pathkeys;
3673 pathnode->subpath = subpath;
3674 pathnode->limitOffset = limitOffset;
3675 pathnode->limitCount = limitCount;
3676 pathnode->limitOption = limitOption;
3677
3678 /*
3679 * Adjust the output rows count and costs according to the offset/limit.
3680 */
3681 adjust_limit_rows_costs(&pathnode->path.rows,
3682 &pathnode->path.startup_cost,
3683 &pathnode->path.total_cost,
3684 offset_est, count_est);
3685
3686 return pathnode;
3687 }
3688
3689 /*
3690 * adjust_limit_rows_costs
3691 * Adjust the size and cost estimates for a LimitPath node according to the
3692 * offset/limit.
3693 *
3694 * This is only a cosmetic issue if we are at top level, but if we are
3695 * building a subquery then it's important to report correct info to the outer
3696 * planner.
3697 *
3698 * When the offset or count couldn't be estimated, use 10% of the estimated
3699 * number of rows emitted from the subpath.
3700 *
3701 * XXX we don't bother to add eval costs of the offset/limit expressions
3702 * themselves to the path costs. In theory we should, but in most cases those
3703 * expressions are trivial and it's just not worth the trouble.
3704 */
3705 void
adjust_limit_rows_costs(double * rows,Cost * startup_cost,Cost * total_cost,int64 offset_est,int64 count_est)3706 adjust_limit_rows_costs(double *rows, /* in/out parameter */
3707 Cost *startup_cost, /* in/out parameter */
3708 Cost *total_cost, /* in/out parameter */
3709 int64 offset_est,
3710 int64 count_est)
3711 {
3712 double input_rows = *rows;
3713 Cost input_startup_cost = *startup_cost;
3714 Cost input_total_cost = *total_cost;
3715
3716 if (offset_est != 0)
3717 {
3718 double offset_rows;
3719
3720 if (offset_est > 0)
3721 offset_rows = (double) offset_est;
3722 else
3723 offset_rows = clamp_row_est(input_rows * 0.10);
3724 if (offset_rows > *rows)
3725 offset_rows = *rows;
3726 if (input_rows > 0)
3727 *startup_cost +=
3728 (input_total_cost - input_startup_cost)
3729 * offset_rows / input_rows;
3730 *rows -= offset_rows;
3731 if (*rows < 1)
3732 *rows = 1;
3733 }
3734
3735 if (count_est != 0)
3736 {
3737 double count_rows;
3738
3739 if (count_est > 0)
3740 count_rows = (double) count_est;
3741 else
3742 count_rows = clamp_row_est(input_rows * 0.10);
3743 if (count_rows > *rows)
3744 count_rows = *rows;
3745 if (input_rows > 0)
3746 *total_cost = *startup_cost +
3747 (input_total_cost - input_startup_cost)
3748 * count_rows / input_rows;
3749 *rows = count_rows;
3750 if (*rows < 1)
3751 *rows = 1;
3752 }
3753 }
3754
3755
3756 /*
3757 * reparameterize_path
3758 * Attempt to modify a Path to have greater parameterization
3759 *
3760 * We use this to attempt to bring all child paths of an appendrel to the
3761 * same parameterization level, ensuring that they all enforce the same set
3762 * of join quals (and thus that that parameterization can be attributed to
3763 * an append path built from such paths). Currently, only a few path types
3764 * are supported here, though more could be added at need. We return NULL
3765 * if we can't reparameterize the given path.
3766 *
3767 * Note: we intentionally do not pass created paths to add_path(); it would
3768 * possibly try to delete them on the grounds of being cost-inferior to the
3769 * paths they were made from, and we don't want that. Paths made here are
3770 * not necessarily of general-purpose usefulness, but they can be useful
3771 * as members of an append path.
3772 */
3773 Path *
reparameterize_path(PlannerInfo * root,Path * path,Relids required_outer,double loop_count)3774 reparameterize_path(PlannerInfo *root, Path *path,
3775 Relids required_outer,
3776 double loop_count)
3777 {
3778 RelOptInfo *rel = path->parent;
3779
3780 /* Can only increase, not decrease, path's parameterization */
3781 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
3782 return NULL;
3783 switch (path->pathtype)
3784 {
3785 case T_SeqScan:
3786 return create_seqscan_path(root, rel, required_outer, 0);
3787 case T_SampleScan:
3788 return (Path *) create_samplescan_path(root, rel, required_outer);
3789 case T_IndexScan:
3790 case T_IndexOnlyScan:
3791 {
3792 IndexPath *ipath = (IndexPath *) path;
3793 IndexPath *newpath = makeNode(IndexPath);
3794
3795 /*
3796 * We can't use create_index_path directly, and would not want
3797 * to because it would re-compute the indexqual conditions
3798 * which is wasted effort. Instead we hack things a bit:
3799 * flat-copy the path node, revise its param_info, and redo
3800 * the cost estimate.
3801 */
3802 memcpy(newpath, ipath, sizeof(IndexPath));
3803 newpath->path.param_info =
3804 get_baserel_parampathinfo(root, rel, required_outer);
3805 cost_index(newpath, root, loop_count, false);
3806 return (Path *) newpath;
3807 }
3808 case T_BitmapHeapScan:
3809 {
3810 BitmapHeapPath *bpath = (BitmapHeapPath *) path;
3811
3812 return (Path *) create_bitmap_heap_path(root,
3813 rel,
3814 bpath->bitmapqual,
3815 required_outer,
3816 loop_count, 0);
3817 }
3818 case T_SubqueryScan:
3819 {
3820 SubqueryScanPath *spath = (SubqueryScanPath *) path;
3821
3822 return (Path *) create_subqueryscan_path(root,
3823 rel,
3824 spath->subpath,
3825 spath->path.pathkeys,
3826 required_outer);
3827 }
3828 case T_Result:
3829 /* Supported only for RTE_RESULT scan paths */
3830 if (IsA(path, Path))
3831 return create_resultscan_path(root, rel, required_outer);
3832 break;
3833 case T_Append:
3834 {
3835 AppendPath *apath = (AppendPath *) path;
3836 List *childpaths = NIL;
3837 List *partialpaths = NIL;
3838 int i;
3839 ListCell *lc;
3840
3841 /* Reparameterize the children */
3842 i = 0;
3843 foreach(lc, apath->subpaths)
3844 {
3845 Path *spath = (Path *) lfirst(lc);
3846
3847 spath = reparameterize_path(root, spath,
3848 required_outer,
3849 loop_count);
3850 if (spath == NULL)
3851 return NULL;
3852 /* We have to re-split the regular and partial paths */
3853 if (i < apath->first_partial_path)
3854 childpaths = lappend(childpaths, spath);
3855 else
3856 partialpaths = lappend(partialpaths, spath);
3857 i++;
3858 }
3859 return (Path *)
3860 create_append_path(root, rel, childpaths, partialpaths,
3861 apath->path.pathkeys, required_outer,
3862 apath->path.parallel_workers,
3863 apath->path.parallel_aware,
3864 apath->partitioned_rels,
3865 -1);
3866 }
3867 default:
3868 break;
3869 }
3870 return NULL;
3871 }
3872
3873 /*
3874 * reparameterize_path_by_child
3875 * Given a path parameterized by the parent of the given child relation,
3876 * translate the path to be parameterized by the given child relation.
3877 *
3878 * The function creates a new path of the same type as the given path, but
3879 * parameterized by the given child relation. Most fields from the original
3880 * path can simply be flat-copied, but any expressions must be adjusted to
3881 * refer to the correct varnos, and any paths must be recursively
3882 * reparameterized. Other fields that refer to specific relids also need
3883 * adjustment.
3884 *
3885 * The cost, number of rows, width and parallel path properties depend upon
3886 * path->parent, which does not change during the translation. Hence those
3887 * members are copied as they are.
3888 *
3889 * If the given path can not be reparameterized, the function returns NULL.
3890 */
3891 Path *
reparameterize_path_by_child(PlannerInfo * root,Path * path,RelOptInfo * child_rel)3892 reparameterize_path_by_child(PlannerInfo *root, Path *path,
3893 RelOptInfo *child_rel)
3894 {
3895
3896 #define FLAT_COPY_PATH(newnode, node, nodetype) \
3897 ( (newnode) = makeNode(nodetype), \
3898 memcpy((newnode), (node), sizeof(nodetype)) )
3899
3900 #define ADJUST_CHILD_ATTRS(node) \
3901 ((node) = \
3902 (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
3903 child_rel->relids, \
3904 child_rel->top_parent_relids))
3905
3906 #define REPARAMETERIZE_CHILD_PATH(path) \
3907 do { \
3908 (path) = reparameterize_path_by_child(root, (path), child_rel); \
3909 if ((path) == NULL) \
3910 return NULL; \
3911 } while(0)
3912
3913 #define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \
3914 do { \
3915 if ((pathlist) != NIL) \
3916 { \
3917 (pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \
3918 child_rel); \
3919 if ((pathlist) == NIL) \
3920 return NULL; \
3921 } \
3922 } while(0)
3923
3924 Path *new_path;
3925 ParamPathInfo *new_ppi;
3926 ParamPathInfo *old_ppi;
3927 Relids required_outer;
3928
3929 /*
3930 * If the path is not parameterized by parent of the given relation, it
3931 * doesn't need reparameterization.
3932 */
3933 if (!path->param_info ||
3934 !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids))
3935 return path;
3936
3937 /*
3938 * If possible, reparameterize the given path, making a copy.
3939 *
3940 * This function is currently only applied to the inner side of a nestloop
3941 * join that is being partitioned by the partitionwise-join code. Hence,
3942 * we need only support path types that plausibly arise in that context.
3943 * (In particular, supporting sorted path types would be a waste of code
3944 * and cycles: even if we translated them here, they'd just lose in
3945 * subsequent cost comparisons.) If we do see an unsupported path type,
3946 * that just means we won't be able to generate a partitionwise-join plan
3947 * using that path type.
3948 */
3949 switch (nodeTag(path))
3950 {
3951 case T_Path:
3952 FLAT_COPY_PATH(new_path, path, Path);
3953 break;
3954
3955 case T_IndexPath:
3956 {
3957 IndexPath *ipath;
3958
3959 FLAT_COPY_PATH(ipath, path, IndexPath);
3960 ADJUST_CHILD_ATTRS(ipath->indexclauses);
3961 new_path = (Path *) ipath;
3962 }
3963 break;
3964
3965 case T_BitmapHeapPath:
3966 {
3967 BitmapHeapPath *bhpath;
3968
3969 FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
3970 REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
3971 new_path = (Path *) bhpath;
3972 }
3973 break;
3974
3975 case T_BitmapAndPath:
3976 {
3977 BitmapAndPath *bapath;
3978
3979 FLAT_COPY_PATH(bapath, path, BitmapAndPath);
3980 REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals);
3981 new_path = (Path *) bapath;
3982 }
3983 break;
3984
3985 case T_BitmapOrPath:
3986 {
3987 BitmapOrPath *bopath;
3988
3989 FLAT_COPY_PATH(bopath, path, BitmapOrPath);
3990 REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals);
3991 new_path = (Path *) bopath;
3992 }
3993 break;
3994
3995 case T_ForeignPath:
3996 {
3997 ForeignPath *fpath;
3998 ReparameterizeForeignPathByChild_function rfpc_func;
3999
4000 FLAT_COPY_PATH(fpath, path, ForeignPath);
4001 if (fpath->fdw_outerpath)
4002 REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);
4003
4004 /* Hand over to FDW if needed. */
4005 rfpc_func =
4006 path->parent->fdwroutine->ReparameterizeForeignPathByChild;
4007 if (rfpc_func)
4008 fpath->fdw_private = rfpc_func(root, fpath->fdw_private,
4009 child_rel);
4010 new_path = (Path *) fpath;
4011 }
4012 break;
4013
4014 case T_CustomPath:
4015 {
4016 CustomPath *cpath;
4017
4018 FLAT_COPY_PATH(cpath, path, CustomPath);
4019 REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
4020 if (cpath->methods &&
4021 cpath->methods->ReparameterizeCustomPathByChild)
4022 cpath->custom_private =
4023 cpath->methods->ReparameterizeCustomPathByChild(root,
4024 cpath->custom_private,
4025 child_rel);
4026 new_path = (Path *) cpath;
4027 }
4028 break;
4029
4030 case T_NestPath:
4031 {
4032 JoinPath *jpath;
4033
4034 FLAT_COPY_PATH(jpath, path, NestPath);
4035
4036 REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
4037 REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
4038 ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
4039 new_path = (Path *) jpath;
4040 }
4041 break;
4042
4043 case T_MergePath:
4044 {
4045 JoinPath *jpath;
4046 MergePath *mpath;
4047
4048 FLAT_COPY_PATH(mpath, path, MergePath);
4049
4050 jpath = (JoinPath *) mpath;
4051 REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
4052 REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
4053 ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
4054 ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
4055 new_path = (Path *) mpath;
4056 }
4057 break;
4058
4059 case T_HashPath:
4060 {
4061 JoinPath *jpath;
4062 HashPath *hpath;
4063
4064 FLAT_COPY_PATH(hpath, path, HashPath);
4065
4066 jpath = (JoinPath *) hpath;
4067 REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
4068 REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
4069 ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
4070 ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
4071 new_path = (Path *) hpath;
4072 }
4073 break;
4074
4075 case T_AppendPath:
4076 {
4077 AppendPath *apath;
4078
4079 FLAT_COPY_PATH(apath, path, AppendPath);
4080 REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths);
4081 new_path = (Path *) apath;
4082 }
4083 break;
4084
4085 case T_GatherPath:
4086 {
4087 GatherPath *gpath;
4088
4089 FLAT_COPY_PATH(gpath, path, GatherPath);
4090 REPARAMETERIZE_CHILD_PATH(gpath->subpath);
4091 new_path = (Path *) gpath;
4092 }
4093 break;
4094
4095 default:
4096
4097 /* We don't know how to reparameterize this path. */
4098 return NULL;
4099 }
4100
4101 /*
4102 * Adjust the parameterization information, which refers to the topmost
4103 * parent. The topmost parent can be multiple levels away from the given
4104 * child, hence use multi-level expression adjustment routines.
4105 */
4106 old_ppi = new_path->param_info;
4107 required_outer =
4108 adjust_child_relids_multilevel(root, old_ppi->ppi_req_outer,
4109 child_rel->relids,
4110 child_rel->top_parent_relids);
4111
4112 /* If we already have a PPI for this parameterization, just return it */
4113 new_ppi = find_param_path_info(new_path->parent, required_outer);
4114
4115 /*
4116 * If not, build a new one and link it to the list of PPIs. For the same
4117 * reason as explained in mark_dummy_rel(), allocate new PPI in the same
4118 * context the given RelOptInfo is in.
4119 */
4120 if (new_ppi == NULL)
4121 {
4122 MemoryContext oldcontext;
4123 RelOptInfo *rel = path->parent;
4124
4125 oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
4126
4127 new_ppi = makeNode(ParamPathInfo);
4128 new_ppi->ppi_req_outer = bms_copy(required_outer);
4129 new_ppi->ppi_rows = old_ppi->ppi_rows;
4130 new_ppi->ppi_clauses = old_ppi->ppi_clauses;
4131 ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
4132 rel->ppilist = lappend(rel->ppilist, new_ppi);
4133
4134 MemoryContextSwitchTo(oldcontext);
4135 }
4136 bms_free(required_outer);
4137
4138 new_path->param_info = new_ppi;
4139
4140 /*
4141 * Adjust the path target if the parent of the outer relation is
4142 * referenced in the targetlist. This can happen when only the parent of
4143 * outer relation is laterally referenced in this relation.
4144 */
4145 if (bms_overlap(path->parent->lateral_relids,
4146 child_rel->top_parent_relids))
4147 {
4148 new_path->pathtarget = copy_pathtarget(new_path->pathtarget);
4149 ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs);
4150 }
4151
4152 return new_path;
4153 }
4154
4155 /*
4156 * reparameterize_pathlist_by_child
4157 * Helper function to reparameterize a list of paths by given child rel.
4158 */
4159 static List *
reparameterize_pathlist_by_child(PlannerInfo * root,List * pathlist,RelOptInfo * child_rel)4160 reparameterize_pathlist_by_child(PlannerInfo *root,
4161 List *pathlist,
4162 RelOptInfo *child_rel)
4163 {
4164 ListCell *lc;
4165 List *result = NIL;
4166
4167 foreach(lc, pathlist)
4168 {
4169 Path *path = reparameterize_path_by_child(root, lfirst(lc),
4170 child_rel);
4171
4172 if (path == NULL)
4173 {
4174 list_free(result);
4175 return NIL;
4176 }
4177
4178 result = lappend(result, path);
4179 }
4180
4181 return result;
4182 }
4183