1 /*-------------------------------------------------------------------------
2 *
3 * joinpath.c
4 * Routines to find all possible paths for processing a set of joins
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/joinpath.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/paths.h"
24 #include "optimizer/planmain.h"
25
26 /* Hook for plugins to get control in add_paths_to_joinrel() */
27 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
28
29 #define PATH_PARAM_BY_REL(path, rel) \
30 ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
31
32 static void try_partial_mergejoin_path(PlannerInfo *root,
33 RelOptInfo *joinrel,
34 Path *outer_path,
35 Path *inner_path,
36 List *pathkeys,
37 List *mergeclauses,
38 List *outersortkeys,
39 List *innersortkeys,
40 JoinType jointype,
41 JoinPathExtraData *extra);
42 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
43 RelOptInfo *outerrel, RelOptInfo *innerrel,
44 JoinType jointype, JoinPathExtraData *extra);
45 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
46 RelOptInfo *outerrel, RelOptInfo *innerrel,
47 JoinType jointype, JoinPathExtraData *extra);
48 static void consider_parallel_nestloop(PlannerInfo *root,
49 RelOptInfo *joinrel,
50 RelOptInfo *outerrel,
51 RelOptInfo *innerrel,
52 JoinType jointype,
53 JoinPathExtraData *extra);
54 static void consider_parallel_mergejoin(PlannerInfo *root,
55 RelOptInfo *joinrel,
56 RelOptInfo *outerrel,
57 RelOptInfo *innerrel,
58 JoinType jointype,
59 JoinPathExtraData *extra,
60 Path *inner_cheapest_total);
61 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 RelOptInfo *outerrel, RelOptInfo *innerrel,
63 JoinType jointype, JoinPathExtraData *extra);
64 static List *select_mergejoin_clauses(PlannerInfo *root,
65 RelOptInfo *joinrel,
66 RelOptInfo *outerrel,
67 RelOptInfo *innerrel,
68 List *restrictlist,
69 JoinType jointype,
70 bool *mergejoin_allowed);
71 static void generate_mergejoin_paths(PlannerInfo *root,
72 RelOptInfo *joinrel,
73 RelOptInfo *innerrel,
74 Path *outerpath,
75 JoinType jointype,
76 JoinPathExtraData *extra,
77 bool useallclauses,
78 Path *inner_cheapest_total,
79 List *merge_pathkeys,
80 bool is_partial);
81
82
83 /*
84 * add_paths_to_joinrel
85 * Given a join relation and two component rels from which it can be made,
86 * consider all possible paths that use the two component rels as outer
87 * and inner rel respectively. Add these paths to the join rel's pathlist
88 * if they survive comparison with other paths (and remove any existing
89 * paths that are dominated by these paths).
90 *
91 * Modifies the pathlist field of the joinrel node to contain the best
92 * paths found so far.
93 *
94 * jointype is not necessarily the same as sjinfo->jointype; it might be
95 * "flipped around" if we are considering joining the rels in the opposite
96 * direction from what's indicated in sjinfo.
97 *
98 * Also, this routine and others in this module accept the special JoinTypes
99 * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
100 * unique-ify the outer or inner relation and then apply a regular inner
101 * join. These values are not allowed to propagate outside this module,
102 * however. Path cost estimation code may need to recognize that it's
103 * dealing with such a case --- the combination of nominal jointype INNER
104 * with sjinfo->jointype == JOIN_SEMI indicates that.
105 */
106 void
add_paths_to_joinrel(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,SpecialJoinInfo * sjinfo,List * restrictlist)107 add_paths_to_joinrel(PlannerInfo *root,
108 RelOptInfo *joinrel,
109 RelOptInfo *outerrel,
110 RelOptInfo *innerrel,
111 JoinType jointype,
112 SpecialJoinInfo *sjinfo,
113 List *restrictlist)
114 {
115 JoinPathExtraData extra;
116 bool mergejoin_allowed = true;
117 ListCell *lc;
118
119 extra.restrictlist = restrictlist;
120 extra.mergeclause_list = NIL;
121 extra.sjinfo = sjinfo;
122 extra.param_source_rels = NULL;
123
124 /*
125 * See if the inner relation is provably unique for this outer rel.
126 *
127 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
128 * matter since the executor can make the equivalent optimization anyway;
129 * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
130 * must be considering a semijoin whose inner side is not provably unique
131 * (else reduce_unique_semijoins would've simplified it), so there's no
132 * point in calling innerrel_is_unique. However, if the LHS covers all of
133 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
134 * because the path produced by create_unique_path will be unique relative
135 * to the LHS. (If we have an LHS that's only part of the min_lefthand,
136 * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
137 * letting that value escape this module.
138 */
139 switch (jointype)
140 {
141 case JOIN_SEMI:
142 case JOIN_ANTI:
143 extra.inner_unique = false; /* well, unproven */
144 break;
145 case JOIN_UNIQUE_INNER:
146 extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
147 outerrel->relids);
148 break;
149 case JOIN_UNIQUE_OUTER:
150 extra.inner_unique = innerrel_is_unique(root,
151 outerrel->relids,
152 innerrel,
153 JOIN_INNER,
154 restrictlist,
155 false);
156 break;
157 default:
158 extra.inner_unique = innerrel_is_unique(root,
159 outerrel->relids,
160 innerrel,
161 jointype,
162 restrictlist,
163 false);
164 break;
165 }
166
167 /*
168 * Find potential mergejoin clauses. We can skip this if we are not
169 * interested in doing a mergejoin. However, mergejoin may be our only
170 * way of implementing a full outer join, so override enable_mergejoin if
171 * it's a full join.
172 */
173 if (enable_mergejoin || jointype == JOIN_FULL)
174 extra.mergeclause_list = select_mergejoin_clauses(root,
175 joinrel,
176 outerrel,
177 innerrel,
178 restrictlist,
179 jointype,
180 &mergejoin_allowed);
181
182 /*
183 * If it's SEMI, ANTI, or inner_unique join, compute correction factors
184 * for cost estimation. These will be the same for all paths.
185 */
186 if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
187 compute_semi_anti_join_factors(root, outerrel, innerrel,
188 jointype, sjinfo, restrictlist,
189 &extra.semifactors);
190
191 /*
192 * Decide whether it's sensible to generate parameterized paths for this
193 * joinrel, and if so, which relations such paths should require. There
194 * is usually no need to create a parameterized result path unless there
195 * is a join order restriction that prevents joining one of our input rels
196 * directly to the parameter source rel instead of joining to the other
197 * input rel. (But see allow_star_schema_join().) This restriction
198 * reduces the number of parameterized paths we have to deal with at
199 * higher join levels, without compromising the quality of the resulting
200 * plan. We express the restriction as a Relids set that must overlap the
201 * parameterization of any proposed join path.
202 */
203 foreach(lc, root->join_info_list)
204 {
205 SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
206
207 /*
208 * SJ is relevant to this join if we have some part of its RHS
209 * (possibly not all of it), and haven't yet joined to its LHS. (This
210 * test is pretty simplistic, but should be sufficient considering the
211 * join has already been proven legal.) If the SJ is relevant, it
212 * presents constraints for joining to anything not in its RHS.
213 */
214 if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
215 !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
216 extra.param_source_rels = bms_join(extra.param_source_rels,
217 bms_difference(root->all_baserels,
218 sjinfo2->min_righthand));
219
220 /* full joins constrain both sides symmetrically */
221 if (sjinfo2->jointype == JOIN_FULL &&
222 bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
223 !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
224 extra.param_source_rels = bms_join(extra.param_source_rels,
225 bms_difference(root->all_baserels,
226 sjinfo2->min_lefthand));
227 }
228
229 /*
230 * However, when a LATERAL subquery is involved, there will simply not be
231 * any paths for the joinrel that aren't parameterized by whatever the
232 * subquery is parameterized by, unless its parameterization is resolved
233 * within the joinrel. So we might as well allow additional dependencies
234 * on whatever residual lateral dependencies the joinrel will have.
235 */
236 extra.param_source_rels = bms_add_members(extra.param_source_rels,
237 joinrel->lateral_relids);
238
239 /*
240 * 1. Consider mergejoin paths where both relations must be explicitly
241 * sorted. Skip this if we can't mergejoin.
242 */
243 if (mergejoin_allowed)
244 sort_inner_and_outer(root, joinrel, outerrel, innerrel,
245 jointype, &extra);
246
247 /*
248 * 2. Consider paths where the outer relation need not be explicitly
249 * sorted. This includes both nestloops and mergejoins where the outer
250 * path is already ordered. Again, skip this if we can't mergejoin.
251 * (That's okay because we know that nestloop can't handle right/full
252 * joins at all, so it wouldn't work in the prohibited cases either.)
253 */
254 if (mergejoin_allowed)
255 match_unsorted_outer(root, joinrel, outerrel, innerrel,
256 jointype, &extra);
257
258 #ifdef NOT_USED
259
260 /*
261 * 3. Consider paths where the inner relation need not be explicitly
262 * sorted. This includes mergejoins only (nestloops were already built in
263 * match_unsorted_outer).
264 *
265 * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
266 * significant difference between the inner and outer side of a mergejoin,
267 * so match_unsorted_inner creates no paths that aren't equivalent to
268 * those made by match_unsorted_outer when add_paths_to_joinrel() is
269 * invoked with the two rels given in the other order.
270 */
271 if (mergejoin_allowed)
272 match_unsorted_inner(root, joinrel, outerrel, innerrel,
273 jointype, &extra);
274 #endif
275
276 /*
277 * 4. Consider paths where both outer and inner relations must be hashed
278 * before being joined. As above, disregard enable_hashjoin for full
279 * joins, because there may be no other alternative.
280 */
281 if (enable_hashjoin || jointype == JOIN_FULL)
282 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
283 jointype, &extra);
284
285 /*
286 * 5. If inner and outer relations are foreign tables (or joins) belonging
287 * to the same server and assigned to the same user to check access
288 * permissions as, give the FDW a chance to push down joins.
289 */
290 if (joinrel->fdwroutine &&
291 joinrel->fdwroutine->GetForeignJoinPaths)
292 joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
293 outerrel, innerrel,
294 jointype, &extra);
295
296 /*
297 * 6. Finally, give extensions a chance to manipulate the path list.
298 */
299 if (set_join_pathlist_hook)
300 set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
301 jointype, &extra);
302 }
303
304 /*
305 * We override the param_source_rels heuristic to accept nestloop paths in
306 * which the outer rel satisfies some but not all of the inner path's
307 * parameterization. This is necessary to get good plans for star-schema
308 * scenarios, in which a parameterized path for a large table may require
309 * parameters from multiple small tables that will not get joined directly to
310 * each other. We can handle that by stacking nestloops that have the small
311 * tables on the outside; but this breaks the rule the param_source_rels
312 * heuristic is based on, namely that parameters should not be passed down
313 * across joins unless there's a join-order-constraint-based reason to do so.
314 * So we ignore the param_source_rels restriction when this case applies.
315 *
316 * allow_star_schema_join() returns TRUE if the param_source_rels restriction
317 * should be overridden, ie, it's okay to perform this join.
318 */
319 static inline bool
allow_star_schema_join(PlannerInfo * root,Path * outer_path,Path * inner_path)320 allow_star_schema_join(PlannerInfo *root,
321 Path *outer_path,
322 Path *inner_path)
323 {
324 Relids innerparams = PATH_REQ_OUTER(inner_path);
325 Relids outerrelids = outer_path->parent->relids;
326
327 /*
328 * It's a star-schema case if the outer rel provides some but not all of
329 * the inner rel's parameterization.
330 */
331 return (bms_overlap(innerparams, outerrelids) &&
332 bms_nonempty_difference(innerparams, outerrelids));
333 }
334
335 /*
336 * try_nestloop_path
337 * Consider a nestloop join path; if it appears useful, push it into
338 * the joinrel's pathlist via add_path().
339 */
340 static void
try_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)341 try_nestloop_path(PlannerInfo *root,
342 RelOptInfo *joinrel,
343 Path *outer_path,
344 Path *inner_path,
345 List *pathkeys,
346 JoinType jointype,
347 JoinPathExtraData *extra)
348 {
349 Relids required_outer;
350 JoinCostWorkspace workspace;
351
352 /*
353 * Check to see if proposed path is still parameterized, and reject if the
354 * parameterization wouldn't be sensible --- unless allow_star_schema_join
355 * says to allow it anyway. Also, we must reject if have_dangerous_phv
356 * doesn't like the look of it, which could only happen if the nestloop is
357 * still parameterized.
358 */
359 required_outer = calc_nestloop_required_outer(outer_path,
360 inner_path);
361 if (required_outer &&
362 ((!bms_overlap(required_outer, extra->param_source_rels) &&
363 !allow_star_schema_join(root, outer_path, inner_path)) ||
364 have_dangerous_phv(root,
365 outer_path->parent->relids,
366 PATH_REQ_OUTER(inner_path))))
367 {
368 /* Waste no memory when we reject a path here */
369 bms_free(required_outer);
370 return;
371 }
372
373 /*
374 * Do a precheck to quickly eliminate obviously-inferior paths. We
375 * calculate a cheap lower bound on the path's cost and then use
376 * add_path_precheck() to see if the path is clearly going to be dominated
377 * by some existing path for the joinrel. If not, do the full pushup with
378 * creating a fully valid path structure and submitting it to add_path().
379 * The latter two steps are expensive enough to make this two-phase
380 * methodology worthwhile.
381 */
382 initial_cost_nestloop(root, &workspace, jointype,
383 outer_path, inner_path, extra);
384
385 if (add_path_precheck(joinrel,
386 workspace.startup_cost, workspace.total_cost,
387 pathkeys, required_outer))
388 {
389 add_path(joinrel, (Path *)
390 create_nestloop_path(root,
391 joinrel,
392 jointype,
393 &workspace,
394 extra,
395 outer_path,
396 inner_path,
397 extra->restrictlist,
398 pathkeys,
399 required_outer));
400 }
401 else
402 {
403 /* Waste no memory when we reject a path here */
404 bms_free(required_outer);
405 }
406 }
407
408 /*
409 * try_partial_nestloop_path
410 * Consider a partial nestloop join path; if it appears useful, push it into
411 * the joinrel's partial_pathlist via add_partial_path().
412 */
413 static void
try_partial_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)414 try_partial_nestloop_path(PlannerInfo *root,
415 RelOptInfo *joinrel,
416 Path *outer_path,
417 Path *inner_path,
418 List *pathkeys,
419 JoinType jointype,
420 JoinPathExtraData *extra)
421 {
422 JoinCostWorkspace workspace;
423
424 /*
425 * If the inner path is parameterized, the parameterization must be fully
426 * satisfied by the proposed outer path. Parameterized partial paths are
427 * not supported. The caller should already have verified that no
428 * extra_lateral_rels are required here.
429 */
430 Assert(bms_is_empty(joinrel->lateral_relids));
431 if (inner_path->param_info != NULL)
432 {
433 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
434
435 if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
436 return;
437 }
438
439 /*
440 * Before creating a path, get a quick lower bound on what it is likely to
441 * cost. Bail out right away if it looks terrible.
442 */
443 initial_cost_nestloop(root, &workspace, jointype,
444 outer_path, inner_path, extra);
445 if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
446 return;
447
448 /* Might be good enough to be worth trying, so let's try it. */
449 add_partial_path(joinrel, (Path *)
450 create_nestloop_path(root,
451 joinrel,
452 jointype,
453 &workspace,
454 extra,
455 outer_path,
456 inner_path,
457 extra->restrictlist,
458 pathkeys,
459 NULL));
460 }
461
462 /*
463 * try_mergejoin_path
464 * Consider a merge join path; if it appears useful, push it into
465 * the joinrel's pathlist via add_path().
466 */
467 static void
try_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra,bool is_partial)468 try_mergejoin_path(PlannerInfo *root,
469 RelOptInfo *joinrel,
470 Path *outer_path,
471 Path *inner_path,
472 List *pathkeys,
473 List *mergeclauses,
474 List *outersortkeys,
475 List *innersortkeys,
476 JoinType jointype,
477 JoinPathExtraData *extra,
478 bool is_partial)
479 {
480 Relids required_outer;
481 JoinCostWorkspace workspace;
482
483 if (is_partial)
484 {
485 try_partial_mergejoin_path(root,
486 joinrel,
487 outer_path,
488 inner_path,
489 pathkeys,
490 mergeclauses,
491 outersortkeys,
492 innersortkeys,
493 jointype,
494 extra);
495 return;
496 }
497
498 /*
499 * Check to see if proposed path is still parameterized, and reject if the
500 * parameterization wouldn't be sensible.
501 */
502 required_outer = calc_non_nestloop_required_outer(outer_path,
503 inner_path);
504 if (required_outer &&
505 !bms_overlap(required_outer, extra->param_source_rels))
506 {
507 /* Waste no memory when we reject a path here */
508 bms_free(required_outer);
509 return;
510 }
511
512 /*
513 * If the given paths are already well enough ordered, we can skip doing
514 * an explicit sort.
515 */
516 if (outersortkeys &&
517 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
518 outersortkeys = NIL;
519 if (innersortkeys &&
520 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
521 innersortkeys = NIL;
522
523 /*
524 * See comments in try_nestloop_path().
525 */
526 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
527 outer_path, inner_path,
528 outersortkeys, innersortkeys,
529 extra);
530
531 if (add_path_precheck(joinrel,
532 workspace.startup_cost, workspace.total_cost,
533 pathkeys, required_outer))
534 {
535 add_path(joinrel, (Path *)
536 create_mergejoin_path(root,
537 joinrel,
538 jointype,
539 &workspace,
540 extra,
541 outer_path,
542 inner_path,
543 extra->restrictlist,
544 pathkeys,
545 required_outer,
546 mergeclauses,
547 outersortkeys,
548 innersortkeys));
549 }
550 else
551 {
552 /* Waste no memory when we reject a path here */
553 bms_free(required_outer);
554 }
555 }
556
557 /*
558 * try_partial_mergejoin_path
559 * Consider a partial merge join path; if it appears useful, push it into
560 * the joinrel's pathlist via add_partial_path().
561 */
562 static void
try_partial_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra)563 try_partial_mergejoin_path(PlannerInfo *root,
564 RelOptInfo *joinrel,
565 Path *outer_path,
566 Path *inner_path,
567 List *pathkeys,
568 List *mergeclauses,
569 List *outersortkeys,
570 List *innersortkeys,
571 JoinType jointype,
572 JoinPathExtraData *extra)
573 {
574 JoinCostWorkspace workspace;
575
576 /*
577 * See comments in try_partial_hashjoin_path().
578 */
579 Assert(bms_is_empty(joinrel->lateral_relids));
580 if (inner_path->param_info != NULL)
581 {
582 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
583
584 if (!bms_is_empty(inner_paramrels))
585 return;
586 }
587
588 /*
589 * If the given paths are already well enough ordered, we can skip doing
590 * an explicit sort.
591 */
592 if (outersortkeys &&
593 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
594 outersortkeys = NIL;
595 if (innersortkeys &&
596 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
597 innersortkeys = NIL;
598
599 /*
600 * See comments in try_partial_nestloop_path().
601 */
602 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
603 outer_path, inner_path,
604 outersortkeys, innersortkeys,
605 extra);
606
607 if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
608 return;
609
610 /* Might be good enough to be worth trying, so let's try it. */
611 add_partial_path(joinrel, (Path *)
612 create_mergejoin_path(root,
613 joinrel,
614 jointype,
615 &workspace,
616 extra,
617 outer_path,
618 inner_path,
619 extra->restrictlist,
620 pathkeys,
621 NULL,
622 mergeclauses,
623 outersortkeys,
624 innersortkeys));
625 }
626
627 /*
628 * try_hashjoin_path
629 * Consider a hash join path; if it appears useful, push it into
630 * the joinrel's pathlist via add_path().
631 */
632 static void
try_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra)633 try_hashjoin_path(PlannerInfo *root,
634 RelOptInfo *joinrel,
635 Path *outer_path,
636 Path *inner_path,
637 List *hashclauses,
638 JoinType jointype,
639 JoinPathExtraData *extra)
640 {
641 Relids required_outer;
642 JoinCostWorkspace workspace;
643
644 /*
645 * Check to see if proposed path is still parameterized, and reject if the
646 * parameterization wouldn't be sensible.
647 */
648 required_outer = calc_non_nestloop_required_outer(outer_path,
649 inner_path);
650 if (required_outer &&
651 !bms_overlap(required_outer, extra->param_source_rels))
652 {
653 /* Waste no memory when we reject a path here */
654 bms_free(required_outer);
655 return;
656 }
657
658 /*
659 * See comments in try_nestloop_path(). Also note that hashjoin paths
660 * never have any output pathkeys, per comments in create_hashjoin_path.
661 */
662 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
663 outer_path, inner_path, extra);
664
665 if (add_path_precheck(joinrel,
666 workspace.startup_cost, workspace.total_cost,
667 NIL, required_outer))
668 {
669 add_path(joinrel, (Path *)
670 create_hashjoin_path(root,
671 joinrel,
672 jointype,
673 &workspace,
674 extra,
675 outer_path,
676 inner_path,
677 extra->restrictlist,
678 required_outer,
679 hashclauses));
680 }
681 else
682 {
683 /* Waste no memory when we reject a path here */
684 bms_free(required_outer);
685 }
686 }
687
688 /*
689 * try_partial_hashjoin_path
690 * Consider a partial hashjoin join path; if it appears useful, push it into
691 * the joinrel's partial_pathlist via add_partial_path().
692 */
693 static void
try_partial_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra)694 try_partial_hashjoin_path(PlannerInfo *root,
695 RelOptInfo *joinrel,
696 Path *outer_path,
697 Path *inner_path,
698 List *hashclauses,
699 JoinType jointype,
700 JoinPathExtraData *extra)
701 {
702 JoinCostWorkspace workspace;
703
704 /*
705 * If the inner path is parameterized, the parameterization must be fully
706 * satisfied by the proposed outer path. Parameterized partial paths are
707 * not supported. The caller should already have verified that no
708 * extra_lateral_rels are required here.
709 */
710 Assert(bms_is_empty(joinrel->lateral_relids));
711 if (inner_path->param_info != NULL)
712 {
713 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
714
715 if (!bms_is_empty(inner_paramrels))
716 return;
717 }
718
719 /*
720 * Before creating a path, get a quick lower bound on what it is likely to
721 * cost. Bail out right away if it looks terrible.
722 */
723 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
724 outer_path, inner_path, extra);
725 if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
726 return;
727
728 /* Might be good enough to be worth trying, so let's try it. */
729 add_partial_path(joinrel, (Path *)
730 create_hashjoin_path(root,
731 joinrel,
732 jointype,
733 &workspace,
734 extra,
735 outer_path,
736 inner_path,
737 extra->restrictlist,
738 NULL,
739 hashclauses));
740 }
741
742 /*
743 * clause_sides_match_join
744 * Determine whether a join clause is of the right form to use in this join.
745 *
746 * We already know that the clause is a binary opclause referencing only the
747 * rels in the current join. The point here is to check whether it has the
748 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
749 * rather than mixing outer and inner vars on either side. If it matches,
750 * we set the transient flag outer_is_left to identify which side is which.
751 */
752 static inline bool
clause_sides_match_join(RestrictInfo * rinfo,RelOptInfo * outerrel,RelOptInfo * innerrel)753 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
754 RelOptInfo *innerrel)
755 {
756 if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
757 bms_is_subset(rinfo->right_relids, innerrel->relids))
758 {
759 /* lefthand side is outer */
760 rinfo->outer_is_left = true;
761 return true;
762 }
763 else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
764 bms_is_subset(rinfo->right_relids, outerrel->relids))
765 {
766 /* righthand side is outer */
767 rinfo->outer_is_left = false;
768 return true;
769 }
770 return false; /* no good for these input relations */
771 }
772
773 /*
774 * sort_inner_and_outer
775 * Create mergejoin join paths by explicitly sorting both the outer and
776 * inner join relations on each available merge ordering.
777 *
778 * 'joinrel' is the join relation
779 * 'outerrel' is the outer join relation
780 * 'innerrel' is the inner join relation
781 * 'jointype' is the type of join to do
782 * 'extra' contains additional input values
783 */
784 static void
sort_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)785 sort_inner_and_outer(PlannerInfo *root,
786 RelOptInfo *joinrel,
787 RelOptInfo *outerrel,
788 RelOptInfo *innerrel,
789 JoinType jointype,
790 JoinPathExtraData *extra)
791 {
792 JoinType save_jointype = jointype;
793 Path *outer_path;
794 Path *inner_path;
795 Path *cheapest_partial_outer = NULL;
796 Path *cheapest_safe_inner = NULL;
797 List *all_pathkeys;
798 ListCell *l;
799
800 /*
801 * We only consider the cheapest-total-cost input paths, since we are
802 * assuming here that a sort is required. We will consider
803 * cheapest-startup-cost input paths later, and only if they don't need a
804 * sort.
805 *
806 * This function intentionally does not consider parameterized input
807 * paths, except when the cheapest-total is parameterized. If we did so,
808 * we'd have a combinatorial explosion of mergejoin paths of dubious
809 * value. This interacts with decisions elsewhere that also discriminate
810 * against mergejoins with parameterized inputs; see comments in
811 * src/backend/optimizer/README.
812 */
813 outer_path = outerrel->cheapest_total_path;
814 inner_path = innerrel->cheapest_total_path;
815
816 /*
817 * If either cheapest-total path is parameterized by the other rel, we
818 * can't use a mergejoin. (There's no use looking for alternative input
819 * paths, since these should already be the least-parameterized available
820 * paths.)
821 */
822 if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
823 PATH_PARAM_BY_REL(inner_path, outerrel))
824 return;
825
826 /*
827 * If unique-ification is requested, do it and then handle as a plain
828 * inner join.
829 */
830 if (jointype == JOIN_UNIQUE_OUTER)
831 {
832 outer_path = (Path *) create_unique_path(root, outerrel,
833 outer_path, extra->sjinfo);
834 Assert(outer_path);
835 jointype = JOIN_INNER;
836 }
837 else if (jointype == JOIN_UNIQUE_INNER)
838 {
839 inner_path = (Path *) create_unique_path(root, innerrel,
840 inner_path, extra->sjinfo);
841 Assert(inner_path);
842 jointype = JOIN_INNER;
843 }
844
845 /*
846 * If the joinrel is parallel-safe, we may be able to consider a partial
847 * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
848 * outer path will be partial, and therefore we won't be able to properly
849 * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
850 * JOIN_RIGHT, because they can produce false null extended rows. Also,
851 * the resulting path must not be parameterized.
852 */
853 if (joinrel->consider_parallel &&
854 save_jointype != JOIN_UNIQUE_OUTER &&
855 save_jointype != JOIN_FULL &&
856 save_jointype != JOIN_RIGHT &&
857 outerrel->partial_pathlist != NIL &&
858 bms_is_empty(joinrel->lateral_relids))
859 {
860 cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
861
862 if (inner_path->parallel_safe)
863 cheapest_safe_inner = inner_path;
864 else if (save_jointype != JOIN_UNIQUE_INNER)
865 cheapest_safe_inner =
866 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
867 }
868
869 /*
870 * Each possible ordering of the available mergejoin clauses will generate
871 * a differently-sorted result path at essentially the same cost. We have
872 * no basis for choosing one over another at this level of joining, but
873 * some sort orders may be more useful than others for higher-level
874 * mergejoins, so it's worth considering multiple orderings.
875 *
876 * Actually, it's not quite true that every mergeclause ordering will
877 * generate a different path order, because some of the clauses may be
878 * partially redundant (refer to the same EquivalenceClasses). Therefore,
879 * what we do is convert the mergeclause list to a list of canonical
880 * pathkeys, and then consider different orderings of the pathkeys.
881 *
882 * Generating a path for *every* permutation of the pathkeys doesn't seem
883 * like a winning strategy; the cost in planning time is too high. For
884 * now, we generate one path for each pathkey, listing that pathkey first
885 * and the rest in random order. This should allow at least a one-clause
886 * mergejoin without re-sorting against any other possible mergejoin
887 * partner path. But if we've not guessed the right ordering of secondary
888 * keys, we may end up evaluating clauses as qpquals when they could have
889 * been done as mergeclauses. (In practice, it's rare that there's more
890 * than two or three mergeclauses, so expending a huge amount of thought
891 * on that is probably not worth it.)
892 *
893 * The pathkey order returned by select_outer_pathkeys_for_merge() has
894 * some heuristics behind it (see that function), so be sure to try it
895 * exactly as-is as well as making variants.
896 */
897 all_pathkeys = select_outer_pathkeys_for_merge(root,
898 extra->mergeclause_list,
899 joinrel);
900
901 foreach(l, all_pathkeys)
902 {
903 List *front_pathkey = (List *) lfirst(l);
904 List *cur_mergeclauses;
905 List *outerkeys;
906 List *innerkeys;
907 List *merge_pathkeys;
908
909 /* Make a pathkey list with this guy first */
910 if (l != list_head(all_pathkeys))
911 outerkeys = lcons(front_pathkey,
912 list_delete_ptr(list_copy(all_pathkeys),
913 front_pathkey));
914 else
915 outerkeys = all_pathkeys; /* no work at first one... */
916
917 /* Sort the mergeclauses into the corresponding ordering */
918 cur_mergeclauses =
919 find_mergeclauses_for_outer_pathkeys(root,
920 outerkeys,
921 extra->mergeclause_list);
922
923 /* Should have used them all... */
924 Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
925
926 /* Build sort pathkeys for the inner side */
927 innerkeys = make_inner_pathkeys_for_merge(root,
928 cur_mergeclauses,
929 outerkeys);
930
931 /* Build pathkeys representing output sort order */
932 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
933 outerkeys);
934
935 /*
936 * And now we can make the path.
937 *
938 * Note: it's possible that the cheapest paths will already be sorted
939 * properly. try_mergejoin_path will detect that case and suppress an
940 * explicit sort step, so we needn't do so here.
941 */
942 try_mergejoin_path(root,
943 joinrel,
944 outer_path,
945 inner_path,
946 merge_pathkeys,
947 cur_mergeclauses,
948 outerkeys,
949 innerkeys,
950 jointype,
951 extra,
952 false);
953
954 /*
955 * If we have partial outer and parallel safe inner path then try
956 * partial mergejoin path.
957 */
958 if (cheapest_partial_outer && cheapest_safe_inner)
959 try_partial_mergejoin_path(root,
960 joinrel,
961 cheapest_partial_outer,
962 cheapest_safe_inner,
963 merge_pathkeys,
964 cur_mergeclauses,
965 outerkeys,
966 innerkeys,
967 jointype,
968 extra);
969 }
970 }
971
972 /*
973 * generate_mergejoin_paths
974 * Creates possible mergejoin paths for input outerpath.
975 *
976 * We generate mergejoins if mergejoin clauses are available. We have
977 * two ways to generate the inner path for a mergejoin: sort the cheapest
978 * inner path, or use an inner path that is already suitably ordered for the
979 * merge. If we have several mergeclauses, it could be that there is no inner
980 * path (or only a very expensive one) for the full list of mergeclauses, but
981 * better paths exist if we truncate the mergeclause list (thereby discarding
982 * some sort key requirements). So, we consider truncations of the
983 * mergeclause list as well as the full list. (Ideally we'd consider all
984 * subsets of the mergeclause list, but that seems way too expensive.)
985 */
986 static void
generate_mergejoin_paths(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * innerrel,Path * outerpath,JoinType jointype,JoinPathExtraData * extra,bool useallclauses,Path * inner_cheapest_total,List * merge_pathkeys,bool is_partial)987 generate_mergejoin_paths(PlannerInfo *root,
988 RelOptInfo *joinrel,
989 RelOptInfo *innerrel,
990 Path *outerpath,
991 JoinType jointype,
992 JoinPathExtraData *extra,
993 bool useallclauses,
994 Path *inner_cheapest_total,
995 List *merge_pathkeys,
996 bool is_partial)
997 {
998 List *mergeclauses;
999 List *innersortkeys;
1000 List *trialsortkeys;
1001 Path *cheapest_startup_inner;
1002 Path *cheapest_total_inner;
1003 JoinType save_jointype = jointype;
1004 int num_sortkeys;
1005 int sortkeycnt;
1006
1007 if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1008 jointype = JOIN_INNER;
1009
1010 /* Look for useful mergeclauses (if any) */
1011 mergeclauses =
1012 find_mergeclauses_for_outer_pathkeys(root,
1013 outerpath->pathkeys,
1014 extra->mergeclause_list);
1015
1016 /*
1017 * Done with this outer path if no chance for a mergejoin.
1018 *
1019 * Special corner case: for "x FULL JOIN y ON true", there will be no join
1020 * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1021 * but since mergejoin is our only join type that supports FULL JOIN
1022 * without any join clauses, it's necessary to generate a clauseless
1023 * mergejoin path instead.
1024 */
1025 if (mergeclauses == NIL)
1026 {
1027 if (jointype == JOIN_FULL)
1028 /* okay to try for mergejoin */ ;
1029 else
1030 return;
1031 }
1032 if (useallclauses &&
1033 list_length(mergeclauses) != list_length(extra->mergeclause_list))
1034 return;
1035
1036 /* Compute the required ordering of the inner path */
1037 innersortkeys = make_inner_pathkeys_for_merge(root,
1038 mergeclauses,
1039 outerpath->pathkeys);
1040
1041 /*
1042 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1043 * a sort will be needed, only cheapest total cost matters. (But
1044 * try_mergejoin_path will do the right thing if inner_cheapest_total is
1045 * already correctly sorted.)
1046 */
1047 try_mergejoin_path(root,
1048 joinrel,
1049 outerpath,
1050 inner_cheapest_total,
1051 merge_pathkeys,
1052 mergeclauses,
1053 NIL,
1054 innersortkeys,
1055 jointype,
1056 extra,
1057 is_partial);
1058
1059 /* Can't do anything else if inner path needs to be unique'd */
1060 if (save_jointype == JOIN_UNIQUE_INNER)
1061 return;
1062
1063 /*
1064 * Look for presorted inner paths that satisfy the innersortkey list ---
1065 * or any truncation thereof, if we are allowed to build a mergejoin using
1066 * a subset of the merge clauses. Here, we consider both cheap startup
1067 * cost and cheap total cost.
1068 *
1069 * Currently we do not consider parameterized inner paths here. This
1070 * interacts with decisions elsewhere that also discriminate against
1071 * mergejoins with parameterized inputs; see comments in
1072 * src/backend/optimizer/README.
1073 *
1074 * As we shorten the sortkey list, we should consider only paths that are
1075 * strictly cheaper than (in particular, not the same as) any path found
1076 * in an earlier iteration. Otherwise we'd be intentionally using fewer
1077 * merge keys than a given path allows (treating the rest as plain
1078 * joinquals), which is unlikely to be a good idea. Also, eliminating
1079 * paths here on the basis of compare_path_costs is a lot cheaper than
1080 * building the mergejoin path only to throw it away.
1081 *
1082 * If inner_cheapest_total is well enough sorted to have not required a
1083 * sort in the path made above, we shouldn't make a duplicate path with
1084 * it, either. We handle that case with the same logic that handles the
1085 * previous consideration, by initializing the variables that track
1086 * cheapest-so-far properly. Note that we do NOT reject
1087 * inner_cheapest_total if we find it matches some shorter set of
1088 * pathkeys. That case corresponds to using fewer mergekeys to avoid
1089 * sorting inner_cheapest_total, whereas we did sort it above, so the
1090 * plans being considered are different.
1091 */
1092 if (pathkeys_contained_in(innersortkeys,
1093 inner_cheapest_total->pathkeys))
1094 {
1095 /* inner_cheapest_total didn't require a sort */
1096 cheapest_startup_inner = inner_cheapest_total;
1097 cheapest_total_inner = inner_cheapest_total;
1098 }
1099 else
1100 {
1101 /* it did require a sort, at least for the full set of keys */
1102 cheapest_startup_inner = NULL;
1103 cheapest_total_inner = NULL;
1104 }
1105 num_sortkeys = list_length(innersortkeys);
1106 if (num_sortkeys > 1 && !useallclauses)
1107 trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1108 else
1109 trialsortkeys = innersortkeys; /* won't really truncate */
1110
1111 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1112 {
1113 Path *innerpath;
1114 List *newclauses = NIL;
1115
1116 /*
1117 * Look for an inner path ordered well enough for the first
1118 * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1119 * destructively, which is why we made a copy...
1120 */
1121 trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1122 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1123 trialsortkeys,
1124 NULL,
1125 TOTAL_COST,
1126 is_partial);
1127 if (innerpath != NULL &&
1128 (cheapest_total_inner == NULL ||
1129 compare_path_costs(innerpath, cheapest_total_inner,
1130 TOTAL_COST) < 0))
1131 {
1132 /* Found a cheap (or even-cheaper) sorted path */
1133 /* Select the right mergeclauses, if we didn't already */
1134 if (sortkeycnt < num_sortkeys)
1135 {
1136 newclauses =
1137 trim_mergeclauses_for_inner_pathkeys(root,
1138 mergeclauses,
1139 trialsortkeys);
1140 Assert(newclauses != NIL);
1141 }
1142 else
1143 newclauses = mergeclauses;
1144 try_mergejoin_path(root,
1145 joinrel,
1146 outerpath,
1147 innerpath,
1148 merge_pathkeys,
1149 newclauses,
1150 NIL,
1151 NIL,
1152 jointype,
1153 extra,
1154 is_partial);
1155 cheapest_total_inner = innerpath;
1156 }
1157 /* Same on the basis of cheapest startup cost ... */
1158 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1159 trialsortkeys,
1160 NULL,
1161 STARTUP_COST,
1162 is_partial);
1163 if (innerpath != NULL &&
1164 (cheapest_startup_inner == NULL ||
1165 compare_path_costs(innerpath, cheapest_startup_inner,
1166 STARTUP_COST) < 0))
1167 {
1168 /* Found a cheap (or even-cheaper) sorted path */
1169 if (innerpath != cheapest_total_inner)
1170 {
1171 /*
1172 * Avoid rebuilding clause list if we already made one; saves
1173 * memory in big join trees...
1174 */
1175 if (newclauses == NIL)
1176 {
1177 if (sortkeycnt < num_sortkeys)
1178 {
1179 newclauses =
1180 trim_mergeclauses_for_inner_pathkeys(root,
1181 mergeclauses,
1182 trialsortkeys);
1183 Assert(newclauses != NIL);
1184 }
1185 else
1186 newclauses = mergeclauses;
1187 }
1188 try_mergejoin_path(root,
1189 joinrel,
1190 outerpath,
1191 innerpath,
1192 merge_pathkeys,
1193 newclauses,
1194 NIL,
1195 NIL,
1196 jointype,
1197 extra,
1198 is_partial);
1199 }
1200 cheapest_startup_inner = innerpath;
1201 }
1202
1203 /*
1204 * Don't consider truncated sortkeys if we need all clauses.
1205 */
1206 if (useallclauses)
1207 break;
1208 }
1209 }
1210
1211 /*
1212 * match_unsorted_outer
1213 * Creates possible join paths for processing a single join relation
1214 * 'joinrel' by employing either iterative substitution or
1215 * mergejoining on each of its possible outer paths (considering
1216 * only outer paths that are already ordered well enough for merging).
1217 *
1218 * We always generate a nestloop path for each available outer path.
1219 * In fact we may generate as many as five: one on the cheapest-total-cost
1220 * inner path, one on the same with materialization, one on the
1221 * cheapest-startup-cost inner path (if different), one on the
1222 * cheapest-total inner-indexscan path (if any), and one on the
1223 * cheapest-startup inner-indexscan path (if different).
1224 *
1225 * We also consider mergejoins if mergejoin clauses are available. See
1226 * detailed comments in generate_mergejoin_paths.
1227 *
1228 * 'joinrel' is the join relation
1229 * 'outerrel' is the outer join relation
1230 * 'innerrel' is the inner join relation
1231 * 'jointype' is the type of join to do
1232 * 'extra' contains additional input values
1233 */
1234 static void
match_unsorted_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1235 match_unsorted_outer(PlannerInfo *root,
1236 RelOptInfo *joinrel,
1237 RelOptInfo *outerrel,
1238 RelOptInfo *innerrel,
1239 JoinType jointype,
1240 JoinPathExtraData *extra)
1241 {
1242 JoinType save_jointype = jointype;
1243 bool nestjoinOK;
1244 bool useallclauses;
1245 Path *inner_cheapest_total = innerrel->cheapest_total_path;
1246 Path *matpath = NULL;
1247 ListCell *lc1;
1248
1249 /*
1250 * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1251 * are doing a right or full mergejoin, we must use *all* the mergeclauses
1252 * as join clauses, else we will not have a valid plan. (Although these
1253 * two flags are currently inverses, keep them separate for clarity and
1254 * possible future changes.)
1255 */
1256 switch (jointype)
1257 {
1258 case JOIN_INNER:
1259 case JOIN_LEFT:
1260 case JOIN_SEMI:
1261 case JOIN_ANTI:
1262 nestjoinOK = true;
1263 useallclauses = false;
1264 break;
1265 case JOIN_RIGHT:
1266 case JOIN_FULL:
1267 nestjoinOK = false;
1268 useallclauses = true;
1269 break;
1270 case JOIN_UNIQUE_OUTER:
1271 case JOIN_UNIQUE_INNER:
1272 jointype = JOIN_INNER;
1273 nestjoinOK = true;
1274 useallclauses = false;
1275 break;
1276 default:
1277 elog(ERROR, "unrecognized join type: %d",
1278 (int) jointype);
1279 nestjoinOK = false; /* keep compiler quiet */
1280 useallclauses = false;
1281 break;
1282 }
1283
1284 /*
1285 * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1286 * we will consider it below as a member of cheapest_parameterized_paths,
1287 * but the other possibilities considered in this routine aren't usable.
1288 */
1289 if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1290 inner_cheapest_total = NULL;
1291
1292 /*
1293 * If we need to unique-ify the inner path, we will consider only the
1294 * cheapest-total inner.
1295 */
1296 if (save_jointype == JOIN_UNIQUE_INNER)
1297 {
1298 /* No way to do this with an inner path parameterized by outer rel */
1299 if (inner_cheapest_total == NULL)
1300 return;
1301 inner_cheapest_total = (Path *)
1302 create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1303 Assert(inner_cheapest_total);
1304 }
1305 else if (nestjoinOK)
1306 {
1307 /*
1308 * Consider materializing the cheapest inner path, unless
1309 * enable_material is off or the path in question materializes its
1310 * output anyway.
1311 */
1312 if (enable_material && inner_cheapest_total != NULL &&
1313 !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1314 matpath = (Path *)
1315 create_material_path(innerrel, inner_cheapest_total);
1316 }
1317
1318 foreach(lc1, outerrel->pathlist)
1319 {
1320 Path *outerpath = (Path *) lfirst(lc1);
1321 List *merge_pathkeys;
1322
1323 /*
1324 * We cannot use an outer path that is parameterized by the inner rel.
1325 */
1326 if (PATH_PARAM_BY_REL(outerpath, innerrel))
1327 continue;
1328
1329 /*
1330 * If we need to unique-ify the outer path, it's pointless to consider
1331 * any but the cheapest outer. (XXX we don't consider parameterized
1332 * outers, nor inners, for unique-ified cases. Should we?)
1333 */
1334 if (save_jointype == JOIN_UNIQUE_OUTER)
1335 {
1336 if (outerpath != outerrel->cheapest_total_path)
1337 continue;
1338 outerpath = (Path *) create_unique_path(root, outerrel,
1339 outerpath, extra->sjinfo);
1340 Assert(outerpath);
1341 }
1342
1343 /*
1344 * The result will have this sort order (even if it is implemented as
1345 * a nestloop, and even if some of the mergeclauses are implemented by
1346 * qpquals rather than as true mergeclauses):
1347 */
1348 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1349 outerpath->pathkeys);
1350
1351 if (save_jointype == JOIN_UNIQUE_INNER)
1352 {
1353 /*
1354 * Consider nestloop join, but only with the unique-ified cheapest
1355 * inner path
1356 */
1357 try_nestloop_path(root,
1358 joinrel,
1359 outerpath,
1360 inner_cheapest_total,
1361 merge_pathkeys,
1362 jointype,
1363 extra);
1364 }
1365 else if (nestjoinOK)
1366 {
1367 /*
1368 * Consider nestloop joins using this outer path and various
1369 * available paths for the inner relation. We consider the
1370 * cheapest-total paths for each available parameterization of the
1371 * inner relation, including the unparameterized case.
1372 */
1373 ListCell *lc2;
1374
1375 foreach(lc2, innerrel->cheapest_parameterized_paths)
1376 {
1377 Path *innerpath = (Path *) lfirst(lc2);
1378
1379 try_nestloop_path(root,
1380 joinrel,
1381 outerpath,
1382 innerpath,
1383 merge_pathkeys,
1384 jointype,
1385 extra);
1386 }
1387
1388 /* Also consider materialized form of the cheapest inner path */
1389 if (matpath != NULL)
1390 try_nestloop_path(root,
1391 joinrel,
1392 outerpath,
1393 matpath,
1394 merge_pathkeys,
1395 jointype,
1396 extra);
1397 }
1398
1399 /* Can't do anything else if outer path needs to be unique'd */
1400 if (save_jointype == JOIN_UNIQUE_OUTER)
1401 continue;
1402
1403 /* Can't do anything else if inner rel is parameterized by outer */
1404 if (inner_cheapest_total == NULL)
1405 continue;
1406
1407 /* Generate merge join paths */
1408 generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1409 save_jointype, extra, useallclauses,
1410 inner_cheapest_total, merge_pathkeys,
1411 false);
1412 }
1413
1414 /*
1415 * Consider partial nestloop and mergejoin plan if outerrel has any
1416 * partial path and the joinrel is parallel-safe. However, we can't
1417 * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1418 * therefore we won't be able to properly guarantee uniqueness. Nor can
1419 * we handle extra_lateral_rels, since partial paths must not be
1420 * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1421 * because they can produce false null extended rows.
1422 */
1423 if (joinrel->consider_parallel &&
1424 save_jointype != JOIN_UNIQUE_OUTER &&
1425 save_jointype != JOIN_FULL &&
1426 save_jointype != JOIN_RIGHT &&
1427 outerrel->partial_pathlist != NIL &&
1428 bms_is_empty(joinrel->lateral_relids))
1429 {
1430 if (nestjoinOK)
1431 consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1432 save_jointype, extra);
1433
1434 /*
1435 * If inner_cheapest_total is NULL or non parallel-safe then find the
1436 * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1437 * can't use any alternative inner path.
1438 */
1439 if (inner_cheapest_total == NULL ||
1440 !inner_cheapest_total->parallel_safe)
1441 {
1442 if (save_jointype == JOIN_UNIQUE_INNER)
1443 return;
1444
1445 inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
1446 innerrel->pathlist);
1447 }
1448
1449 if (inner_cheapest_total)
1450 consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1451 save_jointype, extra,
1452 inner_cheapest_total);
1453 }
1454 }
1455
1456 /*
1457 * consider_parallel_mergejoin
1458 * Try to build partial paths for a joinrel by joining a partial path
1459 * for the outer relation to a complete path for the inner relation.
1460 *
1461 * 'joinrel' is the join relation
1462 * 'outerrel' is the outer join relation
1463 * 'innerrel' is the inner join relation
1464 * 'jointype' is the type of join to do
1465 * 'extra' contains additional input values
1466 * 'inner_cheapest_total' cheapest total path for innerrel
1467 */
1468 static void
consider_parallel_mergejoin(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra,Path * inner_cheapest_total)1469 consider_parallel_mergejoin(PlannerInfo *root,
1470 RelOptInfo *joinrel,
1471 RelOptInfo *outerrel,
1472 RelOptInfo *innerrel,
1473 JoinType jointype,
1474 JoinPathExtraData *extra,
1475 Path *inner_cheapest_total)
1476 {
1477 ListCell *lc1;
1478
1479 /* generate merge join path for each partial outer path */
1480 foreach(lc1, outerrel->partial_pathlist)
1481 {
1482 Path *outerpath = (Path *) lfirst(lc1);
1483 List *merge_pathkeys;
1484
1485 /*
1486 * Figure out what useful ordering any paths we create will have.
1487 */
1488 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1489 outerpath->pathkeys);
1490
1491 generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1492 extra, false, inner_cheapest_total,
1493 merge_pathkeys, true);
1494 }
1495 }
1496
1497 /*
1498 * consider_parallel_nestloop
1499 * Try to build partial paths for a joinrel by joining a partial path for the
1500 * outer relation to a complete path for the inner relation.
1501 *
1502 * 'joinrel' is the join relation
1503 * 'outerrel' is the outer join relation
1504 * 'innerrel' is the inner join relation
1505 * 'jointype' is the type of join to do
1506 * 'extra' contains additional input values
1507 */
1508 static void
consider_parallel_nestloop(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1509 consider_parallel_nestloop(PlannerInfo *root,
1510 RelOptInfo *joinrel,
1511 RelOptInfo *outerrel,
1512 RelOptInfo *innerrel,
1513 JoinType jointype,
1514 JoinPathExtraData *extra)
1515 {
1516 JoinType save_jointype = jointype;
1517 ListCell *lc1;
1518
1519 if (jointype == JOIN_UNIQUE_INNER)
1520 jointype = JOIN_INNER;
1521
1522 foreach(lc1, outerrel->partial_pathlist)
1523 {
1524 Path *outerpath = (Path *) lfirst(lc1);
1525 List *pathkeys;
1526 ListCell *lc2;
1527
1528 /* Figure out what useful ordering any paths we create will have. */
1529 pathkeys = build_join_pathkeys(root, joinrel, jointype,
1530 outerpath->pathkeys);
1531
1532 /*
1533 * Try the cheapest parameterized paths; only those which will produce
1534 * an unparameterized path when joined to this outerrel will survive
1535 * try_partial_nestloop_path. The cheapest unparameterized path is
1536 * also in this list.
1537 */
1538 foreach(lc2, innerrel->cheapest_parameterized_paths)
1539 {
1540 Path *innerpath = (Path *) lfirst(lc2);
1541
1542 /* Can't join to an inner path that is not parallel-safe */
1543 if (!innerpath->parallel_safe)
1544 continue;
1545
1546 /*
1547 * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1548 * cheapest_total_path, and we have to unique-ify it. (We might
1549 * be able to relax this to allow other safe, unparameterized
1550 * inner paths, but right now create_unique_path is not on board
1551 * with that.)
1552 */
1553 if (save_jointype == JOIN_UNIQUE_INNER)
1554 {
1555 if (innerpath != innerrel->cheapest_total_path)
1556 continue;
1557 innerpath = (Path *) create_unique_path(root, innerrel,
1558 innerpath,
1559 extra->sjinfo);
1560 Assert(innerpath);
1561 }
1562
1563 try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1564 pathkeys, jointype, extra);
1565 }
1566 }
1567 }
1568
1569 /*
1570 * hash_inner_and_outer
1571 * Create hashjoin join paths by explicitly hashing both the outer and
1572 * inner keys of each available hash clause.
1573 *
1574 * 'joinrel' is the join relation
1575 * 'outerrel' is the outer join relation
1576 * 'innerrel' is the inner join relation
1577 * 'jointype' is the type of join to do
1578 * 'extra' contains additional input values
1579 */
1580 static void
hash_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1581 hash_inner_and_outer(PlannerInfo *root,
1582 RelOptInfo *joinrel,
1583 RelOptInfo *outerrel,
1584 RelOptInfo *innerrel,
1585 JoinType jointype,
1586 JoinPathExtraData *extra)
1587 {
1588 JoinType save_jointype = jointype;
1589 bool isouterjoin = IS_OUTER_JOIN(jointype);
1590 List *hashclauses;
1591 ListCell *l;
1592
1593 /*
1594 * We need to build only one hashclauses list for any given pair of outer
1595 * and inner relations; all of the hashable clauses will be used as keys.
1596 *
1597 * Scan the join's restrictinfo list to find hashjoinable clauses that are
1598 * usable with this pair of sub-relations.
1599 */
1600 hashclauses = NIL;
1601 foreach(l, extra->restrictlist)
1602 {
1603 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1604
1605 /*
1606 * If processing an outer join, only use its own join clauses for
1607 * hashing. For inner joins we need not be so picky.
1608 */
1609 if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1610 continue;
1611
1612 if (!restrictinfo->can_join ||
1613 restrictinfo->hashjoinoperator == InvalidOid)
1614 continue; /* not hashjoinable */
1615
1616 /*
1617 * Check if clause has the form "outer op inner" or "inner op outer".
1618 */
1619 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1620 continue; /* no good for these input relations */
1621
1622 hashclauses = lappend(hashclauses, restrictinfo);
1623 }
1624
1625 /* If we found any usable hashclauses, make paths */
1626 if (hashclauses)
1627 {
1628 /*
1629 * We consider both the cheapest-total-cost and cheapest-startup-cost
1630 * outer paths. There's no need to consider any but the
1631 * cheapest-total-cost inner path, however.
1632 */
1633 Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1634 Path *cheapest_total_outer = outerrel->cheapest_total_path;
1635 Path *cheapest_total_inner = innerrel->cheapest_total_path;
1636
1637 /*
1638 * If either cheapest-total path is parameterized by the other rel, we
1639 * can't use a hashjoin. (There's no use looking for alternative
1640 * input paths, since these should already be the least-parameterized
1641 * available paths.)
1642 */
1643 if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1644 PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1645 return;
1646
1647 /* Unique-ify if need be; we ignore parameterized possibilities */
1648 if (jointype == JOIN_UNIQUE_OUTER)
1649 {
1650 cheapest_total_outer = (Path *)
1651 create_unique_path(root, outerrel,
1652 cheapest_total_outer, extra->sjinfo);
1653 Assert(cheapest_total_outer);
1654 jointype = JOIN_INNER;
1655 try_hashjoin_path(root,
1656 joinrel,
1657 cheapest_total_outer,
1658 cheapest_total_inner,
1659 hashclauses,
1660 jointype,
1661 extra);
1662 /* no possibility of cheap startup here */
1663 }
1664 else if (jointype == JOIN_UNIQUE_INNER)
1665 {
1666 cheapest_total_inner = (Path *)
1667 create_unique_path(root, innerrel,
1668 cheapest_total_inner, extra->sjinfo);
1669 Assert(cheapest_total_inner);
1670 jointype = JOIN_INNER;
1671 try_hashjoin_path(root,
1672 joinrel,
1673 cheapest_total_outer,
1674 cheapest_total_inner,
1675 hashclauses,
1676 jointype,
1677 extra);
1678 if (cheapest_startup_outer != NULL &&
1679 cheapest_startup_outer != cheapest_total_outer)
1680 try_hashjoin_path(root,
1681 joinrel,
1682 cheapest_startup_outer,
1683 cheapest_total_inner,
1684 hashclauses,
1685 jointype,
1686 extra);
1687 }
1688 else
1689 {
1690 /*
1691 * For other jointypes, we consider the cheapest startup outer
1692 * together with the cheapest total inner, and then consider
1693 * pairings of cheapest-total paths including parameterized ones.
1694 * There is no use in generating parameterized paths on the basis
1695 * of possibly cheap startup cost, so this is sufficient.
1696 */
1697 ListCell *lc1;
1698 ListCell *lc2;
1699
1700 if (cheapest_startup_outer != NULL)
1701 try_hashjoin_path(root,
1702 joinrel,
1703 cheapest_startup_outer,
1704 cheapest_total_inner,
1705 hashclauses,
1706 jointype,
1707 extra);
1708
1709 foreach(lc1, outerrel->cheapest_parameterized_paths)
1710 {
1711 Path *outerpath = (Path *) lfirst(lc1);
1712
1713 /*
1714 * We cannot use an outer path that is parameterized by the
1715 * inner rel.
1716 */
1717 if (PATH_PARAM_BY_REL(outerpath, innerrel))
1718 continue;
1719
1720 foreach(lc2, innerrel->cheapest_parameterized_paths)
1721 {
1722 Path *innerpath = (Path *) lfirst(lc2);
1723
1724 /*
1725 * We cannot use an inner path that is parameterized by
1726 * the outer rel, either.
1727 */
1728 if (PATH_PARAM_BY_REL(innerpath, outerrel))
1729 continue;
1730
1731 if (outerpath == cheapest_startup_outer &&
1732 innerpath == cheapest_total_inner)
1733 continue; /* already tried it */
1734
1735 try_hashjoin_path(root,
1736 joinrel,
1737 outerpath,
1738 innerpath,
1739 hashclauses,
1740 jointype,
1741 extra);
1742 }
1743 }
1744 }
1745
1746 /*
1747 * If the joinrel is parallel-safe, we may be able to consider a
1748 * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
1749 * because the outer path will be partial, and therefore we won't be
1750 * able to properly guarantee uniqueness. Similarly, we can't handle
1751 * JOIN_FULL and JOIN_RIGHT, because they can produce false null
1752 * extended rows. Also, the resulting path must not be parameterized.
1753 */
1754 if (joinrel->consider_parallel &&
1755 save_jointype != JOIN_UNIQUE_OUTER &&
1756 save_jointype != JOIN_FULL &&
1757 save_jointype != JOIN_RIGHT &&
1758 outerrel->partial_pathlist != NIL &&
1759 bms_is_empty(joinrel->lateral_relids))
1760 {
1761 Path *cheapest_partial_outer;
1762 Path *cheapest_safe_inner = NULL;
1763
1764 cheapest_partial_outer =
1765 (Path *) linitial(outerrel->partial_pathlist);
1766
1767 /*
1768 * Normally, given that the joinrel is parallel-safe, the cheapest
1769 * total inner path will also be parallel-safe, but if not, we'll
1770 * have to search for the cheapest safe, unparameterized inner
1771 * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
1772 * inner path.
1773 */
1774 if (cheapest_total_inner->parallel_safe)
1775 cheapest_safe_inner = cheapest_total_inner;
1776 else if (save_jointype != JOIN_UNIQUE_INNER)
1777 cheapest_safe_inner =
1778 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1779
1780 if (cheapest_safe_inner != NULL)
1781 try_partial_hashjoin_path(root, joinrel,
1782 cheapest_partial_outer,
1783 cheapest_safe_inner,
1784 hashclauses, jointype, extra);
1785 }
1786 }
1787 }
1788
1789 /*
1790 * select_mergejoin_clauses
1791 * Select mergejoin clauses that are usable for a particular join.
1792 * Returns a list of RestrictInfo nodes for those clauses.
1793 *
1794 * *mergejoin_allowed is normally set to TRUE, but it is set to FALSE if
1795 * this is a right/full join and there are nonmergejoinable join clauses.
1796 * The executor's mergejoin machinery cannot handle such cases, so we have
1797 * to avoid generating a mergejoin plan. (Note that this flag does NOT
1798 * consider whether there are actually any mergejoinable clauses. This is
1799 * correct because in some cases we need to build a clauseless mergejoin.
1800 * Simply returning NIL is therefore not enough to distinguish safe from
1801 * unsafe cases.)
1802 *
1803 * We also mark each selected RestrictInfo to show which side is currently
1804 * being considered as outer. These are transient markings that are only
1805 * good for the duration of the current add_paths_to_joinrel() call!
1806 *
1807 * We examine each restrictinfo clause known for the join to see
1808 * if it is mergejoinable and involves vars from the two sub-relations
1809 * currently of interest.
1810 */
1811 static List *
select_mergejoin_clauses(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,List * restrictlist,JoinType jointype,bool * mergejoin_allowed)1812 select_mergejoin_clauses(PlannerInfo *root,
1813 RelOptInfo *joinrel,
1814 RelOptInfo *outerrel,
1815 RelOptInfo *innerrel,
1816 List *restrictlist,
1817 JoinType jointype,
1818 bool *mergejoin_allowed)
1819 {
1820 List *result_list = NIL;
1821 bool isouterjoin = IS_OUTER_JOIN(jointype);
1822 bool have_nonmergeable_joinclause = false;
1823 ListCell *l;
1824
1825 foreach(l, restrictlist)
1826 {
1827 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1828
1829 /*
1830 * If processing an outer join, only use its own join clauses in the
1831 * merge. For inner joins we can use pushed-down clauses too. (Note:
1832 * we don't set have_nonmergeable_joinclause here because pushed-down
1833 * clauses will become otherquals not joinquals.)
1834 */
1835 if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1836 continue;
1837
1838 /* Check that clause is a mergeable operator clause */
1839 if (!restrictinfo->can_join ||
1840 restrictinfo->mergeopfamilies == NIL)
1841 {
1842 /*
1843 * The executor can handle extra joinquals that are constants, but
1844 * not anything else, when doing right/full merge join. (The
1845 * reason to support constants is so we can do FULL JOIN ON
1846 * FALSE.)
1847 */
1848 if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
1849 have_nonmergeable_joinclause = true;
1850 continue; /* not mergejoinable */
1851 }
1852
1853 /*
1854 * Check if clause has the form "outer op inner" or "inner op outer".
1855 */
1856 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1857 {
1858 have_nonmergeable_joinclause = true;
1859 continue; /* no good for these input relations */
1860 }
1861
1862 /*
1863 * Insist that each side have a non-redundant eclass. This
1864 * restriction is needed because various bits of the planner expect
1865 * that each clause in a merge be associable with some pathkey in a
1866 * canonical pathkey list, but redundant eclasses can't appear in
1867 * canonical sort orderings. (XXX it might be worth relaxing this,
1868 * but not enough time to address it for 8.3.)
1869 *
1870 * Note: it would be bad if this condition failed for an otherwise
1871 * mergejoinable FULL JOIN clause, since that would result in
1872 * undesirable planner failure. I believe that is not possible
1873 * however; a variable involved in a full join could only appear in
1874 * below_outer_join eclasses, which aren't considered redundant.
1875 *
1876 * This case *can* happen for left/right join clauses: the outer-side
1877 * variable could be equated to a constant. Because we will propagate
1878 * that constant across the join clause, the loss of ability to do a
1879 * mergejoin is not really all that big a deal, and so it's not clear
1880 * that improving this is important.
1881 */
1882 update_mergeclause_eclasses(root, restrictinfo);
1883
1884 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1885 EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1886 {
1887 have_nonmergeable_joinclause = true;
1888 continue; /* can't handle redundant eclasses */
1889 }
1890
1891 result_list = lappend(result_list, restrictinfo);
1892 }
1893
1894 /*
1895 * Report whether mergejoin is allowed (see comment at top of function).
1896 */
1897 switch (jointype)
1898 {
1899 case JOIN_RIGHT:
1900 case JOIN_FULL:
1901 *mergejoin_allowed = !have_nonmergeable_joinclause;
1902 break;
1903 default:
1904 *mergejoin_allowed = true;
1905 break;
1906 }
1907
1908 return result_list;
1909 }
1910