1 /*-------------------------------------------------------------------------
2 *
3 * joinrels.c
4 * Routines to determine which relations should be joined
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/joinrels.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include "miscadmin.h"
18 #include "optimizer/clauses.h"
19 #include "optimizer/joininfo.h"
20 #include "optimizer/pathnode.h"
21 #include "optimizer/paths.h"
22 #include "optimizer/prep.h"
23 #include "partitioning/partbounds.h"
24 #include "utils/lsyscache.h"
25 #include "utils/memutils.h"
26
27
28 static void make_rels_by_clause_joins(PlannerInfo *root,
29 RelOptInfo *old_rel,
30 ListCell *other_rels);
31 static void make_rels_by_clauseless_joins(PlannerInfo *root,
32 RelOptInfo *old_rel,
33 ListCell *other_rels);
34 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
35 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
36 static bool restriction_is_constant_false(List *restrictlist,
37 RelOptInfo *joinrel,
38 bool only_pushed_down);
39 static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
40 RelOptInfo *rel2, RelOptInfo *joinrel,
41 SpecialJoinInfo *sjinfo, List *restrictlist);
42 static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
43 RelOptInfo *rel2, RelOptInfo *joinrel,
44 SpecialJoinInfo *parent_sjinfo,
45 List *parent_restrictlist);
46 static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
47 bool strict_op);
48
49
50 /*
51 * join_search_one_level
gda_server_provider_internal_get_parser(GdaServerProvider * prov)52 * Consider ways to produce join relations containing exactly 'level'
53 * jointree items. (This is one step of the dynamic-programming method
54 * embodied in standard_join_search.) Join rel nodes for each feasible
55 * combination of lower-level rels are created and returned in a list.
56 * Implementation paths are created for each such joinrel, too.
57 *
58 * level: level of rels we want to make this time
59 * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
60 *
61 * The result is returned in root->join_rel_level[level].
62 */
63 void
64 join_search_one_level(PlannerInfo *root, int level)
65 {
66 List **joinrels = root->join_rel_level;
67 ListCell *r;
68 int k;
69
70 Assert(joinrels[level] == NIL);
71
72 /* Set join_cur_level so that new joinrels are added to proper list */
73 root->join_cur_level = level;
74
75 /*
76 * First, consider left-sided and right-sided plans, in which rels of
77 * exactly level-1 member relations are joined against initial relations.
78 * We prefer to join using join clauses, but if we find a rel of level-1
79 * members that has no join clauses, we will generate Cartesian-product
80 * joins against all initial rels not already contained in it.
81 */
82 foreach(r, joinrels[level - 1])
83 {
84 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
85
86 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
87 has_join_restriction(root, old_rel))
88 {
89 /*
90 * There are join clauses or join order restrictions relevant to
91 * this rel, so consider joins between this rel and (only) those
92 * initial rels it is linked to by a clause or restriction.
93 *
94 * At level 2 this condition is symmetric, so there is no need to
95 * look at initial rels before this one in the list; we already
96 * considered such joins when we were at the earlier rel. (The
97 * mirror-image joins are handled automatically by make_join_rel.)
98 * In later passes (level > 2), we join rels of the previous level
99 * to each initial rel they don't already include but have a join
100 * clause or restriction with.
101 */
102 ListCell *other_rels;
103
104 if (level == 2) /* consider remaining initial rels */
105 other_rels = lnext(r);
106 else /* consider all initial rels */
107 other_rels = list_head(joinrels[1]);
108
109 make_rels_by_clause_joins(root,
110 old_rel,
111 other_rels);
112 }
113 else
114 {
115 /*
116 * Oops, we have a relation that is not joined to any other
117 * relation, either directly or by join-order restrictions.
118 * Cartesian product time.
119 *
120 * We consider a cartesian product with each not-already-included
121 * initial rel, whether it has other join clauses or not. At
122 * level 2, if there are two or more clauseless initial rels, we
123 * will redundantly consider joining them in both directions; but
124 * such cases aren't common enough to justify adding complexity to
125 * avoid the duplicated effort.
126 */
127 make_rels_by_clauseless_joins(root,
128 old_rel,
129 list_head(joinrels[1]));
130 }
131 }
132
133 /*
134 * Now, consider "bushy plans" in which relations of k initial rels are
135 * joined to relations of level-k initial rels, for 2 <= k <= level-2.
136 *
137 * We only consider bushy-plan joins for pairs of rels where there is a
138 * suitable join clause (or join order restriction), in order to avoid
139 * unreasonable growth of planning time.
140 */
141 for (k = 2;; k++)
142 {
143 int other_level = level - k;
144
145 /*
146 * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
147 * need to go as far as the halfway point.
148 */
149 if (k > other_level)
150 break;
151
152 foreach(r, joinrels[k])
153 {
154 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
155 ListCell *other_rels;
156 ListCell *r2;
157
158 /*
159 * We can ignore relations without join clauses here, unless they
160 * participate in join-order restrictions --- then we might have
161 * to force a bushy join plan.
162 */
163 if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
164 !has_join_restriction(root, old_rel))
165 continue;
166
167 if (k == other_level)
168 other_rels = lnext(r); /* only consider remaining rels */
169 else
170 other_rels = list_head(joinrels[other_level]);
171
172 for_each_cell(r2, other_rels)
173 {
174 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
175
176 if (!bms_overlap(old_rel->relids, new_rel->relids))
177 {
178 /*
179 * OK, we can build a rel of the right level from this
180 * pair of rels. Do so if there is at least one relevant
181 * join clause or join order restriction.
182 */
183 if (have_relevant_joinclause(root, old_rel, new_rel) ||
184 have_join_order_restriction(root, old_rel, new_rel))
185 {
186 (void) make_join_rel(root, old_rel, new_rel);
187 }
188 }
189 }
190 }
191 }
192
193 /*----------
194 * Last-ditch effort: if we failed to find any usable joins so far, force
195 * a set of cartesian-product joins to be generated. This handles the
196 * special case where all the available rels have join clauses but we
197 * cannot use any of those clauses yet. This can only happen when we are
198 * considering a join sub-problem (a sub-joinlist) and all the rels in the
199 * sub-problem have only join clauses with rels outside the sub-problem.
200 * An example is
201 *
202 * SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
203 * WHERE a.w = c.x and b.y = d.z;
204 *
205 * If the "a INNER JOIN b" sub-problem does not get flattened into the
206 * upper level, we must be willing to make a cartesian join of a and b;
207 * but the code above will not have done so, because it thought that both
208 * a and b have joinclauses. We consider only left-sided and right-sided
209 * cartesian joins in this case (no bushy).
210 *----------
211 */
212 if (joinrels[level] == NIL)
213 {
214 /*
215 * This loop is just like the first one, except we always call
216 * make_rels_by_clauseless_joins().
217 */
218 foreach(r, joinrels[level - 1])
219 {
220 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
221
222 make_rels_by_clauseless_joins(root,
223 old_rel,
224 list_head(joinrels[1]));
225 }
226
227 /*----------
228 * When special joins are involved, there may be no legal way
229 * to make an N-way join for some values of N. For example consider
230 *
231 * SELECT ... FROM t1 WHERE
232 * x IN (SELECT ... FROM t2,t3 WHERE ...) AND
233 * y IN (SELECT ... FROM t4,t5 WHERE ...)
234 *
235 * We will flatten this query to a 5-way join problem, but there are
236 * no 4-way joins that join_is_legal() will consider legal. We have
237 * to accept failure at level 4 and go on to discover a workable
238 * bushy plan at level 5.
239 *
240 * However, if there are no special joins and no lateral references
241 * then join_is_legal() should never fail, and so the following sanity
242 * check is useful.
243 *----------
244 */
245 if (joinrels[level] == NIL &&
246 root->join_info_list == NIL &&
247 !root->hasLateralRTEs)
248 elog(ERROR, "failed to build any %d-way joins", level);
249 }
250 }
251
252 /*
253 * make_rels_by_clause_joins
254 * Build joins between the given relation 'old_rel' and other relations
255 * that participate in join clauses that 'old_rel' also participates in
256 * (or participate in join-order restrictions with it).
257 * The join rels are returned in root->join_rel_level[join_cur_level].
258 *
259 * Note: at levels above 2 we will generate the same joined relation in
260 * multiple ways --- for example (a join b) join c is the same RelOptInfo as
261 * (b join c) join a, though the second case will add a different set of Paths
262 * to it. This is the reason for using the join_rel_level mechanism, which
263 * automatically ensures that each new joinrel is only added to the list once.
264 *
265 * 'old_rel' is the relation entry for the relation to be joined
266 * 'other_rels': the first cell in a linked list containing the other
267 * rels to be considered for joining
268 *
269 * Currently, this is only used with initial rels in other_rels, but it
270 * will work for joining to joinrels too.
271 */
272 static void
273 make_rels_by_clause_joins(PlannerInfo *root,
274 RelOptInfo *old_rel,
275 ListCell *other_rels)
276 {
277 ListCell *l;
278
279 for_each_cell(l, other_rels)
280 {
281 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
282
283 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
284 (have_relevant_joinclause(root, old_rel, other_rel) ||
285 have_join_order_restriction(root, old_rel, other_rel)))
286 {
287 (void) make_join_rel(root, old_rel, other_rel);
288 }
289 }
290 }
291
292 /*
293 * make_rels_by_clauseless_joins
294 * Given a relation 'old_rel' and a list of other relations
295 * 'other_rels', create a join relation between 'old_rel' and each
296 * member of 'other_rels' that isn't already included in 'old_rel'.
297 * The join rels are returned in root->join_rel_level[join_cur_level].
298 *
299 * 'old_rel' is the relation entry for the relation to be joined
300 * 'other_rels': the first cell of a linked list containing the
gda_server_provider_handler_find(GdaServerProvider * prov,GdaConnection * cnc,GType g_type,const gchar * dbms_type)301 * other rels to be considered for joining
302 *
303 * Currently, this is only used with initial rels in other_rels, but it would
304 * work for joining to joinrels too.
305 */
306 static void
307 make_rels_by_clauseless_joins(PlannerInfo *root,
308 RelOptInfo *old_rel,
309 ListCell *other_rels)
310 {
311 ListCell *l;
312
313 for_each_cell(l, other_rels)
314 {
315 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
316
317 if (!bms_overlap(other_rel->relids, old_rel->relids))
318 {
319 (void) make_join_rel(root, old_rel, other_rel);
320 }
321 }
322 }
323
324
gda_server_provider_handler_declare(GdaServerProvider * prov,GdaDataHandler * dh,GdaConnection * cnc,GType g_type,const gchar * dbms_type)325 /*
326 * join_is_legal
327 * Determine whether a proposed join is legal given the query's
328 * join order constraints; and if it is, determine the join type.
329 *
330 * Caller must supply not only the two rels, but the union of their relids.
331 * (We could simplify the API by computing joinrelids locally, but this
332 * would be redundant work in the normal path through make_join_rel.)
333 *
334 * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
335 * else it's set to point to the associated SpecialJoinInfo node. Also,
336 * *reversed_p is set true if the given relations need to be swapped to
337 * match the SpecialJoinInfo node.
338 */
339 static bool
340 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
341 Relids joinrelids,
342 SpecialJoinInfo **sjinfo_p, bool *reversed_p)
343 {
344 SpecialJoinInfo *match_sjinfo;
345 bool reversed;
346 bool unique_ified;
347 bool must_be_leftjoin;
348 ListCell *l;
349
350 /*
351 * Ensure output params are set on failure return. This is just to
352 * suppress uninitialized-variable warnings from overly anal compilers.
353 */
354 *sjinfo_p = NULL;
355 *reversed_p = false;
356
357 /*
358 * If we have any special joins, the proposed join might be illegal; and
359 * in any case we have to determine its join type. Scan the join info
360 * list for matches and conflicts.
361 */
362 match_sjinfo = NULL;
363 reversed = false;
364 unique_ified = false;
365 must_be_leftjoin = false;
366
367 foreach(l, root->join_info_list)
368 {
369 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
370
371 /*
372 * This special join is not relevant unless its RHS overlaps the
373 * proposed join. (Check this first as a fast path for dismissing
374 * most irrelevant SJs quickly.)
375 */
376 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
377 continue;
378
379 /*
380 * Also, not relevant if proposed join is fully contained within RHS
381 * (ie, we're still building up the RHS).
382 */
383 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
384 continue;
385
386 /*
387 * Also, not relevant if SJ is already done within either input.
388 */
389 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
390 bms_is_subset(sjinfo->min_righthand, rel1->relids))
391 continue;
392 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
393 bms_is_subset(sjinfo->min_righthand, rel2->relids))
394 continue;
395
396 /*
397 * If it's a semijoin and we already joined the RHS to any other rels
398 * within either input, then we must have unique-ified the RHS at that
399 * point (see below). Therefore the semijoin is no longer relevant in
400 * this join path.
401 */
402 if (sjinfo->jointype == JOIN_SEMI)
403 {
404 if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
405 !bms_equal(sjinfo->syn_righthand, rel1->relids))
406 continue;
407 if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
408 !bms_equal(sjinfo->syn_righthand, rel2->relids))
409 continue;
410 }
411
412 /*
413 * If one input contains min_lefthand and the other contains
414 * min_righthand, then we can perform the SJ at this join.
415 *
416 * Reject if we get matches to more than one SJ; that implies we're
417 * considering something that's not really valid.
418 */
419 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
420 bms_is_subset(sjinfo->min_righthand, rel2->relids))
421 {
422 if (match_sjinfo)
423 return false; /* invalid join path */
424 match_sjinfo = sjinfo;
425 reversed = false;
426 }
427 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
428 bms_is_subset(sjinfo->min_righthand, rel1->relids))
429 {
430 if (match_sjinfo)
431 return false; /* invalid join path */
432 match_sjinfo = sjinfo;
433 reversed = true;
434 }
435 else if (sjinfo->jointype == JOIN_SEMI &&
436 bms_equal(sjinfo->syn_righthand, rel2->relids) &&
437 create_unique_path(root, rel2, rel2->cheapest_total_path,
438 sjinfo) != NULL)
439 {
440 /*----------
441 * For a semijoin, we can join the RHS to anything else by
442 * unique-ifying the RHS (if the RHS can be unique-ified).
443 * We will only get here if we have the full RHS but less
444 * than min_lefthand on the LHS.
445 *
446 * The reason to consider such a join path is exemplified by
447 * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
448 * If we insist on doing this as a semijoin we will first have
449 * to form the cartesian product of A*B. But if we unique-ify
450 * C then the semijoin becomes a plain innerjoin and we can join
451 * in any order, eg C to A and then to B. When C is much smaller
452 * than A and B this can be a huge win. So we allow C to be
453 * joined to just A or just B here, and then make_join_rel has
454 * to handle the case properly.
455 *
456 * Note that actually we'll allow unique-ified C to be joined to
457 * some other relation D here, too. That is legal, if usually not
458 * very sane, and this routine is only concerned with legality not
459 * with whether the join is good strategy.
460 *----------
461 */
462 if (match_sjinfo)
463 return false; /* invalid join path */
464 match_sjinfo = sjinfo;
465 reversed = false;
466 unique_ified = true;
467 }
468 else if (sjinfo->jointype == JOIN_SEMI &&
469 bms_equal(sjinfo->syn_righthand, rel1->relids) &&
470 create_unique_path(root, rel1, rel1->cheapest_total_path,
471 sjinfo) != NULL)
472 {
473 /* Reversed semijoin case */
474 if (match_sjinfo)
475 return false; /* invalid join path */
476 match_sjinfo = sjinfo;
477 reversed = true;
478 unique_ified = true;
479 }
480 else
481 {
482 /*
483 * Otherwise, the proposed join overlaps the RHS but isn't a valid
484 * implementation of this SJ. But don't panic quite yet: the RHS
485 * violation might have occurred previously, in one or both input
486 * relations, in which case we must have previously decided that
487 * it was OK to commute some other SJ with this one. If we need
488 * to perform this join to finish building up the RHS, rejecting
489 * it could lead to not finding any plan at all. (This can occur
490 * because of the heuristics elsewhere in this file that postpone
491 * clauseless joins: we might not consider doing a clauseless join
492 * within the RHS until after we've performed other, validly
493 * commutable SJs with one or both sides of the clauseless join.)
494 * This consideration boils down to the rule that if both inputs
495 * overlap the RHS, we can allow the join --- they are either
496 * fully within the RHS, or represent previously-allowed joins to
497 * rels outside it.
498 */
499 if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
500 bms_overlap(rel2->relids, sjinfo->min_righthand))
501 continue; /* assume valid previous violation of RHS */
502
503 /*
504 * The proposed join could still be legal, but only if we're
505 * allowed to associate it into the RHS of this SJ. That means
506 * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
507 * not FULL) and the proposed join must not overlap the LHS.
508 */
509 if (sjinfo->jointype != JOIN_LEFT ||
510 bms_overlap(joinrelids, sjinfo->min_lefthand))
511 return false; /* invalid join path */
512
513 /*
514 * To be valid, the proposed join must be a LEFT join; otherwise
515 * it can't associate into this SJ's RHS. But we may not yet have
516 * found the SpecialJoinInfo matching the proposed join, so we
517 * can't test that yet. Remember the requirement for later.
518 */
519 must_be_leftjoin = true;
520 }
521 }
522
523 /*
524 * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
525 * proposed join can't associate into an SJ's RHS.
526 *
527 * Also, fail if the proposed join's predicate isn't strict; we're
528 * essentially checking to see if we can apply outer-join identity 3, and
529 * that's a requirement. (This check may be redundant with checks in
530 * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
531 */
532 if (must_be_leftjoin &&
533 (match_sjinfo == NULL ||
534 match_sjinfo->jointype != JOIN_LEFT ||
535 !match_sjinfo->lhs_strict))
536 return false; /* invalid join path */
537
538 /*
539 * We also have to check for constraints imposed by LATERAL references.
540 */
541 if (root->hasLateralRTEs)
542 {
543 bool lateral_fwd;
544 bool lateral_rev;
545 Relids join_lateral_rels;
546
547 /*
548 * The proposed rels could each contain lateral references to the
549 * other, in which case the join is impossible. If there are lateral
550 * references in just one direction, then the join has to be done with
551 * a nestloop with the lateral referencer on the inside. If the join
552 * matches an SJ that cannot be implemented by such a nestloop, the
553 * join is impossible.
554 *
555 * Also, if the lateral reference is only indirect, we should reject
556 * the join; whatever rel(s) the reference chain goes through must be
557 * joined to first.
558 *
559 * Another case that might keep us from building a valid plan is the
560 * implementation restriction described by have_dangerous_phv().
561 */
562 lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
563 lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
564 if (lateral_fwd && lateral_rev)
565 return false; /* have lateral refs in both directions */
566 if (lateral_fwd)
567 {
568 /* has to be implemented as nestloop with rel1 on left */
569 if (match_sjinfo &&
570 (reversed ||
571 unique_ified ||
572 match_sjinfo->jointype == JOIN_FULL))
573 return false; /* not implementable as nestloop */
574 /* check there is a direct reference from rel2 to rel1 */
575 if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
576 return false; /* only indirect refs, so reject */
577 /* check we won't have a dangerous PHV */
578 if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
579 return false; /* might be unable to handle required PHV */
580 }
581 else if (lateral_rev)
582 {
583 /* has to be implemented as nestloop with rel2 on left */
584 if (match_sjinfo &&
585 (!reversed ||
586 unique_ified ||
587 match_sjinfo->jointype == JOIN_FULL))
588 return false; /* not implementable as nestloop */
589 /* check there is a direct reference from rel1 to rel2 */
590 if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
591 return false; /* only indirect refs, so reject */
592 /* check we won't have a dangerous PHV */
593 if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
594 return false; /* might be unable to handle required PHV */
595 }
596
597 /*
598 * LATERAL references could also cause problems later on if we accept
599 * this join: if the join's minimum parameterization includes any rels
600 * that would have to be on the inside of an outer join with this join
601 * rel, then it's never going to be possible to build the complete
602 * query using this join. We should reject this join not only because
603 * it'll save work, but because if we don't, the clauseless-join
604 * heuristics might think that legality of this join means that some
605 * other join rel need not be formed, and that could lead to failure
606 * to find any plan at all. We have to consider not only rels that
607 * are directly on the inner side of an OJ with the joinrel, but also
608 * ones that are indirectly so, so search to find all such rels.
609 */
610 join_lateral_rels = min_join_parameterization(root, joinrelids,
611 rel1, rel2);
612 if (join_lateral_rels)
613 {
614 Relids join_plus_rhs = bms_copy(joinrelids);
615 bool more;
616
617 do
618 {
619 more = false;
620 foreach(l, root->join_info_list)
621 {
622 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
623
624 /* ignore full joins --- their ordering is predetermined */
625 if (sjinfo->jointype == JOIN_FULL)
626 continue;
627
628 if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
629 !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
630 {
631 join_plus_rhs = bms_add_members(join_plus_rhs,
632 sjinfo->min_righthand);
633 more = true;
634 }
635 }
636 } while (more);
637 if (bms_overlap(join_plus_rhs, join_lateral_rels))
638 return false; /* will not be able to join to some RHS rel */
639 }
640 }
641
642 /* Otherwise, it's a valid join */
643 *sjinfo_p = match_sjinfo;
644 *reversed_p = reversed;
645 return true;
646 }
647
648
649 /*
650 * make_join_rel
651 * Find or create a join RelOptInfo that represents the join of
652 * the two given rels, and add to it path information for paths
653 * created with the two rels as outer and inner rel.
654 * (The join rel may already contain paths generated from other
655 * pairs of rels that add up to the same set of base rels.)
656 *
657 * NB: will return NULL if attempted join is not valid. This can happen
658 * when working with outer joins, or with IN or EXISTS clauses that have been
659 * turned into joins.
660 */
661 RelOptInfo *
662 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
663 {
664 Relids joinrelids;
665 SpecialJoinInfo *sjinfo;
666 bool reversed;
667 SpecialJoinInfo sjinfo_data;
668 RelOptInfo *joinrel;
669 List *restrictlist;
670
671 /* We should never try to join two overlapping sets of rels. */
672 Assert(!bms_overlap(rel1->relids, rel2->relids));
673
674 /* Construct Relids set that identifies the joinrel. */
675 joinrelids = bms_union(rel1->relids, rel2->relids);
676
677 /* Check validity and determine join type. */
678 if (!join_is_legal(root, rel1, rel2, joinrelids,
679 &sjinfo, &reversed))
680 {
681 /* invalid join path */
682 bms_free(joinrelids);
683 return NULL;
684 }
685
686 /* Swap rels if needed to match the join info. */
687 if (reversed)
688 {
689 RelOptInfo *trel = rel1;
690
691 rel1 = rel2;
692 rel2 = trel;
693 }
694
695 /*
696 * If it's a plain inner join, then we won't have found anything in
697 * join_info_list. Make up a SpecialJoinInfo so that selectivity
698 * estimation functions will know what's being joined.
699 */
700 if (sjinfo == NULL)
701 {
702 sjinfo = &sjinfo_data;
703 sjinfo->type = T_SpecialJoinInfo;
704 sjinfo->min_lefthand = rel1->relids;
705 sjinfo->min_righthand = rel2->relids;
706 sjinfo->syn_lefthand = rel1->relids;
707 sjinfo->syn_righthand = rel2->relids;
708 sjinfo->jointype = JOIN_INNER;
709 /* we don't bother trying to make the remaining fields valid */
710 sjinfo->lhs_strict = false;
711 sjinfo->delay_upper_joins = false;
712 sjinfo->semi_can_btree = false;
713 sjinfo->semi_can_hash = false;
714 sjinfo->semi_operators = NIL;
715 sjinfo->semi_rhs_exprs = NIL;
716 }
717
718 /*
719 * Find or build the join RelOptInfo, and compute the restrictlist that
720 * goes with this particular joining.
721 */
722 joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
723 &restrictlist);
724
725 /*
726 * If we've already proven this join is empty, we needn't consider any
727 * more paths for it.
728 */
729 if (is_dummy_rel(joinrel))
730 {
731 bms_free(joinrelids);
732 return joinrel;
733 }
734
735 /* Add paths to the join relation. */
736 populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
737 restrictlist);
738
739 bms_free(joinrelids);
740
741 return joinrel;
742 }
743
744 /*
745 * populate_joinrel_with_paths
746 * Add paths to the given joinrel for given pair of joining relations. The
747 * SpecialJoinInfo provides details about the join and the restrictlist
748 * contains the join clauses and the other clauses applicable for given pair
749 * of the joining relations.
750 */
751 static void
752 populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
753 RelOptInfo *rel2, RelOptInfo *joinrel,
754 SpecialJoinInfo *sjinfo, List *restrictlist)
755 {
756 /*
757 * Consider paths using each rel as both outer and inner. Depending on
758 * the join type, a provably empty outer or inner rel might mean the join
759 * is provably empty too; in which case throw away any previously computed
760 * paths and mark the join as dummy. (We do it this way since it's
761 * conceivable that dummy-ness of a multi-element join might only be
762 * noticeable for certain construction paths.)
763 *
764 * Also, a provably constant-false join restriction typically means that
765 * we can skip evaluating one or both sides of the join. We do this by
766 * marking the appropriate rel as dummy. For outer joins, a
767 * constant-false restriction that is pushed down still means the whole
768 * join is dummy, while a non-pushed-down one means that no inner rows
769 * will join so we can treat the inner rel as dummy.
770 *
771 * We need only consider the jointypes that appear in join_info_list, plus
772 * JOIN_INNER.
773 */
774 switch (sjinfo->jointype)
775 {
776 case JOIN_INNER:
777 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
778 restriction_is_constant_false(restrictlist, joinrel, false))
779 {
780 mark_dummy_rel(joinrel);
781 break;
782 }
783 add_paths_to_joinrel(root, joinrel, rel1, rel2,
784 JOIN_INNER, sjinfo,
785 restrictlist);
786 add_paths_to_joinrel(root, joinrel, rel2, rel1,
787 JOIN_INNER, sjinfo,
788 restrictlist);
789 break;
790 case JOIN_LEFT:
791 if (is_dummy_rel(rel1) ||
792 restriction_is_constant_false(restrictlist, joinrel, true))
793 {
794 mark_dummy_rel(joinrel);
795 break;
796 }
797 if (restriction_is_constant_false(restrictlist, joinrel, false) &&
798 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
799 mark_dummy_rel(rel2);
800 add_paths_to_joinrel(root, joinrel, rel1, rel2,
801 JOIN_LEFT, sjinfo,
802 restrictlist);
803 add_paths_to_joinrel(root, joinrel, rel2, rel1,
804 JOIN_RIGHT, sjinfo,
805 restrictlist);
806 break;
807 case JOIN_FULL:
808 if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
809 restriction_is_constant_false(restrictlist, joinrel, true))
810 {
811 mark_dummy_rel(joinrel);
812 break;
813 }
814 add_paths_to_joinrel(root, joinrel, rel1, rel2,
815 JOIN_FULL, sjinfo,
816 restrictlist);
817 add_paths_to_joinrel(root, joinrel, rel2, rel1,
818 JOIN_FULL, sjinfo,
819 restrictlist);
820
821 /*
822 * If there are join quals that aren't mergeable or hashable, we
823 * may not be able to build any valid plan. Complain here so that
824 * we can give a somewhat-useful error message. (Since we have no
825 * flexibility of planning for a full join, there's no chance of
826 * succeeding later with another pair of input rels.)
827 */
828 if (joinrel->pathlist == NIL)
829 ereport(ERROR,
830 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
831 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
832 break;
833 case JOIN_SEMI:
834
835 /*
836 * We might have a normal semijoin, or a case where we don't have
837 * enough rels to do the semijoin but can unique-ify the RHS and
838 * then do an innerjoin (see comments in join_is_legal). In the
839 * latter case we can't apply JOIN_SEMI joining.
840 */
841 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
842 bms_is_subset(sjinfo->min_righthand, rel2->relids))
843 {
844 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
845 restriction_is_constant_false(restrictlist, joinrel, false))
846 {
847 mark_dummy_rel(joinrel);
848 break;
849 }
850 add_paths_to_joinrel(root, joinrel, rel1, rel2,
851 JOIN_SEMI, sjinfo,
852 restrictlist);
853 }
854
855 /*
856 * If we know how to unique-ify the RHS and one input rel is
857 * exactly the RHS (not a superset) we can consider unique-ifying
858 * it and then doing a regular join. (The create_unique_path
859 * check here is probably redundant with what join_is_legal did,
860 * but if so the check is cheap because it's cached. So test
861 * anyway to be sure.)
862 */
863 if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
864 create_unique_path(root, rel2, rel2->cheapest_total_path,
865 sjinfo) != NULL)
866 {
867 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
868 restriction_is_constant_false(restrictlist, joinrel, false))
869 {
870 mark_dummy_rel(joinrel);
871 break;
872 }
873 add_paths_to_joinrel(root, joinrel, rel1, rel2,
874 JOIN_UNIQUE_INNER, sjinfo,
875 restrictlist);
876 add_paths_to_joinrel(root, joinrel, rel2, rel1,
877 JOIN_UNIQUE_OUTER, sjinfo,
878 restrictlist);
879 }
880 break;
881 case JOIN_ANTI:
882 if (is_dummy_rel(rel1) ||
883 restriction_is_constant_false(restrictlist, joinrel, true))
884 {
885 mark_dummy_rel(joinrel);
886 break;
887 }
888 if (restriction_is_constant_false(restrictlist, joinrel, false) &&
889 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
890 mark_dummy_rel(rel2);
891 add_paths_to_joinrel(root, joinrel, rel1, rel2,
892 JOIN_ANTI, sjinfo,
893 restrictlist);
894 break;
895 default:
896 /* other values not expected here */
897 elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
898 break;
899 }
900
901 /* Apply partitionwise join technique, if possible. */
902 try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
903 }
904
905
906 /*
907 * have_join_order_restriction
908 * Detect whether the two relations should be joined to satisfy
909 * a join-order restriction arising from special or lateral joins.
910 *
911 * In practice this is always used with have_relevant_joinclause(), and so
912 * could be merged with that function, but it seems clearer to separate the
913 * two concerns. We need this test because there are degenerate cases where
914 * a clauseless join must be performed to satisfy join-order restrictions.
915 * Also, if one rel has a lateral reference to the other, or both are needed
916 * to compute some PHV, we should consider joining them even if the join would
917 * be clauseless.
918 *
919 * Note: this is only a problem if one side of a degenerate outer join
920 * contains multiple rels, or a clauseless join is required within an
921 * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
922 * join_search_one_level(). We could dispense with this test if we were
923 * willing to try bushy plans in the "last ditch" case, but that seems much
924 * less efficient.
925 */
926 bool
927 have_join_order_restriction(PlannerInfo *root,
928 RelOptInfo *rel1, RelOptInfo *rel2)
929 {
930 bool result = false;
931 ListCell *l;
932
933 /*
934 * If either side has a direct lateral reference to the other, attempt the
935 * join regardless of outer-join considerations.
936 */
937 if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) ||
938 bms_overlap(rel2->relids, rel1->direct_lateral_relids))
939 return true;
940
941 /*
942 * Likewise, if both rels are needed to compute some PlaceHolderVar,
943 * attempt the join regardless of outer-join considerations. (This is not
944 * very desirable, because a PHV with a large eval_at set will cause a lot
945 * of probably-useless joins to be considered, but failing to do this can
946 * cause us to fail to construct a plan at all.)
947 */
948 foreach(l, root->placeholder_list)
949 {
950 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
951
952 if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
953 bms_is_subset(rel2->relids, phinfo->ph_eval_at))
954 return true;
955 }
956
957 /*
958 * It's possible that the rels correspond to the left and right sides of a
959 * degenerate outer join, that is, one with no joinclause mentioning the
960 * non-nullable side; in which case we should force the join to occur.
961 *
962 * Also, the two rels could represent a clauseless join that has to be
963 * completed to build up the LHS or RHS of an outer join.
964 */
965 foreach(l, root->join_info_list)
966 {
967 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
968
969 /* ignore full joins --- other mechanisms handle them */
970 if (sjinfo->jointype == JOIN_FULL)
971 continue;
972
973 /* Can we perform the SJ with these rels? */
974 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
975 bms_is_subset(sjinfo->min_righthand, rel2->relids))
976 {
977 result = true;
978 break;
979 }
980 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
981 bms_is_subset(sjinfo->min_righthand, rel1->relids))
982 {
983 result = true;
984 break;
985 }
986
987 /*
988 * Might we need to join these rels to complete the RHS? We have to
989 * use "overlap" tests since either rel might include a lower SJ that
990 * has been proven to commute with this one.
991 */
992 if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
993 bms_overlap(sjinfo->min_righthand, rel2->relids))
994 {
995 result = true;
996 break;
997 }
998
999 /* Likewise for the LHS. */
1000 if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
1001 bms_overlap(sjinfo->min_lefthand, rel2->relids))
1002 {
1003 result = true;
1004 break;
1005 }
1006 }
1007
1008 /*
1009 * We do not force the join to occur if either input rel can legally be
1010 * joined to anything else using joinclauses. This essentially means that
1011 * clauseless bushy joins are put off as long as possible. The reason is
1012 * that when there is a join order restriction high up in the join tree
1013 * (that is, with many rels inside the LHS or RHS), we would otherwise
1014 * expend lots of effort considering very stupid join combinations within
1015 * its LHS or RHS.
1016 */
1017 if (result)
1018 {
1019 if (has_legal_joinclause(root, rel1) ||
1020 has_legal_joinclause(root, rel2))
1021 result = false;
1022 }
1023
1024 return result;
1025 }
1026
1027
1028 /*
1029 * has_join_restriction
1030 * Detect whether the specified relation has join-order restrictions,
1031 * due to being inside an outer join or an IN (sub-SELECT),
1032 * or participating in any LATERAL references or multi-rel PHVs.
1033 *
1034 * Essentially, this tests whether have_join_order_restriction() could
1035 * succeed with this rel and some other one. It's OK if we sometimes
1036 * say "true" incorrectly. (Therefore, we don't bother with the relatively
1037 * expensive has_legal_joinclause test.)
1038 */
1039 static bool
1040 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
1041 {
1042 ListCell *l;
1043
1044 if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
1045 return true;
1046
1047 foreach(l, root->placeholder_list)
1048 {
1049 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
1050
1051 if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
1052 !bms_equal(rel->relids, phinfo->ph_eval_at))
1053 return true;
1054 }
1055
1056 foreach(l, root->join_info_list)
1057 {
1058 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
1059
1060 /* ignore full joins --- other mechanisms preserve their ordering */
1061 if (sjinfo->jointype == JOIN_FULL)
1062 continue;
1063
1064 /* ignore if SJ is already contained in rel */
1065 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
1066 bms_is_subset(sjinfo->min_righthand, rel->relids))
1067 continue;
1068
1069 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
1070 if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
1071 bms_overlap(sjinfo->min_righthand, rel->relids))
1072 return true;
1073 }
1074
1075 return false;
1076 }
1077
1078
1079 /*
1080 * has_legal_joinclause
1081 * Detect whether the specified relation can legally be joined
1082 * to any other rels using join clauses.
1083 *
1084 * We consider only joins to single other relations in the current
1085 * initial_rels list. This is sufficient to get a "true" result in most real
1086 * queries, and an occasional erroneous "false" will only cost a bit more
1087 * planning time. The reason for this limitation is that considering joins to
1088 * other joins would require proving that the other join rel can legally be
1089 * formed, which seems like too much trouble for something that's only a
1090 * heuristic to save planning time. (Note: we must look at initial_rels
1091 * and not all of the query, since when we are planning a sub-joinlist we
1092 * may be forced to make clauseless joins within initial_rels even though
1093 * there are join clauses linking to other parts of the query.)
1094 */
1095 static bool
1096 has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
1097 {
1098 ListCell *lc;
1099
1100 foreach(lc, root->initial_rels)
1101 {
1102 RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
1103
1104 /* ignore rels that are already in "rel" */
1105 if (bms_overlap(rel->relids, rel2->relids))
1106 continue;
1107
1108 if (have_relevant_joinclause(root, rel, rel2))
1109 {
1110 Relids joinrelids;
1111 SpecialJoinInfo *sjinfo;
1112 bool reversed;
1113
1114 /* join_is_legal needs relids of the union */
1115 joinrelids = bms_union(rel->relids, rel2->relids);
1116
1117 if (join_is_legal(root, rel, rel2, joinrelids,
1118 &sjinfo, &reversed))
1119 {
1120 /* Yes, this will work */
1121 bms_free(joinrelids);
1122 return true;
1123 }
1124
1125 bms_free(joinrelids);
1126 }
1127 }
1128
1129 return false;
1130 }
1131
1132
1133 /*
1134 * There's a pitfall for creating parameterized nestloops: suppose the inner
1135 * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
1136 * minimum eval_at set includes the outer rel (B) and some third rel (C).
1137 * We might think we could create a B/A nestloop join that's parameterized by
1138 * C. But we would end up with a plan in which the PHV's expression has to be
1139 * evaluated as a nestloop parameter at the B/A join; and the executor is only
1140 * set up to handle simple Vars as NestLoopParams. Rather than add complexity
1141 * and overhead to the executor for such corner cases, it seems better to
1142 * forbid the join. (Note that we can still make use of A's parameterized
1143 * path with pre-joined B+C as the outer rel. have_join_order_restriction()
1144 * ensures that we will consider making such a join even if there are not
1145 * other reasons to do so.)
1146 *
1147 * So we check whether any PHVs used in the query could pose such a hazard.
1148 * We don't have any simple way of checking whether a risky PHV would actually
1149 * be used in the inner plan, and the case is so unusual that it doesn't seem
1150 * worth working very hard on it.
1151 *
1152 * This needs to be checked in two places. If the inner rel's minimum
1153 * parameterization would trigger the restriction, then join_is_legal() should
1154 * reject the join altogether, because there will be no workable paths for it.
1155 * But joinpath.c has to check again for every proposed nestloop path, because
1156 * the inner path might have more than the minimum parameterization, causing
1157 * some PHV to be dangerous for it that otherwise wouldn't be.
1158 */
1159 bool
1160 have_dangerous_phv(PlannerInfo *root,
1161 Relids outer_relids, Relids inner_params)
1162 {
1163 ListCell *lc;
1164
1165 foreach(lc, root->placeholder_list)
1166 {
1167 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
1168
1169 if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
1170 continue; /* ignore, could not be a nestloop param */
1171 if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
1172 continue; /* ignore, not relevant to this join */
1173 if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
1174 continue; /* safe, it can be eval'd within outerrel */
1175 /* Otherwise, it's potentially unsafe, so reject the join */
1176 return true;
1177 }
1178
1179 /* OK to perform the join */
1180 return false;
1181 }
1182
1183
1184 /*
1185 * is_dummy_rel --- has relation been proven empty?
1186 */
1187 bool
1188 is_dummy_rel(RelOptInfo *rel)
1189 {
1190 Path *path;
1191
1192 /*
1193 * A rel that is known dummy will have just one path that is a childless
1194 * Append. (Even if somehow it has more paths, a childless Append will
1195 * have cost zero and hence should be at the front of the pathlist.)
1196 */
1197 if (rel->pathlist == NIL)
1198 return false;
1199 path = (Path *) linitial(rel->pathlist);
1200
1201 /*
1202 * Initially, a dummy path will just be a childless Append. But in later
1203 * planning stages we might stick a ProjectSetPath and/or ProjectionPath
1204 * on top, since Append can't project. Rather than make assumptions about
1205 * which combinations can occur, just descend through whatever we find.
1206 */
1207 for (;;)
1208 {
1209 if (IsA(path, ProjectionPath))
1210 path = ((ProjectionPath *) path)->subpath;
1211 else if (IsA(path, ProjectSetPath))
1212 path = ((ProjectSetPath *) path)->subpath;
1213 else
1214 break;
1215 }
1216 if (IS_DUMMY_APPEND(path))
1217 return true;
1218 return false;
1219 }
1220
1221 /*
1222 * Mark a relation as proven empty.
1223 *
1224 * During GEQO planning, this can get invoked more than once on the same
1225 * baserel struct, so it's worth checking to see if the rel is already marked
1226 * dummy.
1227 *
1228 * Also, when called during GEQO join planning, we are in a short-lived
1229 * memory context. We must make sure that the dummy path attached to a
1230 * baserel survives the GEQO cycle, else the baserel is trashed for future
1231 * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO,
1232 * we don't want the dummy path to clutter the main planning context. Upshot
1233 * is that the best solution is to explicitly make the dummy path in the same
1234 * context the given RelOptInfo is in.
1235 */
1236 void
1237 mark_dummy_rel(RelOptInfo *rel)
1238 {
1239 MemoryContext oldcontext;
1240
1241 /* Already marked? */
1242 if (is_dummy_rel(rel))
1243 return;
1244
1245 /* No, so choose correct context to make the dummy path in */
1246 oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
1247
1248 /* Set dummy size estimate */
1249 rel->rows = 0;
1250
1251 /* Evict any previously chosen paths */
1252 rel->pathlist = NIL;
1253 rel->partial_pathlist = NIL;
1254
1255 /* Set up the dummy path */
1256 add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
1257 rel->lateral_relids,
1258 0, false, NIL, -1));
1259
1260 /* Set or update cheapest_total_path and related fields */
1261 set_cheapest(rel);
1262
1263 MemoryContextSwitchTo(oldcontext);
1264 }
1265
1266
1267 /*
1268 * restriction_is_constant_false --- is a restrictlist just FALSE?
1269 *
1270 * In cases where a qual is provably constant FALSE, eval_const_expressions
1271 * will generally have thrown away anything that's ANDed with it. In outer
1272 * join situations this will leave us computing cartesian products only to
1273 * decide there's no match for an outer row, which is pretty stupid. So,
1274 * we need to detect the case.
1275 *
1276 * If only_pushed_down is true, then consider only quals that are pushed-down
1277 * from the point of view of the joinrel.
1278 */
1279 static bool
1280 restriction_is_constant_false(List *restrictlist,
1281 RelOptInfo *joinrel,
1282 bool only_pushed_down)
1283 {
1284 ListCell *lc;
1285
1286 /*
1287 * Despite the above comment, the restriction list we see here might
1288 * possibly have other members besides the FALSE constant, since other
1289 * quals could get "pushed down" to the outer join level. So we check
1290 * each member of the list.
1291 */
1292 foreach(lc, restrictlist)
1293 {
1294 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1295
1296 if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1297 continue;
1298
1299 if (rinfo->clause && IsA(rinfo->clause, Const))
1300 {
1301 Const *con = (Const *) rinfo->clause;
1302
1303 /* constant NULL is as good as constant FALSE for our purposes */
1304 if (con->constisnull)
1305 return true;
1306 if (!DatumGetBool(con->constvalue))
1307 return true;
1308 }
1309 }
1310 return false;
1311 }
1312
1313 /*
1314 * Assess whether join between given two partitioned relations can be broken
1315 * down into joins between matching partitions; a technique called
1316 * "partitionwise join"
1317 *
1318 * Partitionwise join is possible when a. Joining relations have same
1319 * partitioning scheme b. There exists an equi-join between the partition keys
1320 * of the two relations.
1321 *
1322 * Partitionwise join is planned as follows (details: optimizer/README.)
1323 *
1324 * 1. Create the RelOptInfos for joins between matching partitions i.e
1325 * child-joins and add paths to them.
1326 *
1327 * 2. Construct Append or MergeAppend paths across the set of child joins.
1328 * This second phase is implemented by generate_partitionwise_join_paths().
1329 *
1330 * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
1331 * obtained by translating the respective parent join structures.
1332 */
1333 static void
1334 try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
1335 RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
1336 List *parent_restrictlist)
1337 {
1338 bool rel1_is_simple = IS_SIMPLE_REL(rel1);
1339 bool rel2_is_simple = IS_SIMPLE_REL(rel2);
1340 int nparts;
1341 int cnt_parts;
1342
1343 /* Guard against stack overflow due to overly deep partition hierarchy. */
1344 check_stack_depth();
1345
1346 /* Nothing to do, if the join relation is not partitioned. */
1347 if (!IS_PARTITIONED_REL(joinrel))
1348 return;
1349
1350 /* The join relation should have consider_partitionwise_join set. */
1351 Assert(joinrel->consider_partitionwise_join);
1352
1353 /*
1354 * Since this join relation is partitioned, all the base relations
1355 * participating in this join must be partitioned and so are all the
1356 * intermediate join relations.
1357 */
1358 Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
1359 Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
1360
1361 /* The joining relations should have consider_partitionwise_join set. */
1362 Assert(rel1->consider_partitionwise_join &&
1363 rel2->consider_partitionwise_join);
1364
1365 /*
1366 * The partition scheme of the join relation should match that of the
1367 * joining relations.
1368 */
1369 Assert(joinrel->part_scheme == rel1->part_scheme &&
1370 joinrel->part_scheme == rel2->part_scheme);
1371
1372 /*
1373 * Since we allow partitionwise join only when the partition bounds of the
1374 * joining relations exactly match, the partition bounds of the join
1375 * should match those of the joining relations.
1376 */
1377 Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1378 joinrel->part_scheme->parttyplen,
1379 joinrel->part_scheme->parttypbyval,
1380 joinrel->boundinfo, rel1->boundinfo));
1381 Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1382 joinrel->part_scheme->parttyplen,
1383 joinrel->part_scheme->parttypbyval,
1384 joinrel->boundinfo, rel2->boundinfo));
1385
1386 nparts = joinrel->nparts;
1387
1388 /*
1389 * Create child-join relations for this partitioned join, if those don't
1390 * exist. Add paths to child-joins for a pair of child relations
1391 * corresponding to the given pair of parent relations.
1392 */
1393 for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++)
1394 {
1395 RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
1396 RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
1397 bool rel1_empty = (child_rel1 == NULL ||
1398 IS_DUMMY_REL(child_rel1));
1399 bool rel2_empty = (child_rel2 == NULL ||
1400 IS_DUMMY_REL(child_rel2));
1401 SpecialJoinInfo *child_sjinfo;
1402 List *child_restrictlist;
1403 RelOptInfo *child_joinrel;
1404 Relids child_joinrelids;
1405 AppendRelInfo **appinfos;
1406 int nappinfos;
1407
1408 /*
1409 * Check for cases where we can prove that this segment of the join
1410 * returns no rows, due to one or both inputs being empty (including
1411 * inputs that have been pruned away entirely). If so just ignore it.
1412 * These rules are equivalent to populate_joinrel_with_paths's rules
1413 * for dummy input relations.
1414 */
1415 switch (parent_sjinfo->jointype)
1416 {
1417 case JOIN_INNER:
1418 case JOIN_SEMI:
1419 if (rel1_empty || rel2_empty)
1420 continue; /* ignore this join segment */
1421 break;
1422 case JOIN_LEFT:
1423 case JOIN_ANTI:
1424 if (rel1_empty)
1425 continue; /* ignore this join segment */
1426 break;
1427 case JOIN_FULL:
1428 if (rel1_empty && rel2_empty)
1429 continue; /* ignore this join segment */
1430 break;
1431 default:
1432 /* other values not expected here */
1433 elog(ERROR, "unrecognized join type: %d",
1434 (int) parent_sjinfo->jointype);
1435 break;
1436 }
1437
1438 /*
1439 * If a child has been pruned entirely then we can't generate paths
1440 * for it, so we have to reject partitionwise joining unless we were
1441 * able to eliminate this partition above.
1442 */
1443 if (child_rel1 == NULL || child_rel2 == NULL)
1444 {
1445 /*
1446 * Mark the joinrel as unpartitioned so that later functions treat
1447 * it correctly.
1448 */
1449 joinrel->nparts = 0;
1450 return;
1451 }
1452
1453 /*
1454 * If a leaf relation has consider_partitionwise_join=false, it means
1455 * that it's a dummy relation for which we skipped setting up tlist
1456 * expressions and adding EC members in set_append_rel_size(), so
1457 * again we have to fail here.
1458 */
1459 if (rel1_is_simple && !child_rel1->consider_partitionwise_join)
1460 {
1461 Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL);
1462 Assert(IS_DUMMY_REL(child_rel1));
1463 joinrel->nparts = 0;
1464 return;
1465 }
1466 if (rel2_is_simple && !child_rel2->consider_partitionwise_join)
1467 {
1468 Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL);
1469 Assert(IS_DUMMY_REL(child_rel2));
1470 joinrel->nparts = 0;
1471 return;
1472 }
1473
1474 /* We should never try to join two overlapping sets of rels. */
1475 Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
1476 child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
1477 appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
1478
1479 /*
1480 * Construct SpecialJoinInfo from parent join relations's
1481 * SpecialJoinInfo.
1482 */
1483 child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
1484 child_rel1->relids,
1485 child_rel2->relids);
1486
1487 /*
1488 * Construct restrictions applicable to the child join from those
1489 * applicable to the parent join.
1490 */
1491 child_restrictlist =
1492 (List *) adjust_appendrel_attrs(root,
1493 (Node *) parent_restrictlist,
1494 nappinfos, appinfos);
1495 pfree(appinfos);
1496
1497 child_joinrel = joinrel->part_rels[cnt_parts];
1498 if (!child_joinrel)
1499 {
1500 child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
1501 joinrel, child_restrictlist,
1502 child_sjinfo,
1503 child_sjinfo->jointype);
1504 joinrel->part_rels[cnt_parts] = child_joinrel;
1505 }
1506
1507 Assert(bms_equal(child_joinrel->relids, child_joinrelids));
1508
1509 populate_joinrel_with_paths(root, child_rel1, child_rel2,
1510 child_joinrel, child_sjinfo,
1511 child_restrictlist);
1512 }
1513 }
1514
1515 /*
1516 * Returns true if there exists an equi-join condition for each pair of
1517 * partition keys from given relations being joined.
1518 */
1519 bool
1520 have_partkey_equi_join(RelOptInfo *joinrel,
1521 RelOptInfo *rel1, RelOptInfo *rel2,
1522 JoinType jointype, List *restrictlist)
1523 {
1524 PartitionScheme part_scheme = rel1->part_scheme;
1525 ListCell *lc;
1526 int cnt_pks;
1527 bool pk_has_clause[PARTITION_MAX_KEYS];
1528 bool strict_op;
1529
1530 /*
1531 * This function should be called when the joining relations have same
1532 * partitioning scheme.
1533 */
1534 Assert(rel1->part_scheme == rel2->part_scheme);
1535 Assert(part_scheme);
1536
1537 memset(pk_has_clause, 0, sizeof(pk_has_clause));
1538 foreach(lc, restrictlist)
1539 {
1540 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1541 OpExpr *opexpr;
1542 Expr *expr1;
1543 Expr *expr2;
1544 int ipk1;
1545 int ipk2;
1546
1547 /* If processing an outer join, only use its own join clauses. */
1548 if (IS_OUTER_JOIN(jointype) &&
1549 RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1550 continue;
1551
1552 /* Skip clauses which can not be used for a join. */
1553 if (!rinfo->can_join)
1554 continue;
1555
1556 /* Skip clauses which are not equality conditions. */
1557 if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator))
1558 continue;
1559
1560 opexpr = (OpExpr *) rinfo->clause;
1561 Assert(is_opclause(opexpr));
1562
1563 /*
1564 * The equi-join between partition keys is strict if equi-join between
1565 * at least one partition key is using a strict operator. See
1566 * explanation about outer join reordering identity 3 in
1567 * optimizer/README
1568 */
1569 strict_op = op_strict(opexpr->opno);
1570
1571 /* Match the operands to the relation. */
1572 if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
1573 bms_is_subset(rinfo->right_relids, rel2->relids))
1574 {
1575 expr1 = linitial(opexpr->args);
1576 expr2 = lsecond(opexpr->args);
1577 }
1578 else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
1579 bms_is_subset(rinfo->right_relids, rel1->relids))
1580 {
1581 expr1 = lsecond(opexpr->args);
1582 expr2 = linitial(opexpr->args);
1583 }
1584 else
1585 continue;
1586
1587 /*
1588 * Only clauses referencing the partition keys are useful for
1589 * partitionwise join.
1590 */
1591 ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
1592 if (ipk1 < 0)
1593 continue;
1594 ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
1595 if (ipk2 < 0)
1596 continue;
1597
1598 /*
1599 * If the clause refers to keys at different ordinal positions, it can
1600 * not be used for partitionwise join.
1601 */
1602 if (ipk1 != ipk2)
1603 continue;
1604
1605 /*
1606 * The clause allows partitionwise join if only it uses the same
1607 * operator family as that specified by the partition key.
1608 */
1609 if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH)
1610 {
1611 if (!op_in_opfamily(rinfo->hashjoinoperator,
1612 part_scheme->partopfamily[ipk1]))
1613 continue;
1614 }
1615 else if (!list_member_oid(rinfo->mergeopfamilies,
1616 part_scheme->partopfamily[ipk1]))
1617 continue;
1618
1619 /* Mark the partition key as having an equi-join clause. */
1620 pk_has_clause[ipk1] = true;
1621 }
1622
1623 /* Check whether every partition key has an equi-join condition. */
1624 for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
1625 {
1626 if (!pk_has_clause[cnt_pks])
1627 return false;
1628 }
1629
1630 return true;
1631 }
1632
1633 /*
1634 * Find the partition key from the given relation matching the given
1635 * expression. If found, return the index of the partition key, else return -1.
1636 */
1637 static int
1638 match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
1639 {
1640 int cnt;
1641
1642 /* This function should be called only for partitioned relations. */
1643 Assert(rel->part_scheme);
1644
1645 /* Remove any relabel decorations. */
1646 while (IsA(expr, RelabelType))
1647 expr = (Expr *) (castNode(RelabelType, expr))->arg;
1648
1649 for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
1650 {
1651 ListCell *lc;
1652
1653 Assert(rel->partexprs);
1654 foreach(lc, rel->partexprs[cnt])
1655 {
1656 if (equal(lfirst(lc), expr))
1657 return cnt;
1658 }
1659
1660 if (!strict_op)
1661 continue;
1662
1663 /*
1664 * If it's a strict equi-join a NULL partition key on one side will
1665 * not join a NULL partition key on the other side. So, rows with NULL
1666 * partition key from a partition on one side can not join with those
1667 * from a non-matching partition on the other side. So, search the
1668 * nullable partition keys as well.
1669 */
1670 Assert(rel->nullable_partexprs);
1671 foreach(lc, rel->nullable_partexprs[cnt])
1672 {
1673 if (equal(lfirst(lc), expr))
1674 return cnt;
1675 }
1676 }
1677
1678 return -1;
1679 }
1680