1 /*-------------------------------------------------------------------------
2  *
3  * joinrels.c
4  *	  Routines to determine which relations should be joined
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/joinrels.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "miscadmin.h"
18 #include "optimizer/appendinfo.h"
19 #include "optimizer/joininfo.h"
20 #include "optimizer/pathnode.h"
21 #include "optimizer/paths.h"
22 #include "partitioning/partbounds.h"
23 #include "utils/lsyscache.h"
24 #include "utils/memutils.h"
25 
26 
27 static void make_rels_by_clause_joins(PlannerInfo *root,
28 									  RelOptInfo *old_rel,
29 									  ListCell *other_rels);
30 static void make_rels_by_clauseless_joins(PlannerInfo *root,
31 										  RelOptInfo *old_rel,
32 										  ListCell *other_rels);
33 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
34 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
35 static bool restriction_is_constant_false(List *restrictlist,
36 										  RelOptInfo *joinrel,
37 										  bool only_pushed_down);
38 static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
39 										RelOptInfo *rel2, RelOptInfo *joinrel,
40 										SpecialJoinInfo *sjinfo, List *restrictlist);
41 static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
42 								   RelOptInfo *rel2, RelOptInfo *joinrel,
43 								   SpecialJoinInfo *parent_sjinfo,
44 								   List *parent_restrictlist);
45 static SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
46 												SpecialJoinInfo *parent_sjinfo,
47 												Relids left_relids, Relids right_relids);
48 static int	match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
49 										 bool strict_op);
50 
51 
52 /*
53  * join_search_one_level
54  *	  Consider ways to produce join relations containing exactly 'level'
55  *	  jointree items.  (This is one step of the dynamic-programming method
56  *	  embodied in standard_join_search.)  Join rel nodes for each feasible
57  *	  combination of lower-level rels are created and returned in a list.
58  *	  Implementation paths are created for each such joinrel, too.
59  *
60  * level: level of rels we want to make this time
61  * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
62  *
63  * The result is returned in root->join_rel_level[level].
64  */
65 void
join_search_one_level(PlannerInfo * root,int level)66 join_search_one_level(PlannerInfo *root, int level)
67 {
68 	List	  **joinrels = root->join_rel_level;
69 	ListCell   *r;
70 	int			k;
71 
72 	Assert(joinrels[level] == NIL);
73 
74 	/* Set join_cur_level so that new joinrels are added to proper list */
75 	root->join_cur_level = level;
76 
77 	/*
78 	 * First, consider left-sided and right-sided plans, in which rels of
79 	 * exactly level-1 member relations are joined against initial relations.
80 	 * We prefer to join using join clauses, but if we find a rel of level-1
81 	 * members that has no join clauses, we will generate Cartesian-product
82 	 * joins against all initial rels not already contained in it.
83 	 */
84 	foreach(r, joinrels[level - 1])
85 	{
86 		RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
87 
88 		if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
89 			has_join_restriction(root, old_rel))
90 		{
91 			/*
92 			 * There are join clauses or join order restrictions relevant to
93 			 * this rel, so consider joins between this rel and (only) those
94 			 * initial rels it is linked to by a clause or restriction.
95 			 *
96 			 * At level 2 this condition is symmetric, so there is no need to
97 			 * look at initial rels before this one in the list; we already
98 			 * considered such joins when we were at the earlier rel.  (The
99 			 * mirror-image joins are handled automatically by make_join_rel.)
100 			 * In later passes (level > 2), we join rels of the previous level
101 			 * to each initial rel they don't already include but have a join
102 			 * clause or restriction with.
103 			 */
104 			ListCell   *other_rels;
105 
106 			if (level == 2)		/* consider remaining initial rels */
107 				other_rels = lnext(r);
108 			else				/* consider all initial rels */
109 				other_rels = list_head(joinrels[1]);
110 
111 			make_rels_by_clause_joins(root,
112 									  old_rel,
113 									  other_rels);
114 		}
115 		else
116 		{
117 			/*
118 			 * Oops, we have a relation that is not joined to any other
119 			 * relation, either directly or by join-order restrictions.
120 			 * Cartesian product time.
121 			 *
122 			 * We consider a cartesian product with each not-already-included
123 			 * initial rel, whether it has other join clauses or not.  At
124 			 * level 2, if there are two or more clauseless initial rels, we
125 			 * will redundantly consider joining them in both directions; but
126 			 * such cases aren't common enough to justify adding complexity to
127 			 * avoid the duplicated effort.
128 			 */
129 			make_rels_by_clauseless_joins(root,
130 										  old_rel,
131 										  list_head(joinrels[1]));
132 		}
133 	}
134 
135 	/*
136 	 * Now, consider "bushy plans" in which relations of k initial rels are
137 	 * joined to relations of level-k initial rels, for 2 <= k <= level-2.
138 	 *
139 	 * We only consider bushy-plan joins for pairs of rels where there is a
140 	 * suitable join clause (or join order restriction), in order to avoid
141 	 * unreasonable growth of planning time.
142 	 */
143 	for (k = 2;; k++)
144 	{
145 		int			other_level = level - k;
146 
147 		/*
148 		 * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
149 		 * need to go as far as the halfway point.
150 		 */
151 		if (k > other_level)
152 			break;
153 
154 		foreach(r, joinrels[k])
155 		{
156 			RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
157 			ListCell   *other_rels;
158 			ListCell   *r2;
159 
160 			/*
161 			 * We can ignore relations without join clauses here, unless they
162 			 * participate in join-order restrictions --- then we might have
163 			 * to force a bushy join plan.
164 			 */
165 			if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
166 				!has_join_restriction(root, old_rel))
167 				continue;
168 
169 			if (k == other_level)
170 				other_rels = lnext(r);	/* only consider remaining rels */
171 			else
172 				other_rels = list_head(joinrels[other_level]);
173 
174 			for_each_cell(r2, other_rels)
175 			{
176 				RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
177 
178 				if (!bms_overlap(old_rel->relids, new_rel->relids))
179 				{
180 					/*
181 					 * OK, we can build a rel of the right level from this
182 					 * pair of rels.  Do so if there is at least one relevant
183 					 * join clause or join order restriction.
184 					 */
185 					if (have_relevant_joinclause(root, old_rel, new_rel) ||
186 						have_join_order_restriction(root, old_rel, new_rel))
187 					{
188 						(void) make_join_rel(root, old_rel, new_rel);
189 					}
190 				}
191 			}
192 		}
193 	}
194 
195 	/*----------
196 	 * Last-ditch effort: if we failed to find any usable joins so far, force
197 	 * a set of cartesian-product joins to be generated.  This handles the
198 	 * special case where all the available rels have join clauses but we
199 	 * cannot use any of those clauses yet.  This can only happen when we are
200 	 * considering a join sub-problem (a sub-joinlist) and all the rels in the
201 	 * sub-problem have only join clauses with rels outside the sub-problem.
202 	 * An example is
203 	 *
204 	 *		SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
205 	 *		WHERE a.w = c.x and b.y = d.z;
206 	 *
207 	 * If the "a INNER JOIN b" sub-problem does not get flattened into the
208 	 * upper level, we must be willing to make a cartesian join of a and b;
209 	 * but the code above will not have done so, because it thought that both
210 	 * a and b have joinclauses.  We consider only left-sided and right-sided
211 	 * cartesian joins in this case (no bushy).
212 	 *----------
213 	 */
214 	if (joinrels[level] == NIL)
215 	{
216 		/*
217 		 * This loop is just like the first one, except we always call
218 		 * make_rels_by_clauseless_joins().
219 		 */
220 		foreach(r, joinrels[level - 1])
221 		{
222 			RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
223 
224 			make_rels_by_clauseless_joins(root,
225 										  old_rel,
226 										  list_head(joinrels[1]));
227 		}
228 
229 		/*----------
230 		 * When special joins are involved, there may be no legal way
231 		 * to make an N-way join for some values of N.  For example consider
232 		 *
233 		 * SELECT ... FROM t1 WHERE
234 		 *	 x IN (SELECT ... FROM t2,t3 WHERE ...) AND
235 		 *	 y IN (SELECT ... FROM t4,t5 WHERE ...)
236 		 *
237 		 * We will flatten this query to a 5-way join problem, but there are
238 		 * no 4-way joins that join_is_legal() will consider legal.  We have
239 		 * to accept failure at level 4 and go on to discover a workable
240 		 * bushy plan at level 5.
241 		 *
242 		 * However, if there are no special joins and no lateral references
243 		 * then join_is_legal() should never fail, and so the following sanity
244 		 * check is useful.
245 		 *----------
246 		 */
247 		if (joinrels[level] == NIL &&
248 			root->join_info_list == NIL &&
249 			!root->hasLateralRTEs)
250 			elog(ERROR, "failed to build any %d-way joins", level);
251 	}
252 }
253 
254 /*
255  * make_rels_by_clause_joins
256  *	  Build joins between the given relation 'old_rel' and other relations
257  *	  that participate in join clauses that 'old_rel' also participates in
258  *	  (or participate in join-order restrictions with it).
259  *	  The join rels are returned in root->join_rel_level[join_cur_level].
260  *
261  * Note: at levels above 2 we will generate the same joined relation in
262  * multiple ways --- for example (a join b) join c is the same RelOptInfo as
263  * (b join c) join a, though the second case will add a different set of Paths
264  * to it.  This is the reason for using the join_rel_level mechanism, which
265  * automatically ensures that each new joinrel is only added to the list once.
266  *
267  * 'old_rel' is the relation entry for the relation to be joined
268  * 'other_rels': the first cell in a linked list containing the other
269  * rels to be considered for joining
270  *
271  * Currently, this is only used with initial rels in other_rels, but it
272  * will work for joining to joinrels too.
273  */
274 static void
make_rels_by_clause_joins(PlannerInfo * root,RelOptInfo * old_rel,ListCell * other_rels)275 make_rels_by_clause_joins(PlannerInfo *root,
276 						  RelOptInfo *old_rel,
277 						  ListCell *other_rels)
278 {
279 	ListCell   *l;
280 
281 	for_each_cell(l, other_rels)
282 	{
283 		RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
284 
285 		if (!bms_overlap(old_rel->relids, other_rel->relids) &&
286 			(have_relevant_joinclause(root, old_rel, other_rel) ||
287 			 have_join_order_restriction(root, old_rel, other_rel)))
288 		{
289 			(void) make_join_rel(root, old_rel, other_rel);
290 		}
291 	}
292 }
293 
294 /*
295  * make_rels_by_clauseless_joins
296  *	  Given a relation 'old_rel' and a list of other relations
297  *	  'other_rels', create a join relation between 'old_rel' and each
298  *	  member of 'other_rels' that isn't already included in 'old_rel'.
299  *	  The join rels are returned in root->join_rel_level[join_cur_level].
300  *
301  * 'old_rel' is the relation entry for the relation to be joined
302  * 'other_rels': the first cell of a linked list containing the
303  * other rels to be considered for joining
304  *
305  * Currently, this is only used with initial rels in other_rels, but it would
306  * work for joining to joinrels too.
307  */
308 static void
make_rels_by_clauseless_joins(PlannerInfo * root,RelOptInfo * old_rel,ListCell * other_rels)309 make_rels_by_clauseless_joins(PlannerInfo *root,
310 							  RelOptInfo *old_rel,
311 							  ListCell *other_rels)
312 {
313 	ListCell   *l;
314 
315 	for_each_cell(l, other_rels)
316 	{
317 		RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
318 
319 		if (!bms_overlap(other_rel->relids, old_rel->relids))
320 		{
321 			(void) make_join_rel(root, old_rel, other_rel);
322 		}
323 	}
324 }
325 
326 
327 /*
328  * join_is_legal
329  *	   Determine whether a proposed join is legal given the query's
330  *	   join order constraints; and if it is, determine the join type.
331  *
332  * Caller must supply not only the two rels, but the union of their relids.
333  * (We could simplify the API by computing joinrelids locally, but this
334  * would be redundant work in the normal path through make_join_rel.)
335  *
336  * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
337  * else it's set to point to the associated SpecialJoinInfo node.  Also,
338  * *reversed_p is set true if the given relations need to be swapped to
339  * match the SpecialJoinInfo node.
340  */
341 static bool
join_is_legal(PlannerInfo * root,RelOptInfo * rel1,RelOptInfo * rel2,Relids joinrelids,SpecialJoinInfo ** sjinfo_p,bool * reversed_p)342 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
343 			  Relids joinrelids,
344 			  SpecialJoinInfo **sjinfo_p, bool *reversed_p)
345 {
346 	SpecialJoinInfo *match_sjinfo;
347 	bool		reversed;
348 	bool		unique_ified;
349 	bool		must_be_leftjoin;
350 	ListCell   *l;
351 
352 	/*
353 	 * Ensure output params are set on failure return.  This is just to
354 	 * suppress uninitialized-variable warnings from overly anal compilers.
355 	 */
356 	*sjinfo_p = NULL;
357 	*reversed_p = false;
358 
359 	/*
360 	 * If we have any special joins, the proposed join might be illegal; and
361 	 * in any case we have to determine its join type.  Scan the join info
362 	 * list for matches and conflicts.
363 	 */
364 	match_sjinfo = NULL;
365 	reversed = false;
366 	unique_ified = false;
367 	must_be_leftjoin = false;
368 
369 	foreach(l, root->join_info_list)
370 	{
371 		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
372 
373 		/*
374 		 * This special join is not relevant unless its RHS overlaps the
375 		 * proposed join.  (Check this first as a fast path for dismissing
376 		 * most irrelevant SJs quickly.)
377 		 */
378 		if (!bms_overlap(sjinfo->min_righthand, joinrelids))
379 			continue;
380 
381 		/*
382 		 * Also, not relevant if proposed join is fully contained within RHS
383 		 * (ie, we're still building up the RHS).
384 		 */
385 		if (bms_is_subset(joinrelids, sjinfo->min_righthand))
386 			continue;
387 
388 		/*
389 		 * Also, not relevant if SJ is already done within either input.
390 		 */
391 		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
392 			bms_is_subset(sjinfo->min_righthand, rel1->relids))
393 			continue;
394 		if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
395 			bms_is_subset(sjinfo->min_righthand, rel2->relids))
396 			continue;
397 
398 		/*
399 		 * If it's a semijoin and we already joined the RHS to any other rels
400 		 * within either input, then we must have unique-ified the RHS at that
401 		 * point (see below).  Therefore the semijoin is no longer relevant in
402 		 * this join path.
403 		 */
404 		if (sjinfo->jointype == JOIN_SEMI)
405 		{
406 			if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
407 				!bms_equal(sjinfo->syn_righthand, rel1->relids))
408 				continue;
409 			if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
410 				!bms_equal(sjinfo->syn_righthand, rel2->relids))
411 				continue;
412 		}
413 
414 		/*
415 		 * If one input contains min_lefthand and the other contains
416 		 * min_righthand, then we can perform the SJ at this join.
417 		 *
418 		 * Reject if we get matches to more than one SJ; that implies we're
419 		 * considering something that's not really valid.
420 		 */
421 		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
422 			bms_is_subset(sjinfo->min_righthand, rel2->relids))
423 		{
424 			if (match_sjinfo)
425 				return false;	/* invalid join path */
426 			match_sjinfo = sjinfo;
427 			reversed = false;
428 		}
429 		else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
430 				 bms_is_subset(sjinfo->min_righthand, rel1->relids))
431 		{
432 			if (match_sjinfo)
433 				return false;	/* invalid join path */
434 			match_sjinfo = sjinfo;
435 			reversed = true;
436 		}
437 		else if (sjinfo->jointype == JOIN_SEMI &&
438 				 bms_equal(sjinfo->syn_righthand, rel2->relids) &&
439 				 create_unique_path(root, rel2, rel2->cheapest_total_path,
440 									sjinfo) != NULL)
441 		{
442 			/*----------
443 			 * For a semijoin, we can join the RHS to anything else by
444 			 * unique-ifying the RHS (if the RHS can be unique-ified).
445 			 * We will only get here if we have the full RHS but less
446 			 * than min_lefthand on the LHS.
447 			 *
448 			 * The reason to consider such a join path is exemplified by
449 			 *	SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
450 			 * If we insist on doing this as a semijoin we will first have
451 			 * to form the cartesian product of A*B.  But if we unique-ify
452 			 * C then the semijoin becomes a plain innerjoin and we can join
453 			 * in any order, eg C to A and then to B.  When C is much smaller
454 			 * than A and B this can be a huge win.  So we allow C to be
455 			 * joined to just A or just B here, and then make_join_rel has
456 			 * to handle the case properly.
457 			 *
458 			 * Note that actually we'll allow unique-ified C to be joined to
459 			 * some other relation D here, too.  That is legal, if usually not
460 			 * very sane, and this routine is only concerned with legality not
461 			 * with whether the join is good strategy.
462 			 *----------
463 			 */
464 			if (match_sjinfo)
465 				return false;	/* invalid join path */
466 			match_sjinfo = sjinfo;
467 			reversed = false;
468 			unique_ified = true;
469 		}
470 		else if (sjinfo->jointype == JOIN_SEMI &&
471 				 bms_equal(sjinfo->syn_righthand, rel1->relids) &&
472 				 create_unique_path(root, rel1, rel1->cheapest_total_path,
473 									sjinfo) != NULL)
474 		{
475 			/* Reversed semijoin case */
476 			if (match_sjinfo)
477 				return false;	/* invalid join path */
478 			match_sjinfo = sjinfo;
479 			reversed = true;
480 			unique_ified = true;
481 		}
482 		else
483 		{
484 			/*
485 			 * Otherwise, the proposed join overlaps the RHS but isn't a valid
486 			 * implementation of this SJ.  But don't panic quite yet: the RHS
487 			 * violation might have occurred previously, in one or both input
488 			 * relations, in which case we must have previously decided that
489 			 * it was OK to commute some other SJ with this one.  If we need
490 			 * to perform this join to finish building up the RHS, rejecting
491 			 * it could lead to not finding any plan at all.  (This can occur
492 			 * because of the heuristics elsewhere in this file that postpone
493 			 * clauseless joins: we might not consider doing a clauseless join
494 			 * within the RHS until after we've performed other, validly
495 			 * commutable SJs with one or both sides of the clauseless join.)
496 			 * This consideration boils down to the rule that if both inputs
497 			 * overlap the RHS, we can allow the join --- they are either
498 			 * fully within the RHS, or represent previously-allowed joins to
499 			 * rels outside it.
500 			 */
501 			if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
502 				bms_overlap(rel2->relids, sjinfo->min_righthand))
503 				continue;		/* assume valid previous violation of RHS */
504 
505 			/*
506 			 * The proposed join could still be legal, but only if we're
507 			 * allowed to associate it into the RHS of this SJ.  That means
508 			 * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
509 			 * not FULL) and the proposed join must not overlap the LHS.
510 			 */
511 			if (sjinfo->jointype != JOIN_LEFT ||
512 				bms_overlap(joinrelids, sjinfo->min_lefthand))
513 				return false;	/* invalid join path */
514 
515 			/*
516 			 * To be valid, the proposed join must be a LEFT join; otherwise
517 			 * it can't associate into this SJ's RHS.  But we may not yet have
518 			 * found the SpecialJoinInfo matching the proposed join, so we
519 			 * can't test that yet.  Remember the requirement for later.
520 			 */
521 			must_be_leftjoin = true;
522 		}
523 	}
524 
525 	/*
526 	 * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
527 	 * proposed join can't associate into an SJ's RHS.
528 	 *
529 	 * Also, fail if the proposed join's predicate isn't strict; we're
530 	 * essentially checking to see if we can apply outer-join identity 3, and
531 	 * that's a requirement.  (This check may be redundant with checks in
532 	 * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
533 	 */
534 	if (must_be_leftjoin &&
535 		(match_sjinfo == NULL ||
536 		 match_sjinfo->jointype != JOIN_LEFT ||
537 		 !match_sjinfo->lhs_strict))
538 		return false;			/* invalid join path */
539 
540 	/*
541 	 * We also have to check for constraints imposed by LATERAL references.
542 	 */
543 	if (root->hasLateralRTEs)
544 	{
545 		bool		lateral_fwd;
546 		bool		lateral_rev;
547 		Relids		join_lateral_rels;
548 
549 		/*
550 		 * The proposed rels could each contain lateral references to the
551 		 * other, in which case the join is impossible.  If there are lateral
552 		 * references in just one direction, then the join has to be done with
553 		 * a nestloop with the lateral referencer on the inside.  If the join
554 		 * matches an SJ that cannot be implemented by such a nestloop, the
555 		 * join is impossible.
556 		 *
557 		 * Also, if the lateral reference is only indirect, we should reject
558 		 * the join; whatever rel(s) the reference chain goes through must be
559 		 * joined to first.
560 		 *
561 		 * Another case that might keep us from building a valid plan is the
562 		 * implementation restriction described by have_dangerous_phv().
563 		 */
564 		lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
565 		lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
566 		if (lateral_fwd && lateral_rev)
567 			return false;		/* have lateral refs in both directions */
568 		if (lateral_fwd)
569 		{
570 			/* has to be implemented as nestloop with rel1 on left */
571 			if (match_sjinfo &&
572 				(reversed ||
573 				 unique_ified ||
574 				 match_sjinfo->jointype == JOIN_FULL))
575 				return false;	/* not implementable as nestloop */
576 			/* check there is a direct reference from rel2 to rel1 */
577 			if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
578 				return false;	/* only indirect refs, so reject */
579 			/* check we won't have a dangerous PHV */
580 			if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
581 				return false;	/* might be unable to handle required PHV */
582 		}
583 		else if (lateral_rev)
584 		{
585 			/* has to be implemented as nestloop with rel2 on left */
586 			if (match_sjinfo &&
587 				(!reversed ||
588 				 unique_ified ||
589 				 match_sjinfo->jointype == JOIN_FULL))
590 				return false;	/* not implementable as nestloop */
591 			/* check there is a direct reference from rel1 to rel2 */
592 			if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
593 				return false;	/* only indirect refs, so reject */
594 			/* check we won't have a dangerous PHV */
595 			if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
596 				return false;	/* might be unable to handle required PHV */
597 		}
598 
599 		/*
600 		 * LATERAL references could also cause problems later on if we accept
601 		 * this join: if the join's minimum parameterization includes any rels
602 		 * that would have to be on the inside of an outer join with this join
603 		 * rel, then it's never going to be possible to build the complete
604 		 * query using this join.  We should reject this join not only because
605 		 * it'll save work, but because if we don't, the clauseless-join
606 		 * heuristics might think that legality of this join means that some
607 		 * other join rel need not be formed, and that could lead to failure
608 		 * to find any plan at all.  We have to consider not only rels that
609 		 * are directly on the inner side of an OJ with the joinrel, but also
610 		 * ones that are indirectly so, so search to find all such rels.
611 		 */
612 		join_lateral_rels = min_join_parameterization(root, joinrelids,
613 													  rel1, rel2);
614 		if (join_lateral_rels)
615 		{
616 			Relids		join_plus_rhs = bms_copy(joinrelids);
617 			bool		more;
618 
619 			do
620 			{
621 				more = false;
622 				foreach(l, root->join_info_list)
623 				{
624 					SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
625 
626 					/* ignore full joins --- their ordering is predetermined */
627 					if (sjinfo->jointype == JOIN_FULL)
628 						continue;
629 
630 					if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
631 						!bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
632 					{
633 						join_plus_rhs = bms_add_members(join_plus_rhs,
634 														sjinfo->min_righthand);
635 						more = true;
636 					}
637 				}
638 			} while (more);
639 			if (bms_overlap(join_plus_rhs, join_lateral_rels))
640 				return false;	/* will not be able to join to some RHS rel */
641 		}
642 	}
643 
644 	/* Otherwise, it's a valid join */
645 	*sjinfo_p = match_sjinfo;
646 	*reversed_p = reversed;
647 	return true;
648 }
649 
650 
651 /*
652  * make_join_rel
653  *	   Find or create a join RelOptInfo that represents the join of
654  *	   the two given rels, and add to it path information for paths
655  *	   created with the two rels as outer and inner rel.
656  *	   (The join rel may already contain paths generated from other
657  *	   pairs of rels that add up to the same set of base rels.)
658  *
659  * NB: will return NULL if attempted join is not valid.  This can happen
660  * when working with outer joins, or with IN or EXISTS clauses that have been
661  * turned into joins.
662  */
663 RelOptInfo *
make_join_rel(PlannerInfo * root,RelOptInfo * rel1,RelOptInfo * rel2)664 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
665 {
666 	Relids		joinrelids;
667 	SpecialJoinInfo *sjinfo;
668 	bool		reversed;
669 	SpecialJoinInfo sjinfo_data;
670 	RelOptInfo *joinrel;
671 	List	   *restrictlist;
672 
673 	/* We should never try to join two overlapping sets of rels. */
674 	Assert(!bms_overlap(rel1->relids, rel2->relids));
675 
676 	/* Construct Relids set that identifies the joinrel. */
677 	joinrelids = bms_union(rel1->relids, rel2->relids);
678 
679 	/* Check validity and determine join type. */
680 	if (!join_is_legal(root, rel1, rel2, joinrelids,
681 					   &sjinfo, &reversed))
682 	{
683 		/* invalid join path */
684 		bms_free(joinrelids);
685 		return NULL;
686 	}
687 
688 	/* Swap rels if needed to match the join info. */
689 	if (reversed)
690 	{
691 		RelOptInfo *trel = rel1;
692 
693 		rel1 = rel2;
694 		rel2 = trel;
695 	}
696 
697 	/*
698 	 * If it's a plain inner join, then we won't have found anything in
699 	 * join_info_list.  Make up a SpecialJoinInfo so that selectivity
700 	 * estimation functions will know what's being joined.
701 	 */
702 	if (sjinfo == NULL)
703 	{
704 		sjinfo = &sjinfo_data;
705 		sjinfo->type = T_SpecialJoinInfo;
706 		sjinfo->min_lefthand = rel1->relids;
707 		sjinfo->min_righthand = rel2->relids;
708 		sjinfo->syn_lefthand = rel1->relids;
709 		sjinfo->syn_righthand = rel2->relids;
710 		sjinfo->jointype = JOIN_INNER;
711 		/* we don't bother trying to make the remaining fields valid */
712 		sjinfo->lhs_strict = false;
713 		sjinfo->delay_upper_joins = false;
714 		sjinfo->semi_can_btree = false;
715 		sjinfo->semi_can_hash = false;
716 		sjinfo->semi_operators = NIL;
717 		sjinfo->semi_rhs_exprs = NIL;
718 	}
719 
720 	/*
721 	 * Find or build the join RelOptInfo, and compute the restrictlist that
722 	 * goes with this particular joining.
723 	 */
724 	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
725 							 &restrictlist);
726 
727 	/*
728 	 * If we've already proven this join is empty, we needn't consider any
729 	 * more paths for it.
730 	 */
731 	if (is_dummy_rel(joinrel))
732 	{
733 		bms_free(joinrelids);
734 		return joinrel;
735 	}
736 
737 	/* Add paths to the join relation. */
738 	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
739 								restrictlist);
740 
741 	bms_free(joinrelids);
742 
743 	return joinrel;
744 }
745 
746 /*
747  * populate_joinrel_with_paths
748  *	  Add paths to the given joinrel for given pair of joining relations. The
749  *	  SpecialJoinInfo provides details about the join and the restrictlist
750  *	  contains the join clauses and the other clauses applicable for given pair
751  *	  of the joining relations.
752  */
753 static void
populate_joinrel_with_paths(PlannerInfo * root,RelOptInfo * rel1,RelOptInfo * rel2,RelOptInfo * joinrel,SpecialJoinInfo * sjinfo,List * restrictlist)754 populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
755 							RelOptInfo *rel2, RelOptInfo *joinrel,
756 							SpecialJoinInfo *sjinfo, List *restrictlist)
757 {
758 	/*
759 	 * Consider paths using each rel as both outer and inner.  Depending on
760 	 * the join type, a provably empty outer or inner rel might mean the join
761 	 * is provably empty too; in which case throw away any previously computed
762 	 * paths and mark the join as dummy.  (We do it this way since it's
763 	 * conceivable that dummy-ness of a multi-element join might only be
764 	 * noticeable for certain construction paths.)
765 	 *
766 	 * Also, a provably constant-false join restriction typically means that
767 	 * we can skip evaluating one or both sides of the join.  We do this by
768 	 * marking the appropriate rel as dummy.  For outer joins, a
769 	 * constant-false restriction that is pushed down still means the whole
770 	 * join is dummy, while a non-pushed-down one means that no inner rows
771 	 * will join so we can treat the inner rel as dummy.
772 	 *
773 	 * We need only consider the jointypes that appear in join_info_list, plus
774 	 * JOIN_INNER.
775 	 */
776 	switch (sjinfo->jointype)
777 	{
778 		case JOIN_INNER:
779 			if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
780 				restriction_is_constant_false(restrictlist, joinrel, false))
781 			{
782 				mark_dummy_rel(joinrel);
783 				break;
784 			}
785 			add_paths_to_joinrel(root, joinrel, rel1, rel2,
786 								 JOIN_INNER, sjinfo,
787 								 restrictlist);
788 			add_paths_to_joinrel(root, joinrel, rel2, rel1,
789 								 JOIN_INNER, sjinfo,
790 								 restrictlist);
791 			break;
792 		case JOIN_LEFT:
793 			if (is_dummy_rel(rel1) ||
794 				restriction_is_constant_false(restrictlist, joinrel, true))
795 			{
796 				mark_dummy_rel(joinrel);
797 				break;
798 			}
799 			if (restriction_is_constant_false(restrictlist, joinrel, false) &&
800 				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
801 				mark_dummy_rel(rel2);
802 			add_paths_to_joinrel(root, joinrel, rel1, rel2,
803 								 JOIN_LEFT, sjinfo,
804 								 restrictlist);
805 			add_paths_to_joinrel(root, joinrel, rel2, rel1,
806 								 JOIN_RIGHT, sjinfo,
807 								 restrictlist);
808 			break;
809 		case JOIN_FULL:
810 			if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
811 				restriction_is_constant_false(restrictlist, joinrel, true))
812 			{
813 				mark_dummy_rel(joinrel);
814 				break;
815 			}
816 			add_paths_to_joinrel(root, joinrel, rel1, rel2,
817 								 JOIN_FULL, sjinfo,
818 								 restrictlist);
819 			add_paths_to_joinrel(root, joinrel, rel2, rel1,
820 								 JOIN_FULL, sjinfo,
821 								 restrictlist);
822 
823 			/*
824 			 * If there are join quals that aren't mergeable or hashable, we
825 			 * may not be able to build any valid plan.  Complain here so that
826 			 * we can give a somewhat-useful error message.  (Since we have no
827 			 * flexibility of planning for a full join, there's no chance of
828 			 * succeeding later with another pair of input rels.)
829 			 */
830 			if (joinrel->pathlist == NIL)
831 				ereport(ERROR,
832 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
833 						 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
834 			break;
835 		case JOIN_SEMI:
836 
837 			/*
838 			 * We might have a normal semijoin, or a case where we don't have
839 			 * enough rels to do the semijoin but can unique-ify the RHS and
840 			 * then do an innerjoin (see comments in join_is_legal).  In the
841 			 * latter case we can't apply JOIN_SEMI joining.
842 			 */
843 			if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
844 				bms_is_subset(sjinfo->min_righthand, rel2->relids))
845 			{
846 				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
847 					restriction_is_constant_false(restrictlist, joinrel, false))
848 				{
849 					mark_dummy_rel(joinrel);
850 					break;
851 				}
852 				add_paths_to_joinrel(root, joinrel, rel1, rel2,
853 									 JOIN_SEMI, sjinfo,
854 									 restrictlist);
855 			}
856 
857 			/*
858 			 * If we know how to unique-ify the RHS and one input rel is
859 			 * exactly the RHS (not a superset) we can consider unique-ifying
860 			 * it and then doing a regular join.  (The create_unique_path
861 			 * check here is probably redundant with what join_is_legal did,
862 			 * but if so the check is cheap because it's cached.  So test
863 			 * anyway to be sure.)
864 			 */
865 			if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
866 				create_unique_path(root, rel2, rel2->cheapest_total_path,
867 								   sjinfo) != NULL)
868 			{
869 				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
870 					restriction_is_constant_false(restrictlist, joinrel, false))
871 				{
872 					mark_dummy_rel(joinrel);
873 					break;
874 				}
875 				add_paths_to_joinrel(root, joinrel, rel1, rel2,
876 									 JOIN_UNIQUE_INNER, sjinfo,
877 									 restrictlist);
878 				add_paths_to_joinrel(root, joinrel, rel2, rel1,
879 									 JOIN_UNIQUE_OUTER, sjinfo,
880 									 restrictlist);
881 			}
882 			break;
883 		case JOIN_ANTI:
884 			if (is_dummy_rel(rel1) ||
885 				restriction_is_constant_false(restrictlist, joinrel, true))
886 			{
887 				mark_dummy_rel(joinrel);
888 				break;
889 			}
890 			if (restriction_is_constant_false(restrictlist, joinrel, false) &&
891 				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
892 				mark_dummy_rel(rel2);
893 			add_paths_to_joinrel(root, joinrel, rel1, rel2,
894 								 JOIN_ANTI, sjinfo,
895 								 restrictlist);
896 			break;
897 		default:
898 			/* other values not expected here */
899 			elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
900 			break;
901 	}
902 
903 	/* Apply partitionwise join technique, if possible. */
904 	try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
905 }
906 
907 
908 /*
909  * have_join_order_restriction
910  *		Detect whether the two relations should be joined to satisfy
911  *		a join-order restriction arising from special or lateral joins.
912  *
913  * In practice this is always used with have_relevant_joinclause(), and so
914  * could be merged with that function, but it seems clearer to separate the
915  * two concerns.  We need this test because there are degenerate cases where
916  * a clauseless join must be performed to satisfy join-order restrictions.
917  * Also, if one rel has a lateral reference to the other, or both are needed
918  * to compute some PHV, we should consider joining them even if the join would
919  * be clauseless.
920  *
921  * Note: this is only a problem if one side of a degenerate outer join
922  * contains multiple rels, or a clauseless join is required within an
923  * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
924  * join_search_one_level().  We could dispense with this test if we were
925  * willing to try bushy plans in the "last ditch" case, but that seems much
926  * less efficient.
927  */
928 bool
have_join_order_restriction(PlannerInfo * root,RelOptInfo * rel1,RelOptInfo * rel2)929 have_join_order_restriction(PlannerInfo *root,
930 							RelOptInfo *rel1, RelOptInfo *rel2)
931 {
932 	bool		result = false;
933 	ListCell   *l;
934 
935 	/*
936 	 * If either side has a direct lateral reference to the other, attempt the
937 	 * join regardless of outer-join considerations.
938 	 */
939 	if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) ||
940 		bms_overlap(rel2->relids, rel1->direct_lateral_relids))
941 		return true;
942 
943 	/*
944 	 * Likewise, if both rels are needed to compute some PlaceHolderVar,
945 	 * attempt the join regardless of outer-join considerations.  (This is not
946 	 * very desirable, because a PHV with a large eval_at set will cause a lot
947 	 * of probably-useless joins to be considered, but failing to do this can
948 	 * cause us to fail to construct a plan at all.)
949 	 */
950 	foreach(l, root->placeholder_list)
951 	{
952 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
953 
954 		if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
955 			bms_is_subset(rel2->relids, phinfo->ph_eval_at))
956 			return true;
957 	}
958 
959 	/*
960 	 * It's possible that the rels correspond to the left and right sides of a
961 	 * degenerate outer join, that is, one with no joinclause mentioning the
962 	 * non-nullable side; in which case we should force the join to occur.
963 	 *
964 	 * Also, the two rels could represent a clauseless join that has to be
965 	 * completed to build up the LHS or RHS of an outer join.
966 	 */
967 	foreach(l, root->join_info_list)
968 	{
969 		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
970 
971 		/* ignore full joins --- other mechanisms handle them */
972 		if (sjinfo->jointype == JOIN_FULL)
973 			continue;
974 
975 		/* Can we perform the SJ with these rels? */
976 		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
977 			bms_is_subset(sjinfo->min_righthand, rel2->relids))
978 		{
979 			result = true;
980 			break;
981 		}
982 		if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
983 			bms_is_subset(sjinfo->min_righthand, rel1->relids))
984 		{
985 			result = true;
986 			break;
987 		}
988 
989 		/*
990 		 * Might we need to join these rels to complete the RHS?  We have to
991 		 * use "overlap" tests since either rel might include a lower SJ that
992 		 * has been proven to commute with this one.
993 		 */
994 		if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
995 			bms_overlap(sjinfo->min_righthand, rel2->relids))
996 		{
997 			result = true;
998 			break;
999 		}
1000 
1001 		/* Likewise for the LHS. */
1002 		if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
1003 			bms_overlap(sjinfo->min_lefthand, rel2->relids))
1004 		{
1005 			result = true;
1006 			break;
1007 		}
1008 	}
1009 
1010 	/*
1011 	 * We do not force the join to occur if either input rel can legally be
1012 	 * joined to anything else using joinclauses.  This essentially means that
1013 	 * clauseless bushy joins are put off as long as possible. The reason is
1014 	 * that when there is a join order restriction high up in the join tree
1015 	 * (that is, with many rels inside the LHS or RHS), we would otherwise
1016 	 * expend lots of effort considering very stupid join combinations within
1017 	 * its LHS or RHS.
1018 	 */
1019 	if (result)
1020 	{
1021 		if (has_legal_joinclause(root, rel1) ||
1022 			has_legal_joinclause(root, rel2))
1023 			result = false;
1024 	}
1025 
1026 	return result;
1027 }
1028 
1029 
1030 /*
1031  * has_join_restriction
1032  *		Detect whether the specified relation has join-order restrictions,
1033  *		due to being inside an outer join or an IN (sub-SELECT),
1034  *		or participating in any LATERAL references or multi-rel PHVs.
1035  *
1036  * Essentially, this tests whether have_join_order_restriction() could
1037  * succeed with this rel and some other one.  It's OK if we sometimes
1038  * say "true" incorrectly.  (Therefore, we don't bother with the relatively
1039  * expensive has_legal_joinclause test.)
1040  */
1041 static bool
has_join_restriction(PlannerInfo * root,RelOptInfo * rel)1042 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
1043 {
1044 	ListCell   *l;
1045 
1046 	if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
1047 		return true;
1048 
1049 	foreach(l, root->placeholder_list)
1050 	{
1051 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
1052 
1053 		if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
1054 			!bms_equal(rel->relids, phinfo->ph_eval_at))
1055 			return true;
1056 	}
1057 
1058 	foreach(l, root->join_info_list)
1059 	{
1060 		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
1061 
1062 		/* ignore full joins --- other mechanisms preserve their ordering */
1063 		if (sjinfo->jointype == JOIN_FULL)
1064 			continue;
1065 
1066 		/* ignore if SJ is already contained in rel */
1067 		if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
1068 			bms_is_subset(sjinfo->min_righthand, rel->relids))
1069 			continue;
1070 
1071 		/* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
1072 		if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
1073 			bms_overlap(sjinfo->min_righthand, rel->relids))
1074 			return true;
1075 	}
1076 
1077 	return false;
1078 }
1079 
1080 
1081 /*
1082  * has_legal_joinclause
1083  *		Detect whether the specified relation can legally be joined
1084  *		to any other rels using join clauses.
1085  *
1086  * We consider only joins to single other relations in the current
1087  * initial_rels list.  This is sufficient to get a "true" result in most real
1088  * queries, and an occasional erroneous "false" will only cost a bit more
1089  * planning time.  The reason for this limitation is that considering joins to
1090  * other joins would require proving that the other join rel can legally be
1091  * formed, which seems like too much trouble for something that's only a
1092  * heuristic to save planning time.  (Note: we must look at initial_rels
1093  * and not all of the query, since when we are planning a sub-joinlist we
1094  * may be forced to make clauseless joins within initial_rels even though
1095  * there are join clauses linking to other parts of the query.)
1096  */
1097 static bool
has_legal_joinclause(PlannerInfo * root,RelOptInfo * rel)1098 has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
1099 {
1100 	ListCell   *lc;
1101 
1102 	foreach(lc, root->initial_rels)
1103 	{
1104 		RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
1105 
1106 		/* ignore rels that are already in "rel" */
1107 		if (bms_overlap(rel->relids, rel2->relids))
1108 			continue;
1109 
1110 		if (have_relevant_joinclause(root, rel, rel2))
1111 		{
1112 			Relids		joinrelids;
1113 			SpecialJoinInfo *sjinfo;
1114 			bool		reversed;
1115 
1116 			/* join_is_legal needs relids of the union */
1117 			joinrelids = bms_union(rel->relids, rel2->relids);
1118 
1119 			if (join_is_legal(root, rel, rel2, joinrelids,
1120 							  &sjinfo, &reversed))
1121 			{
1122 				/* Yes, this will work */
1123 				bms_free(joinrelids);
1124 				return true;
1125 			}
1126 
1127 			bms_free(joinrelids);
1128 		}
1129 	}
1130 
1131 	return false;
1132 }
1133 
1134 
1135 /*
1136  * There's a pitfall for creating parameterized nestloops: suppose the inner
1137  * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
1138  * minimum eval_at set includes the outer rel (B) and some third rel (C).
1139  * We might think we could create a B/A nestloop join that's parameterized by
1140  * C.  But we would end up with a plan in which the PHV's expression has to be
1141  * evaluated as a nestloop parameter at the B/A join; and the executor is only
1142  * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
1143  * and overhead to the executor for such corner cases, it seems better to
1144  * forbid the join.  (Note that we can still make use of A's parameterized
1145  * path with pre-joined B+C as the outer rel.  have_join_order_restriction()
1146  * ensures that we will consider making such a join even if there are not
1147  * other reasons to do so.)
1148  *
1149  * So we check whether any PHVs used in the query could pose such a hazard.
1150  * We don't have any simple way of checking whether a risky PHV would actually
1151  * be used in the inner plan, and the case is so unusual that it doesn't seem
1152  * worth working very hard on it.
1153  *
1154  * This needs to be checked in two places.  If the inner rel's minimum
1155  * parameterization would trigger the restriction, then join_is_legal() should
1156  * reject the join altogether, because there will be no workable paths for it.
1157  * But joinpath.c has to check again for every proposed nestloop path, because
1158  * the inner path might have more than the minimum parameterization, causing
1159  * some PHV to be dangerous for it that otherwise wouldn't be.
1160  */
1161 bool
have_dangerous_phv(PlannerInfo * root,Relids outer_relids,Relids inner_params)1162 have_dangerous_phv(PlannerInfo *root,
1163 				   Relids outer_relids, Relids inner_params)
1164 {
1165 	ListCell   *lc;
1166 
1167 	foreach(lc, root->placeholder_list)
1168 	{
1169 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
1170 
1171 		if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
1172 			continue;			/* ignore, could not be a nestloop param */
1173 		if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
1174 			continue;			/* ignore, not relevant to this join */
1175 		if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
1176 			continue;			/* safe, it can be eval'd within outerrel */
1177 		/* Otherwise, it's potentially unsafe, so reject the join */
1178 		return true;
1179 	}
1180 
1181 	/* OK to perform the join */
1182 	return false;
1183 }
1184 
1185 
1186 /*
1187  * is_dummy_rel --- has relation been proven empty?
1188  */
1189 bool
is_dummy_rel(RelOptInfo * rel)1190 is_dummy_rel(RelOptInfo *rel)
1191 {
1192 	Path	   *path;
1193 
1194 	/*
1195 	 * A rel that is known dummy will have just one path that is a childless
1196 	 * Append.  (Even if somehow it has more paths, a childless Append will
1197 	 * have cost zero and hence should be at the front of the pathlist.)
1198 	 */
1199 	if (rel->pathlist == NIL)
1200 		return false;
1201 	path = (Path *) linitial(rel->pathlist);
1202 
1203 	/*
1204 	 * Initially, a dummy path will just be a childless Append.  But in later
1205 	 * planning stages we might stick a ProjectSetPath and/or ProjectionPath
1206 	 * on top, since Append can't project.  Rather than make assumptions about
1207 	 * which combinations can occur, just descend through whatever we find.
1208 	 */
1209 	for (;;)
1210 	{
1211 		if (IsA(path, ProjectionPath))
1212 			path = ((ProjectionPath *) path)->subpath;
1213 		else if (IsA(path, ProjectSetPath))
1214 			path = ((ProjectSetPath *) path)->subpath;
1215 		else
1216 			break;
1217 	}
1218 	if (IS_DUMMY_APPEND(path))
1219 		return true;
1220 	return false;
1221 }
1222 
1223 /*
1224  * Mark a relation as proven empty.
1225  *
1226  * During GEQO planning, this can get invoked more than once on the same
1227  * baserel struct, so it's worth checking to see if the rel is already marked
1228  * dummy.
1229  *
1230  * Also, when called during GEQO join planning, we are in a short-lived
1231  * memory context.  We must make sure that the dummy path attached to a
1232  * baserel survives the GEQO cycle, else the baserel is trashed for future
1233  * GEQO cycles.  On the other hand, when we are marking a joinrel during GEQO,
1234  * we don't want the dummy path to clutter the main planning context.  Upshot
1235  * is that the best solution is to explicitly make the dummy path in the same
1236  * context the given RelOptInfo is in.
1237  */
1238 void
mark_dummy_rel(RelOptInfo * rel)1239 mark_dummy_rel(RelOptInfo *rel)
1240 {
1241 	MemoryContext oldcontext;
1242 
1243 	/* Already marked? */
1244 	if (is_dummy_rel(rel))
1245 		return;
1246 
1247 	/* No, so choose correct context to make the dummy path in */
1248 	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
1249 
1250 	/* Set dummy size estimate */
1251 	rel->rows = 0;
1252 
1253 	/* Evict any previously chosen paths */
1254 	rel->pathlist = NIL;
1255 	rel->partial_pathlist = NIL;
1256 
1257 	/* Set up the dummy path */
1258 	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
1259 											  NIL, rel->lateral_relids,
1260 											  0, false, NIL, -1));
1261 
1262 	/* Set or update cheapest_total_path and related fields */
1263 	set_cheapest(rel);
1264 
1265 	MemoryContextSwitchTo(oldcontext);
1266 }
1267 
1268 
1269 /*
1270  * restriction_is_constant_false --- is a restrictlist just FALSE?
1271  *
1272  * In cases where a qual is provably constant FALSE, eval_const_expressions
1273  * will generally have thrown away anything that's ANDed with it.  In outer
1274  * join situations this will leave us computing cartesian products only to
1275  * decide there's no match for an outer row, which is pretty stupid.  So,
1276  * we need to detect the case.
1277  *
1278  * If only_pushed_down is true, then consider only quals that are pushed-down
1279  * from the point of view of the joinrel.
1280  */
1281 static bool
restriction_is_constant_false(List * restrictlist,RelOptInfo * joinrel,bool only_pushed_down)1282 restriction_is_constant_false(List *restrictlist,
1283 							  RelOptInfo *joinrel,
1284 							  bool only_pushed_down)
1285 {
1286 	ListCell   *lc;
1287 
1288 	/*
1289 	 * Despite the above comment, the restriction list we see here might
1290 	 * possibly have other members besides the FALSE constant, since other
1291 	 * quals could get "pushed down" to the outer join level.  So we check
1292 	 * each member of the list.
1293 	 */
1294 	foreach(lc, restrictlist)
1295 	{
1296 		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1297 
1298 		if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1299 			continue;
1300 
1301 		if (rinfo->clause && IsA(rinfo->clause, Const))
1302 		{
1303 			Const	   *con = (Const *) rinfo->clause;
1304 
1305 			/* constant NULL is as good as constant FALSE for our purposes */
1306 			if (con->constisnull)
1307 				return true;
1308 			if (!DatumGetBool(con->constvalue))
1309 				return true;
1310 		}
1311 	}
1312 	return false;
1313 }
1314 
1315 /*
1316  * Assess whether join between given two partitioned relations can be broken
1317  * down into joins between matching partitions; a technique called
1318  * "partitionwise join"
1319  *
1320  * Partitionwise join is possible when a. Joining relations have same
1321  * partitioning scheme b. There exists an equi-join between the partition keys
1322  * of the two relations.
1323  *
1324  * Partitionwise join is planned as follows (details: optimizer/README.)
1325  *
1326  * 1. Create the RelOptInfos for joins between matching partitions i.e
1327  * child-joins and add paths to them.
1328  *
1329  * 2. Construct Append or MergeAppend paths across the set of child joins.
1330  * This second phase is implemented by generate_partitionwise_join_paths().
1331  *
1332  * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
1333  * obtained by translating the respective parent join structures.
1334  */
1335 static void
try_partitionwise_join(PlannerInfo * root,RelOptInfo * rel1,RelOptInfo * rel2,RelOptInfo * joinrel,SpecialJoinInfo * parent_sjinfo,List * parent_restrictlist)1336 try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
1337 					   RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
1338 					   List *parent_restrictlist)
1339 {
1340 	bool		rel1_is_simple = IS_SIMPLE_REL(rel1);
1341 	bool		rel2_is_simple = IS_SIMPLE_REL(rel2);
1342 	int			nparts;
1343 	int			cnt_parts;
1344 
1345 	/* Guard against stack overflow due to overly deep partition hierarchy. */
1346 	check_stack_depth();
1347 
1348 	/* Nothing to do, if the join relation is not partitioned. */
1349 	if (!IS_PARTITIONED_REL(joinrel))
1350 		return;
1351 
1352 	/* The join relation should have consider_partitionwise_join set. */
1353 	Assert(joinrel->consider_partitionwise_join);
1354 
1355 	/*
1356 	 * Since this join relation is partitioned, all the base relations
1357 	 * participating in this join must be partitioned and so are all the
1358 	 * intermediate join relations.
1359 	 */
1360 	Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
1361 	Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
1362 
1363 	/* The joining relations should have consider_partitionwise_join set. */
1364 	Assert(rel1->consider_partitionwise_join &&
1365 		   rel2->consider_partitionwise_join);
1366 
1367 	/*
1368 	 * The partition scheme of the join relation should match that of the
1369 	 * joining relations.
1370 	 */
1371 	Assert(joinrel->part_scheme == rel1->part_scheme &&
1372 		   joinrel->part_scheme == rel2->part_scheme);
1373 
1374 	/*
1375 	 * Since we allow partitionwise join only when the partition bounds of the
1376 	 * joining relations exactly match, the partition bounds of the join
1377 	 * should match those of the joining relations.
1378 	 */
1379 	Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1380 								  joinrel->part_scheme->parttyplen,
1381 								  joinrel->part_scheme->parttypbyval,
1382 								  joinrel->boundinfo, rel1->boundinfo));
1383 	Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1384 								  joinrel->part_scheme->parttyplen,
1385 								  joinrel->part_scheme->parttypbyval,
1386 								  joinrel->boundinfo, rel2->boundinfo));
1387 
1388 	nparts = joinrel->nparts;
1389 
1390 	/*
1391 	 * Create child-join relations for this partitioned join, if those don't
1392 	 * exist. Add paths to child-joins for a pair of child relations
1393 	 * corresponding to the given pair of parent relations.
1394 	 */
1395 	for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++)
1396 	{
1397 		RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
1398 		RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
1399 		bool		rel1_empty = (child_rel1 == NULL ||
1400 								  IS_DUMMY_REL(child_rel1));
1401 		bool		rel2_empty = (child_rel2 == NULL ||
1402 								  IS_DUMMY_REL(child_rel2));
1403 		SpecialJoinInfo *child_sjinfo;
1404 		List	   *child_restrictlist;
1405 		RelOptInfo *child_joinrel;
1406 		Relids		child_joinrelids;
1407 		AppendRelInfo **appinfos;
1408 		int			nappinfos;
1409 
1410 		/*
1411 		 * Check for cases where we can prove that this segment of the join
1412 		 * returns no rows, due to one or both inputs being empty (including
1413 		 * inputs that have been pruned away entirely).  If so just ignore it.
1414 		 * These rules are equivalent to populate_joinrel_with_paths's rules
1415 		 * for dummy input relations.
1416 		 */
1417 		switch (parent_sjinfo->jointype)
1418 		{
1419 			case JOIN_INNER:
1420 			case JOIN_SEMI:
1421 				if (rel1_empty || rel2_empty)
1422 					continue;	/* ignore this join segment */
1423 				break;
1424 			case JOIN_LEFT:
1425 			case JOIN_ANTI:
1426 				if (rel1_empty)
1427 					continue;	/* ignore this join segment */
1428 				break;
1429 			case JOIN_FULL:
1430 				if (rel1_empty && rel2_empty)
1431 					continue;	/* ignore this join segment */
1432 				break;
1433 			default:
1434 				/* other values not expected here */
1435 				elog(ERROR, "unrecognized join type: %d",
1436 					 (int) parent_sjinfo->jointype);
1437 				break;
1438 		}
1439 
1440 		/*
1441 		 * If a child has been pruned entirely then we can't generate paths
1442 		 * for it, so we have to reject partitionwise joining unless we were
1443 		 * able to eliminate this partition above.
1444 		 */
1445 		if (child_rel1 == NULL || child_rel2 == NULL)
1446 		{
1447 			/*
1448 			 * Mark the joinrel as unpartitioned so that later functions treat
1449 			 * it correctly.
1450 			 */
1451 			joinrel->nparts = 0;
1452 			return;
1453 		}
1454 
1455 		/*
1456 		 * If a leaf relation has consider_partitionwise_join=false, it means
1457 		 * that it's a dummy relation for which we skipped setting up tlist
1458 		 * expressions and adding EC members in set_append_rel_size(), so
1459 		 * again we have to fail here.
1460 		 */
1461 		if (rel1_is_simple && !child_rel1->consider_partitionwise_join)
1462 		{
1463 			Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL);
1464 			Assert(IS_DUMMY_REL(child_rel1));
1465 			joinrel->nparts = 0;
1466 			return;
1467 		}
1468 		if (rel2_is_simple && !child_rel2->consider_partitionwise_join)
1469 		{
1470 			Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL);
1471 			Assert(IS_DUMMY_REL(child_rel2));
1472 			joinrel->nparts = 0;
1473 			return;
1474 		}
1475 
1476 		/* We should never try to join two overlapping sets of rels. */
1477 		Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
1478 		child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
1479 		appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
1480 
1481 		/*
1482 		 * Construct SpecialJoinInfo from parent join relations's
1483 		 * SpecialJoinInfo.
1484 		 */
1485 		child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
1486 											   child_rel1->relids,
1487 											   child_rel2->relids);
1488 
1489 		/*
1490 		 * Construct restrictions applicable to the child join from those
1491 		 * applicable to the parent join.
1492 		 */
1493 		child_restrictlist =
1494 			(List *) adjust_appendrel_attrs(root,
1495 											(Node *) parent_restrictlist,
1496 											nappinfos, appinfos);
1497 		pfree(appinfos);
1498 
1499 		child_joinrel = joinrel->part_rels[cnt_parts];
1500 		if (!child_joinrel)
1501 		{
1502 			child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
1503 												 joinrel, child_restrictlist,
1504 												 child_sjinfo,
1505 												 child_sjinfo->jointype);
1506 			joinrel->part_rels[cnt_parts] = child_joinrel;
1507 		}
1508 
1509 		Assert(bms_equal(child_joinrel->relids, child_joinrelids));
1510 
1511 		populate_joinrel_with_paths(root, child_rel1, child_rel2,
1512 									child_joinrel, child_sjinfo,
1513 									child_restrictlist);
1514 	}
1515 }
1516 
1517 /*
1518  * Construct the SpecialJoinInfo for a child-join by translating
1519  * SpecialJoinInfo for the join between parents. left_relids and right_relids
1520  * are the relids of left and right side of the join respectively.
1521  */
1522 static SpecialJoinInfo *
build_child_join_sjinfo(PlannerInfo * root,SpecialJoinInfo * parent_sjinfo,Relids left_relids,Relids right_relids)1523 build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
1524 						Relids left_relids, Relids right_relids)
1525 {
1526 	SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
1527 	AppendRelInfo **left_appinfos;
1528 	int			left_nappinfos;
1529 	AppendRelInfo **right_appinfos;
1530 	int			right_nappinfos;
1531 
1532 	memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
1533 	left_appinfos = find_appinfos_by_relids(root, left_relids,
1534 											&left_nappinfos);
1535 	right_appinfos = find_appinfos_by_relids(root, right_relids,
1536 											 &right_nappinfos);
1537 
1538 	sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
1539 											   left_nappinfos, left_appinfos);
1540 	sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
1541 												right_nappinfos,
1542 												right_appinfos);
1543 	sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
1544 											   left_nappinfos, left_appinfos);
1545 	sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
1546 												right_nappinfos,
1547 												right_appinfos);
1548 	sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
1549 															 (Node *) sjinfo->semi_rhs_exprs,
1550 															 right_nappinfos,
1551 															 right_appinfos);
1552 
1553 	pfree(left_appinfos);
1554 	pfree(right_appinfos);
1555 
1556 	return sjinfo;
1557 }
1558 
1559 /*
1560  * Returns true if there exists an equi-join condition for each pair of
1561  * partition keys from given relations being joined.
1562  */
1563 bool
have_partkey_equi_join(RelOptInfo * joinrel,RelOptInfo * rel1,RelOptInfo * rel2,JoinType jointype,List * restrictlist)1564 have_partkey_equi_join(RelOptInfo *joinrel,
1565 					   RelOptInfo *rel1, RelOptInfo *rel2,
1566 					   JoinType jointype, List *restrictlist)
1567 {
1568 	PartitionScheme part_scheme = rel1->part_scheme;
1569 	ListCell   *lc;
1570 	int			cnt_pks;
1571 	bool		pk_has_clause[PARTITION_MAX_KEYS];
1572 	bool		strict_op;
1573 
1574 	/*
1575 	 * This function should be called when the joining relations have same
1576 	 * partitioning scheme.
1577 	 */
1578 	Assert(rel1->part_scheme == rel2->part_scheme);
1579 	Assert(part_scheme);
1580 
1581 	memset(pk_has_clause, 0, sizeof(pk_has_clause));
1582 	foreach(lc, restrictlist)
1583 	{
1584 		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1585 		OpExpr	   *opexpr;
1586 		Expr	   *expr1;
1587 		Expr	   *expr2;
1588 		int			ipk1;
1589 		int			ipk2;
1590 
1591 		/* If processing an outer join, only use its own join clauses. */
1592 		if (IS_OUTER_JOIN(jointype) &&
1593 			RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1594 			continue;
1595 
1596 		/* Skip clauses which can not be used for a join. */
1597 		if (!rinfo->can_join)
1598 			continue;
1599 
1600 		/* Skip clauses which are not equality conditions. */
1601 		if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator))
1602 			continue;
1603 
1604 		opexpr = castNode(OpExpr, rinfo->clause);
1605 
1606 		/*
1607 		 * The equi-join between partition keys is strict if equi-join between
1608 		 * at least one partition key is using a strict operator. See
1609 		 * explanation about outer join reordering identity 3 in
1610 		 * optimizer/README
1611 		 */
1612 		strict_op = op_strict(opexpr->opno);
1613 
1614 		/* Match the operands to the relation. */
1615 		if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
1616 			bms_is_subset(rinfo->right_relids, rel2->relids))
1617 		{
1618 			expr1 = linitial(opexpr->args);
1619 			expr2 = lsecond(opexpr->args);
1620 		}
1621 		else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
1622 				 bms_is_subset(rinfo->right_relids, rel1->relids))
1623 		{
1624 			expr1 = lsecond(opexpr->args);
1625 			expr2 = linitial(opexpr->args);
1626 		}
1627 		else
1628 			continue;
1629 
1630 		/*
1631 		 * Only clauses referencing the partition keys are useful for
1632 		 * partitionwise join.
1633 		 */
1634 		ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
1635 		if (ipk1 < 0)
1636 			continue;
1637 		ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
1638 		if (ipk2 < 0)
1639 			continue;
1640 
1641 		/*
1642 		 * If the clause refers to keys at different ordinal positions, it can
1643 		 * not be used for partitionwise join.
1644 		 */
1645 		if (ipk1 != ipk2)
1646 			continue;
1647 
1648 		/*
1649 		 * The clause allows partitionwise join if only it uses the same
1650 		 * operator family as that specified by the partition key.
1651 		 */
1652 		if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH)
1653 		{
1654 			if (!op_in_opfamily(rinfo->hashjoinoperator,
1655 								part_scheme->partopfamily[ipk1]))
1656 				continue;
1657 		}
1658 		else if (!list_member_oid(rinfo->mergeopfamilies,
1659 								  part_scheme->partopfamily[ipk1]))
1660 			continue;
1661 
1662 		/* Mark the partition key as having an equi-join clause. */
1663 		pk_has_clause[ipk1] = true;
1664 	}
1665 
1666 	/* Check whether every partition key has an equi-join condition. */
1667 	for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
1668 	{
1669 		if (!pk_has_clause[cnt_pks])
1670 			return false;
1671 	}
1672 
1673 	return true;
1674 }
1675 
1676 /*
1677  * Find the partition key from the given relation matching the given
1678  * expression. If found, return the index of the partition key, else return -1.
1679  */
1680 static int
match_expr_to_partition_keys(Expr * expr,RelOptInfo * rel,bool strict_op)1681 match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
1682 {
1683 	int			cnt;
1684 
1685 	/* This function should be called only for partitioned relations. */
1686 	Assert(rel->part_scheme);
1687 
1688 	/* Remove any relabel decorations. */
1689 	while (IsA(expr, RelabelType))
1690 		expr = (Expr *) (castNode(RelabelType, expr))->arg;
1691 
1692 	for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
1693 	{
1694 		ListCell   *lc;
1695 
1696 		Assert(rel->partexprs);
1697 		foreach(lc, rel->partexprs[cnt])
1698 		{
1699 			if (equal(lfirst(lc), expr))
1700 				return cnt;
1701 		}
1702 
1703 		if (!strict_op)
1704 			continue;
1705 
1706 		/*
1707 		 * If it's a strict equi-join a NULL partition key on one side will
1708 		 * not join a NULL partition key on the other side. So, rows with NULL
1709 		 * partition key from a partition on one side can not join with those
1710 		 * from a non-matching partition on the other side. So, search the
1711 		 * nullable partition keys as well.
1712 		 */
1713 		Assert(rel->nullable_partexprs);
1714 		foreach(lc, rel->nullable_partexprs[cnt])
1715 		{
1716 			if (equal(lfirst(lc), expr))
1717 				return cnt;
1718 		}
1719 	}
1720 
1721 	return -1;
1722 }
1723