1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  *	  Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/joinpath.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/paths.h"
24 #include "optimizer/planmain.h"
25 
26 /* Hook for plugins to get control in add_paths_to_joinrel() */
27 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
28 
29 #define PATH_PARAM_BY_REL(path, rel)  \
30 	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
31 
32 static void try_partial_mergejoin_path(PlannerInfo *root,
33 						   RelOptInfo *joinrel,
34 						   Path *outer_path,
35 						   Path *inner_path,
36 						   List *pathkeys,
37 						   List *mergeclauses,
38 						   List *outersortkeys,
39 						   List *innersortkeys,
40 						   JoinType jointype,
41 						   JoinPathExtraData *extra);
42 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
43 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
44 					 JoinType jointype, JoinPathExtraData *extra);
45 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
46 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
47 					 JoinType jointype, JoinPathExtraData *extra);
48 static void consider_parallel_nestloop(PlannerInfo *root,
49 						   RelOptInfo *joinrel,
50 						   RelOptInfo *outerrel,
51 						   RelOptInfo *innerrel,
52 						   JoinType jointype,
53 						   JoinPathExtraData *extra);
54 static void consider_parallel_mergejoin(PlannerInfo *root,
55 							RelOptInfo *joinrel,
56 							RelOptInfo *outerrel,
57 							RelOptInfo *innerrel,
58 							JoinType jointype,
59 							JoinPathExtraData *extra,
60 							Path *inner_cheapest_total);
61 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
63 					 JoinType jointype, JoinPathExtraData *extra);
64 static List *select_mergejoin_clauses(PlannerInfo *root,
65 						 RelOptInfo *joinrel,
66 						 RelOptInfo *outerrel,
67 						 RelOptInfo *innerrel,
68 						 List *restrictlist,
69 						 JoinType jointype,
70 						 bool *mergejoin_allowed);
71 static void generate_mergejoin_paths(PlannerInfo *root,
72 						 RelOptInfo *joinrel,
73 						 RelOptInfo *innerrel,
74 						 Path *outerpath,
75 						 JoinType jointype,
76 						 JoinPathExtraData *extra,
77 						 bool useallclauses,
78 						 Path *inner_cheapest_total,
79 						 List *merge_pathkeys,
80 						 bool is_partial);
81 
82 
83 /*
84  * add_paths_to_joinrel
85  *	  Given a join relation and two component rels from which it can be made,
86  *	  consider all possible paths that use the two component rels as outer
87  *	  and inner rel respectively.  Add these paths to the join rel's pathlist
88  *	  if they survive comparison with other paths (and remove any existing
89  *	  paths that are dominated by these paths).
90  *
91  * Modifies the pathlist field of the joinrel node to contain the best
92  * paths found so far.
93  *
94  * jointype is not necessarily the same as sjinfo->jointype; it might be
95  * "flipped around" if we are considering joining the rels in the opposite
96  * direction from what's indicated in sjinfo.
97  *
98  * Also, this routine and others in this module accept the special JoinTypes
99  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
100  * unique-ify the outer or inner relation and then apply a regular inner
101  * join.  These values are not allowed to propagate outside this module,
102  * however.  Path cost estimation code may need to recognize that it's
103  * dealing with such a case --- the combination of nominal jointype INNER
104  * with sjinfo->jointype == JOIN_SEMI indicates that.
105  */
106 void
add_paths_to_joinrel(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,SpecialJoinInfo * sjinfo,List * restrictlist)107 add_paths_to_joinrel(PlannerInfo *root,
108 					 RelOptInfo *joinrel,
109 					 RelOptInfo *outerrel,
110 					 RelOptInfo *innerrel,
111 					 JoinType jointype,
112 					 SpecialJoinInfo *sjinfo,
113 					 List *restrictlist)
114 {
115 	JoinPathExtraData extra;
116 	bool		mergejoin_allowed = true;
117 	ListCell   *lc;
118 
119 	extra.restrictlist = restrictlist;
120 	extra.mergeclause_list = NIL;
121 	extra.sjinfo = sjinfo;
122 	extra.param_source_rels = NULL;
123 
124 	/*
125 	 * See if the inner relation is provably unique for this outer rel.
126 	 *
127 	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
128 	 * matter since the executor can make the equivalent optimization anyway;
129 	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
130 	 * must be considering a semijoin whose inner side is not provably unique
131 	 * (else reduce_unique_semijoins would've simplified it), so there's no
132 	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
133 	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
134 	 * because the path produced by create_unique_path will be unique relative
135 	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
136 	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
137 	 * letting that value escape this module.
138 	 */
139 	switch (jointype)
140 	{
141 		case JOIN_SEMI:
142 		case JOIN_ANTI:
143 			extra.inner_unique = false; /* well, unproven */
144 			break;
145 		case JOIN_UNIQUE_INNER:
146 			extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
147 											   outerrel->relids);
148 			break;
149 		case JOIN_UNIQUE_OUTER:
150 			extra.inner_unique = innerrel_is_unique(root,
151 													outerrel->relids,
152 													innerrel,
153 													JOIN_INNER,
154 													restrictlist,
155 													false);
156 			break;
157 		default:
158 			extra.inner_unique = innerrel_is_unique(root,
159 													outerrel->relids,
160 													innerrel,
161 													jointype,
162 													restrictlist,
163 													false);
164 			break;
165 	}
166 
167 	/*
168 	 * Find potential mergejoin clauses.  We can skip this if we are not
169 	 * interested in doing a mergejoin.  However, mergejoin may be our only
170 	 * way of implementing a full outer join, so override enable_mergejoin if
171 	 * it's a full join.
172 	 */
173 	if (enable_mergejoin || jointype == JOIN_FULL)
174 		extra.mergeclause_list = select_mergejoin_clauses(root,
175 														  joinrel,
176 														  outerrel,
177 														  innerrel,
178 														  restrictlist,
179 														  jointype,
180 														  &mergejoin_allowed);
181 
182 	/*
183 	 * If it's SEMI, ANTI, or inner_unique join, compute correction factors
184 	 * for cost estimation.  These will be the same for all paths.
185 	 */
186 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
187 		compute_semi_anti_join_factors(root, outerrel, innerrel,
188 									   jointype, sjinfo, restrictlist,
189 									   &extra.semifactors);
190 
191 	/*
192 	 * Decide whether it's sensible to generate parameterized paths for this
193 	 * joinrel, and if so, which relations such paths should require.  There
194 	 * is usually no need to create a parameterized result path unless there
195 	 * is a join order restriction that prevents joining one of our input rels
196 	 * directly to the parameter source rel instead of joining to the other
197 	 * input rel.  (But see allow_star_schema_join().)	This restriction
198 	 * reduces the number of parameterized paths we have to deal with at
199 	 * higher join levels, without compromising the quality of the resulting
200 	 * plan.  We express the restriction as a Relids set that must overlap the
201 	 * parameterization of any proposed join path.
202 	 */
203 	foreach(lc, root->join_info_list)
204 	{
205 		SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
206 
207 		/*
208 		 * SJ is relevant to this join if we have some part of its RHS
209 		 * (possibly not all of it), and haven't yet joined to its LHS.  (This
210 		 * test is pretty simplistic, but should be sufficient considering the
211 		 * join has already been proven legal.)  If the SJ is relevant, it
212 		 * presents constraints for joining to anything not in its RHS.
213 		 */
214 		if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
215 			!bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
216 			extra.param_source_rels = bms_join(extra.param_source_rels,
217 											   bms_difference(root->all_baserels,
218 															  sjinfo2->min_righthand));
219 
220 		/* full joins constrain both sides symmetrically */
221 		if (sjinfo2->jointype == JOIN_FULL &&
222 			bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
223 			!bms_overlap(joinrel->relids, sjinfo2->min_righthand))
224 			extra.param_source_rels = bms_join(extra.param_source_rels,
225 											   bms_difference(root->all_baserels,
226 															  sjinfo2->min_lefthand));
227 	}
228 
229 	/*
230 	 * However, when a LATERAL subquery is involved, there will simply not be
231 	 * any paths for the joinrel that aren't parameterized by whatever the
232 	 * subquery is parameterized by, unless its parameterization is resolved
233 	 * within the joinrel.  So we might as well allow additional dependencies
234 	 * on whatever residual lateral dependencies the joinrel will have.
235 	 */
236 	extra.param_source_rels = bms_add_members(extra.param_source_rels,
237 											  joinrel->lateral_relids);
238 
239 	/*
240 	 * 1. Consider mergejoin paths where both relations must be explicitly
241 	 * sorted.  Skip this if we can't mergejoin.
242 	 */
243 	if (mergejoin_allowed)
244 		sort_inner_and_outer(root, joinrel, outerrel, innerrel,
245 							 jointype, &extra);
246 
247 	/*
248 	 * 2. Consider paths where the outer relation need not be explicitly
249 	 * sorted. This includes both nestloops and mergejoins where the outer
250 	 * path is already ordered.  Again, skip this if we can't mergejoin.
251 	 * (That's okay because we know that nestloop can't handle right/full
252 	 * joins at all, so it wouldn't work in the prohibited cases either.)
253 	 */
254 	if (mergejoin_allowed)
255 		match_unsorted_outer(root, joinrel, outerrel, innerrel,
256 							 jointype, &extra);
257 
258 #ifdef NOT_USED
259 
260 	/*
261 	 * 3. Consider paths where the inner relation need not be explicitly
262 	 * sorted.  This includes mergejoins only (nestloops were already built in
263 	 * match_unsorted_outer).
264 	 *
265 	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
266 	 * significant difference between the inner and outer side of a mergejoin,
267 	 * so match_unsorted_inner creates no paths that aren't equivalent to
268 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
269 	 * invoked with the two rels given in the other order.
270 	 */
271 	if (mergejoin_allowed)
272 		match_unsorted_inner(root, joinrel, outerrel, innerrel,
273 							 jointype, &extra);
274 #endif
275 
276 	/*
277 	 * 4. Consider paths where both outer and inner relations must be hashed
278 	 * before being joined.  As above, disregard enable_hashjoin for full
279 	 * joins, because there may be no other alternative.
280 	 */
281 	if (enable_hashjoin || jointype == JOIN_FULL)
282 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
283 							 jointype, &extra);
284 
285 	/*
286 	 * 5. If inner and outer relations are foreign tables (or joins) belonging
287 	 * to the same server and assigned to the same user to check access
288 	 * permissions as, give the FDW a chance to push down joins.
289 	 */
290 	if (joinrel->fdwroutine &&
291 		joinrel->fdwroutine->GetForeignJoinPaths)
292 		joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
293 												 outerrel, innerrel,
294 												 jointype, &extra);
295 
296 	/*
297 	 * 6. Finally, give extensions a chance to manipulate the path list.
298 	 */
299 	if (set_join_pathlist_hook)
300 		set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
301 							   jointype, &extra);
302 }
303 
304 /*
305  * We override the param_source_rels heuristic to accept nestloop paths in
306  * which the outer rel satisfies some but not all of the inner path's
307  * parameterization.  This is necessary to get good plans for star-schema
308  * scenarios, in which a parameterized path for a large table may require
309  * parameters from multiple small tables that will not get joined directly to
310  * each other.  We can handle that by stacking nestloops that have the small
311  * tables on the outside; but this breaks the rule the param_source_rels
312  * heuristic is based on, namely that parameters should not be passed down
313  * across joins unless there's a join-order-constraint-based reason to do so.
314  * So we ignore the param_source_rels restriction when this case applies.
315  *
316  * allow_star_schema_join() returns TRUE if the param_source_rels restriction
317  * should be overridden, ie, it's okay to perform this join.
318  */
319 static inline bool
allow_star_schema_join(PlannerInfo * root,Path * outer_path,Path * inner_path)320 allow_star_schema_join(PlannerInfo *root,
321 					   Path *outer_path,
322 					   Path *inner_path)
323 {
324 	Relids		innerparams = PATH_REQ_OUTER(inner_path);
325 	Relids		outerrelids = outer_path->parent->relids;
326 
327 	/*
328 	 * It's a star-schema case if the outer rel provides some but not all of
329 	 * the inner rel's parameterization.
330 	 */
331 	return (bms_overlap(innerparams, outerrelids) &&
332 			bms_nonempty_difference(innerparams, outerrelids));
333 }
334 
335 /*
336  * try_nestloop_path
337  *	  Consider a nestloop join path; if it appears useful, push it into
338  *	  the joinrel's pathlist via add_path().
339  */
340 static void
try_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)341 try_nestloop_path(PlannerInfo *root,
342 				  RelOptInfo *joinrel,
343 				  Path *outer_path,
344 				  Path *inner_path,
345 				  List *pathkeys,
346 				  JoinType jointype,
347 				  JoinPathExtraData *extra)
348 {
349 	Relids		required_outer;
350 	JoinCostWorkspace workspace;
351 
352 	/*
353 	 * Check to see if proposed path is still parameterized, and reject if the
354 	 * parameterization wouldn't be sensible --- unless allow_star_schema_join
355 	 * says to allow it anyway.  Also, we must reject if have_dangerous_phv
356 	 * doesn't like the look of it, which could only happen if the nestloop is
357 	 * still parameterized.
358 	 */
359 	required_outer = calc_nestloop_required_outer(outer_path,
360 												  inner_path);
361 	if (required_outer &&
362 		((!bms_overlap(required_outer, extra->param_source_rels) &&
363 		  !allow_star_schema_join(root, outer_path, inner_path)) ||
364 		 have_dangerous_phv(root,
365 							outer_path->parent->relids,
366 							PATH_REQ_OUTER(inner_path))))
367 	{
368 		/* Waste no memory when we reject a path here */
369 		bms_free(required_outer);
370 		return;
371 	}
372 
373 	/*
374 	 * Do a precheck to quickly eliminate obviously-inferior paths.  We
375 	 * calculate a cheap lower bound on the path's cost and then use
376 	 * add_path_precheck() to see if the path is clearly going to be dominated
377 	 * by some existing path for the joinrel.  If not, do the full pushup with
378 	 * creating a fully valid path structure and submitting it to add_path().
379 	 * The latter two steps are expensive enough to make this two-phase
380 	 * methodology worthwhile.
381 	 */
382 	initial_cost_nestloop(root, &workspace, jointype,
383 						  outer_path, inner_path, extra);
384 
385 	if (add_path_precheck(joinrel,
386 						  workspace.startup_cost, workspace.total_cost,
387 						  pathkeys, required_outer))
388 	{
389 		add_path(joinrel, (Path *)
390 				 create_nestloop_path(root,
391 									  joinrel,
392 									  jointype,
393 									  &workspace,
394 									  extra,
395 									  outer_path,
396 									  inner_path,
397 									  extra->restrictlist,
398 									  pathkeys,
399 									  required_outer));
400 	}
401 	else
402 	{
403 		/* Waste no memory when we reject a path here */
404 		bms_free(required_outer);
405 	}
406 }
407 
408 /*
409  * try_partial_nestloop_path
410  *	  Consider a partial nestloop join path; if it appears useful, push it into
411  *	  the joinrel's partial_pathlist via add_partial_path().
412  */
413 static void
try_partial_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)414 try_partial_nestloop_path(PlannerInfo *root,
415 						  RelOptInfo *joinrel,
416 						  Path *outer_path,
417 						  Path *inner_path,
418 						  List *pathkeys,
419 						  JoinType jointype,
420 						  JoinPathExtraData *extra)
421 {
422 	JoinCostWorkspace workspace;
423 
424 	/*
425 	 * If the inner path is parameterized, the parameterization must be fully
426 	 * satisfied by the proposed outer path.  Parameterized partial paths are
427 	 * not supported.  The caller should already have verified that no
428 	 * extra_lateral_rels are required here.
429 	 */
430 	Assert(bms_is_empty(joinrel->lateral_relids));
431 	if (inner_path->param_info != NULL)
432 	{
433 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
434 
435 		if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
436 			return;
437 	}
438 
439 	/*
440 	 * Before creating a path, get a quick lower bound on what it is likely to
441 	 * cost.  Bail out right away if it looks terrible.
442 	 */
443 	initial_cost_nestloop(root, &workspace, jointype,
444 						  outer_path, inner_path, extra);
445 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
446 		return;
447 
448 	/* Might be good enough to be worth trying, so let's try it. */
449 	add_partial_path(joinrel, (Path *)
450 					 create_nestloop_path(root,
451 										  joinrel,
452 										  jointype,
453 										  &workspace,
454 										  extra,
455 										  outer_path,
456 										  inner_path,
457 										  extra->restrictlist,
458 										  pathkeys,
459 										  NULL));
460 }
461 
462 /*
463  * try_mergejoin_path
464  *	  Consider a merge join path; if it appears useful, push it into
465  *	  the joinrel's pathlist via add_path().
466  */
467 static void
try_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra,bool is_partial)468 try_mergejoin_path(PlannerInfo *root,
469 				   RelOptInfo *joinrel,
470 				   Path *outer_path,
471 				   Path *inner_path,
472 				   List *pathkeys,
473 				   List *mergeclauses,
474 				   List *outersortkeys,
475 				   List *innersortkeys,
476 				   JoinType jointype,
477 				   JoinPathExtraData *extra,
478 				   bool is_partial)
479 {
480 	Relids		required_outer;
481 	JoinCostWorkspace workspace;
482 
483 	if (is_partial)
484 	{
485 		try_partial_mergejoin_path(root,
486 								   joinrel,
487 								   outer_path,
488 								   inner_path,
489 								   pathkeys,
490 								   mergeclauses,
491 								   outersortkeys,
492 								   innersortkeys,
493 								   jointype,
494 								   extra);
495 		return;
496 	}
497 
498 	/*
499 	 * Check to see if proposed path is still parameterized, and reject if the
500 	 * parameterization wouldn't be sensible.
501 	 */
502 	required_outer = calc_non_nestloop_required_outer(outer_path,
503 													  inner_path);
504 	if (required_outer &&
505 		!bms_overlap(required_outer, extra->param_source_rels))
506 	{
507 		/* Waste no memory when we reject a path here */
508 		bms_free(required_outer);
509 		return;
510 	}
511 
512 	/*
513 	 * If the given paths are already well enough ordered, we can skip doing
514 	 * an explicit sort.
515 	 */
516 	if (outersortkeys &&
517 		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
518 		outersortkeys = NIL;
519 	if (innersortkeys &&
520 		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
521 		innersortkeys = NIL;
522 
523 	/*
524 	 * See comments in try_nestloop_path().
525 	 */
526 	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
527 						   outer_path, inner_path,
528 						   outersortkeys, innersortkeys,
529 						   extra);
530 
531 	if (add_path_precheck(joinrel,
532 						  workspace.startup_cost, workspace.total_cost,
533 						  pathkeys, required_outer))
534 	{
535 		add_path(joinrel, (Path *)
536 				 create_mergejoin_path(root,
537 									   joinrel,
538 									   jointype,
539 									   &workspace,
540 									   extra,
541 									   outer_path,
542 									   inner_path,
543 									   extra->restrictlist,
544 									   pathkeys,
545 									   required_outer,
546 									   mergeclauses,
547 									   outersortkeys,
548 									   innersortkeys));
549 	}
550 	else
551 	{
552 		/* Waste no memory when we reject a path here */
553 		bms_free(required_outer);
554 	}
555 }
556 
557 /*
558  * try_partial_mergejoin_path
559  *	  Consider a partial merge join path; if it appears useful, push it into
560  *	  the joinrel's pathlist via add_partial_path().
561  */
562 static void
try_partial_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra)563 try_partial_mergejoin_path(PlannerInfo *root,
564 						   RelOptInfo *joinrel,
565 						   Path *outer_path,
566 						   Path *inner_path,
567 						   List *pathkeys,
568 						   List *mergeclauses,
569 						   List *outersortkeys,
570 						   List *innersortkeys,
571 						   JoinType jointype,
572 						   JoinPathExtraData *extra)
573 {
574 	JoinCostWorkspace workspace;
575 
576 	/*
577 	 * See comments in try_partial_hashjoin_path().
578 	 */
579 	Assert(bms_is_empty(joinrel->lateral_relids));
580 	if (inner_path->param_info != NULL)
581 	{
582 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
583 
584 		if (!bms_is_empty(inner_paramrels))
585 			return;
586 	}
587 
588 	/*
589 	 * If the given paths are already well enough ordered, we can skip doing
590 	 * an explicit sort.
591 	 */
592 	if (outersortkeys &&
593 		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
594 		outersortkeys = NIL;
595 	if (innersortkeys &&
596 		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
597 		innersortkeys = NIL;
598 
599 	/*
600 	 * See comments in try_partial_nestloop_path().
601 	 */
602 	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
603 						   outer_path, inner_path,
604 						   outersortkeys, innersortkeys,
605 						   extra);
606 
607 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
608 		return;
609 
610 	/* Might be good enough to be worth trying, so let's try it. */
611 	add_partial_path(joinrel, (Path *)
612 					 create_mergejoin_path(root,
613 										   joinrel,
614 										   jointype,
615 										   &workspace,
616 										   extra,
617 										   outer_path,
618 										   inner_path,
619 										   extra->restrictlist,
620 										   pathkeys,
621 										   NULL,
622 										   mergeclauses,
623 										   outersortkeys,
624 										   innersortkeys));
625 }
626 
627 /*
628  * try_hashjoin_path
629  *	  Consider a hash join path; if it appears useful, push it into
630  *	  the joinrel's pathlist via add_path().
631  */
632 static void
try_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra)633 try_hashjoin_path(PlannerInfo *root,
634 				  RelOptInfo *joinrel,
635 				  Path *outer_path,
636 				  Path *inner_path,
637 				  List *hashclauses,
638 				  JoinType jointype,
639 				  JoinPathExtraData *extra)
640 {
641 	Relids		required_outer;
642 	JoinCostWorkspace workspace;
643 
644 	/*
645 	 * Check to see if proposed path is still parameterized, and reject if the
646 	 * parameterization wouldn't be sensible.
647 	 */
648 	required_outer = calc_non_nestloop_required_outer(outer_path,
649 													  inner_path);
650 	if (required_outer &&
651 		!bms_overlap(required_outer, extra->param_source_rels))
652 	{
653 		/* Waste no memory when we reject a path here */
654 		bms_free(required_outer);
655 		return;
656 	}
657 
658 	/*
659 	 * See comments in try_nestloop_path().  Also note that hashjoin paths
660 	 * never have any output pathkeys, per comments in create_hashjoin_path.
661 	 */
662 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
663 						  outer_path, inner_path, extra);
664 
665 	if (add_path_precheck(joinrel,
666 						  workspace.startup_cost, workspace.total_cost,
667 						  NIL, required_outer))
668 	{
669 		add_path(joinrel, (Path *)
670 				 create_hashjoin_path(root,
671 									  joinrel,
672 									  jointype,
673 									  &workspace,
674 									  extra,
675 									  outer_path,
676 									  inner_path,
677 									  extra->restrictlist,
678 									  required_outer,
679 									  hashclauses));
680 	}
681 	else
682 	{
683 		/* Waste no memory when we reject a path here */
684 		bms_free(required_outer);
685 	}
686 }
687 
688 /*
689  * try_partial_hashjoin_path
690  *	  Consider a partial hashjoin join path; if it appears useful, push it into
691  *	  the joinrel's partial_pathlist via add_partial_path().
692  */
693 static void
try_partial_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra)694 try_partial_hashjoin_path(PlannerInfo *root,
695 						  RelOptInfo *joinrel,
696 						  Path *outer_path,
697 						  Path *inner_path,
698 						  List *hashclauses,
699 						  JoinType jointype,
700 						  JoinPathExtraData *extra)
701 {
702 	JoinCostWorkspace workspace;
703 
704 	/*
705 	 * If the inner path is parameterized, the parameterization must be fully
706 	 * satisfied by the proposed outer path.  Parameterized partial paths are
707 	 * not supported.  The caller should already have verified that no
708 	 * extra_lateral_rels are required here.
709 	 */
710 	Assert(bms_is_empty(joinrel->lateral_relids));
711 	if (inner_path->param_info != NULL)
712 	{
713 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
714 
715 		if (!bms_is_empty(inner_paramrels))
716 			return;
717 	}
718 
719 	/*
720 	 * Before creating a path, get a quick lower bound on what it is likely to
721 	 * cost.  Bail out right away if it looks terrible.
722 	 */
723 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
724 						  outer_path, inner_path, extra);
725 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
726 		return;
727 
728 	/* Might be good enough to be worth trying, so let's try it. */
729 	add_partial_path(joinrel, (Path *)
730 					 create_hashjoin_path(root,
731 										  joinrel,
732 										  jointype,
733 										  &workspace,
734 										  extra,
735 										  outer_path,
736 										  inner_path,
737 										  extra->restrictlist,
738 										  NULL,
739 										  hashclauses));
740 }
741 
742 /*
743  * clause_sides_match_join
744  *	  Determine whether a join clause is of the right form to use in this join.
745  *
746  * We already know that the clause is a binary opclause referencing only the
747  * rels in the current join.  The point here is to check whether it has the
748  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
749  * rather than mixing outer and inner vars on either side.  If it matches,
750  * we set the transient flag outer_is_left to identify which side is which.
751  */
752 static inline bool
clause_sides_match_join(RestrictInfo * rinfo,RelOptInfo * outerrel,RelOptInfo * innerrel)753 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
754 						RelOptInfo *innerrel)
755 {
756 	if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
757 		bms_is_subset(rinfo->right_relids, innerrel->relids))
758 	{
759 		/* lefthand side is outer */
760 		rinfo->outer_is_left = true;
761 		return true;
762 	}
763 	else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
764 			 bms_is_subset(rinfo->right_relids, outerrel->relids))
765 	{
766 		/* righthand side is outer */
767 		rinfo->outer_is_left = false;
768 		return true;
769 	}
770 	return false;				/* no good for these input relations */
771 }
772 
773 /*
774  * sort_inner_and_outer
775  *	  Create mergejoin join paths by explicitly sorting both the outer and
776  *	  inner join relations on each available merge ordering.
777  *
778  * 'joinrel' is the join relation
779  * 'outerrel' is the outer join relation
780  * 'innerrel' is the inner join relation
781  * 'jointype' is the type of join to do
782  * 'extra' contains additional input values
783  */
784 static void
sort_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)785 sort_inner_and_outer(PlannerInfo *root,
786 					 RelOptInfo *joinrel,
787 					 RelOptInfo *outerrel,
788 					 RelOptInfo *innerrel,
789 					 JoinType jointype,
790 					 JoinPathExtraData *extra)
791 {
792 	JoinType	save_jointype = jointype;
793 	Path	   *outer_path;
794 	Path	   *inner_path;
795 	Path	   *cheapest_partial_outer = NULL;
796 	Path	   *cheapest_safe_inner = NULL;
797 	List	   *all_pathkeys;
798 	ListCell   *l;
799 
800 	/*
801 	 * We only consider the cheapest-total-cost input paths, since we are
802 	 * assuming here that a sort is required.  We will consider
803 	 * cheapest-startup-cost input paths later, and only if they don't need a
804 	 * sort.
805 	 *
806 	 * This function intentionally does not consider parameterized input
807 	 * paths, except when the cheapest-total is parameterized.  If we did so,
808 	 * we'd have a combinatorial explosion of mergejoin paths of dubious
809 	 * value.  This interacts with decisions elsewhere that also discriminate
810 	 * against mergejoins with parameterized inputs; see comments in
811 	 * src/backend/optimizer/README.
812 	 */
813 	outer_path = outerrel->cheapest_total_path;
814 	inner_path = innerrel->cheapest_total_path;
815 
816 	/*
817 	 * If either cheapest-total path is parameterized by the other rel, we
818 	 * can't use a mergejoin.  (There's no use looking for alternative input
819 	 * paths, since these should already be the least-parameterized available
820 	 * paths.)
821 	 */
822 	if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
823 		PATH_PARAM_BY_REL(inner_path, outerrel))
824 		return;
825 
826 	/*
827 	 * If unique-ification is requested, do it and then handle as a plain
828 	 * inner join.
829 	 */
830 	if (jointype == JOIN_UNIQUE_OUTER)
831 	{
832 		outer_path = (Path *) create_unique_path(root, outerrel,
833 												 outer_path, extra->sjinfo);
834 		Assert(outer_path);
835 		jointype = JOIN_INNER;
836 	}
837 	else if (jointype == JOIN_UNIQUE_INNER)
838 	{
839 		inner_path = (Path *) create_unique_path(root, innerrel,
840 												 inner_path, extra->sjinfo);
841 		Assert(inner_path);
842 		jointype = JOIN_INNER;
843 	}
844 
845 	/*
846 	 * If the joinrel is parallel-safe, we may be able to consider a partial
847 	 * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
848 	 * outer path will be partial, and therefore we won't be able to properly
849 	 * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL and
850 	 * JOIN_RIGHT, because they can produce false null extended rows.  Also,
851 	 * the resulting path must not be parameterized.
852 	 */
853 	if (joinrel->consider_parallel &&
854 		save_jointype != JOIN_UNIQUE_OUTER &&
855 		save_jointype != JOIN_FULL &&
856 		save_jointype != JOIN_RIGHT &&
857 		outerrel->partial_pathlist != NIL &&
858 		bms_is_empty(joinrel->lateral_relids))
859 	{
860 		cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
861 
862 		if (inner_path->parallel_safe)
863 			cheapest_safe_inner = inner_path;
864 		else if (save_jointype != JOIN_UNIQUE_INNER)
865 			cheapest_safe_inner =
866 				get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
867 	}
868 
869 	/*
870 	 * Each possible ordering of the available mergejoin clauses will generate
871 	 * a differently-sorted result path at essentially the same cost.  We have
872 	 * no basis for choosing one over another at this level of joining, but
873 	 * some sort orders may be more useful than others for higher-level
874 	 * mergejoins, so it's worth considering multiple orderings.
875 	 *
876 	 * Actually, it's not quite true that every mergeclause ordering will
877 	 * generate a different path order, because some of the clauses may be
878 	 * partially redundant (refer to the same EquivalenceClasses).  Therefore,
879 	 * what we do is convert the mergeclause list to a list of canonical
880 	 * pathkeys, and then consider different orderings of the pathkeys.
881 	 *
882 	 * Generating a path for *every* permutation of the pathkeys doesn't seem
883 	 * like a winning strategy; the cost in planning time is too high. For
884 	 * now, we generate one path for each pathkey, listing that pathkey first
885 	 * and the rest in random order.  This should allow at least a one-clause
886 	 * mergejoin without re-sorting against any other possible mergejoin
887 	 * partner path.  But if we've not guessed the right ordering of secondary
888 	 * keys, we may end up evaluating clauses as qpquals when they could have
889 	 * been done as mergeclauses.  (In practice, it's rare that there's more
890 	 * than two or three mergeclauses, so expending a huge amount of thought
891 	 * on that is probably not worth it.)
892 	 *
893 	 * The pathkey order returned by select_outer_pathkeys_for_merge() has
894 	 * some heuristics behind it (see that function), so be sure to try it
895 	 * exactly as-is as well as making variants.
896 	 */
897 	all_pathkeys = select_outer_pathkeys_for_merge(root,
898 												   extra->mergeclause_list,
899 												   joinrel);
900 
901 	foreach(l, all_pathkeys)
902 	{
903 		List	   *front_pathkey = (List *) lfirst(l);
904 		List	   *cur_mergeclauses;
905 		List	   *outerkeys;
906 		List	   *innerkeys;
907 		List	   *merge_pathkeys;
908 
909 		/* Make a pathkey list with this guy first */
910 		if (l != list_head(all_pathkeys))
911 			outerkeys = lcons(front_pathkey,
912 							  list_delete_ptr(list_copy(all_pathkeys),
913 											  front_pathkey));
914 		else
915 			outerkeys = all_pathkeys;	/* no work at first one... */
916 
917 		/* Sort the mergeclauses into the corresponding ordering */
918 		cur_mergeclauses =
919 			find_mergeclauses_for_outer_pathkeys(root,
920 												 outerkeys,
921 												 extra->mergeclause_list);
922 
923 		/* Should have used them all... */
924 		Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
925 
926 		/* Build sort pathkeys for the inner side */
927 		innerkeys = make_inner_pathkeys_for_merge(root,
928 												  cur_mergeclauses,
929 												  outerkeys);
930 
931 		/* Build pathkeys representing output sort order */
932 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
933 											 outerkeys);
934 
935 		/*
936 		 * And now we can make the path.
937 		 *
938 		 * Note: it's possible that the cheapest paths will already be sorted
939 		 * properly.  try_mergejoin_path will detect that case and suppress an
940 		 * explicit sort step, so we needn't do so here.
941 		 */
942 		try_mergejoin_path(root,
943 						   joinrel,
944 						   outer_path,
945 						   inner_path,
946 						   merge_pathkeys,
947 						   cur_mergeclauses,
948 						   outerkeys,
949 						   innerkeys,
950 						   jointype,
951 						   extra,
952 						   false);
953 
954 		/*
955 		 * If we have partial outer and parallel safe inner path then try
956 		 * partial mergejoin path.
957 		 */
958 		if (cheapest_partial_outer && cheapest_safe_inner)
959 			try_partial_mergejoin_path(root,
960 									   joinrel,
961 									   cheapest_partial_outer,
962 									   cheapest_safe_inner,
963 									   merge_pathkeys,
964 									   cur_mergeclauses,
965 									   outerkeys,
966 									   innerkeys,
967 									   jointype,
968 									   extra);
969 	}
970 }
971 
972 /*
973  * generate_mergejoin_paths
974  *	Creates possible mergejoin paths for input outerpath.
975  *
976  * We generate mergejoins if mergejoin clauses are available.  We have
977  * two ways to generate the inner path for a mergejoin: sort the cheapest
978  * inner path, or use an inner path that is already suitably ordered for the
979  * merge.  If we have several mergeclauses, it could be that there is no inner
980  * path (or only a very expensive one) for the full list of mergeclauses, but
981  * better paths exist if we truncate the mergeclause list (thereby discarding
982  * some sort key requirements).  So, we consider truncations of the
983  * mergeclause list as well as the full list.  (Ideally we'd consider all
984  * subsets of the mergeclause list, but that seems way too expensive.)
985  */
986 static void
generate_mergejoin_paths(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * innerrel,Path * outerpath,JoinType jointype,JoinPathExtraData * extra,bool useallclauses,Path * inner_cheapest_total,List * merge_pathkeys,bool is_partial)987 generate_mergejoin_paths(PlannerInfo *root,
988 						 RelOptInfo *joinrel,
989 						 RelOptInfo *innerrel,
990 						 Path *outerpath,
991 						 JoinType jointype,
992 						 JoinPathExtraData *extra,
993 						 bool useallclauses,
994 						 Path *inner_cheapest_total,
995 						 List *merge_pathkeys,
996 						 bool is_partial)
997 {
998 	List	   *mergeclauses;
999 	List	   *innersortkeys;
1000 	List	   *trialsortkeys;
1001 	Path	   *cheapest_startup_inner;
1002 	Path	   *cheapest_total_inner;
1003 	JoinType	save_jointype = jointype;
1004 	int			num_sortkeys;
1005 	int			sortkeycnt;
1006 
1007 	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1008 		jointype = JOIN_INNER;
1009 
1010 	/* Look for useful mergeclauses (if any) */
1011 	mergeclauses =
1012 		find_mergeclauses_for_outer_pathkeys(root,
1013 											 outerpath->pathkeys,
1014 											 extra->mergeclause_list);
1015 
1016 	/*
1017 	 * Done with this outer path if no chance for a mergejoin.
1018 	 *
1019 	 * Special corner case: for "x FULL JOIN y ON true", there will be no join
1020 	 * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
1021 	 * but since mergejoin is our only join type that supports FULL JOIN
1022 	 * without any join clauses, it's necessary to generate a clauseless
1023 	 * mergejoin path instead.
1024 	 */
1025 	if (mergeclauses == NIL)
1026 	{
1027 		if (jointype == JOIN_FULL)
1028 			 /* okay to try for mergejoin */ ;
1029 		else
1030 			return;
1031 	}
1032 	if (useallclauses &&
1033 		list_length(mergeclauses) != list_length(extra->mergeclause_list))
1034 		return;
1035 
1036 	/* Compute the required ordering of the inner path */
1037 	innersortkeys = make_inner_pathkeys_for_merge(root,
1038 												  mergeclauses,
1039 												  outerpath->pathkeys);
1040 
1041 	/*
1042 	 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1043 	 * a sort will be needed, only cheapest total cost matters. (But
1044 	 * try_mergejoin_path will do the right thing if inner_cheapest_total is
1045 	 * already correctly sorted.)
1046 	 */
1047 	try_mergejoin_path(root,
1048 					   joinrel,
1049 					   outerpath,
1050 					   inner_cheapest_total,
1051 					   merge_pathkeys,
1052 					   mergeclauses,
1053 					   NIL,
1054 					   innersortkeys,
1055 					   jointype,
1056 					   extra,
1057 					   is_partial);
1058 
1059 	/* Can't do anything else if inner path needs to be unique'd */
1060 	if (save_jointype == JOIN_UNIQUE_INNER)
1061 		return;
1062 
1063 	/*
1064 	 * Look for presorted inner paths that satisfy the innersortkey list ---
1065 	 * or any truncation thereof, if we are allowed to build a mergejoin using
1066 	 * a subset of the merge clauses.  Here, we consider both cheap startup
1067 	 * cost and cheap total cost.
1068 	 *
1069 	 * Currently we do not consider parameterized inner paths here. This
1070 	 * interacts with decisions elsewhere that also discriminate against
1071 	 * mergejoins with parameterized inputs; see comments in
1072 	 * src/backend/optimizer/README.
1073 	 *
1074 	 * As we shorten the sortkey list, we should consider only paths that are
1075 	 * strictly cheaper than (in particular, not the same as) any path found
1076 	 * in an earlier iteration.  Otherwise we'd be intentionally using fewer
1077 	 * merge keys than a given path allows (treating the rest as plain
1078 	 * joinquals), which is unlikely to be a good idea.  Also, eliminating
1079 	 * paths here on the basis of compare_path_costs is a lot cheaper than
1080 	 * building the mergejoin path only to throw it away.
1081 	 *
1082 	 * If inner_cheapest_total is well enough sorted to have not required a
1083 	 * sort in the path made above, we shouldn't make a duplicate path with
1084 	 * it, either.  We handle that case with the same logic that handles the
1085 	 * previous consideration, by initializing the variables that track
1086 	 * cheapest-so-far properly.  Note that we do NOT reject
1087 	 * inner_cheapest_total if we find it matches some shorter set of
1088 	 * pathkeys.  That case corresponds to using fewer mergekeys to avoid
1089 	 * sorting inner_cheapest_total, whereas we did sort it above, so the
1090 	 * plans being considered are different.
1091 	 */
1092 	if (pathkeys_contained_in(innersortkeys,
1093 							  inner_cheapest_total->pathkeys))
1094 	{
1095 		/* inner_cheapest_total didn't require a sort */
1096 		cheapest_startup_inner = inner_cheapest_total;
1097 		cheapest_total_inner = inner_cheapest_total;
1098 	}
1099 	else
1100 	{
1101 		/* it did require a sort, at least for the full set of keys */
1102 		cheapest_startup_inner = NULL;
1103 		cheapest_total_inner = NULL;
1104 	}
1105 	num_sortkeys = list_length(innersortkeys);
1106 	if (num_sortkeys > 1 && !useallclauses)
1107 		trialsortkeys = list_copy(innersortkeys);	/* need modifiable copy */
1108 	else
1109 		trialsortkeys = innersortkeys;	/* won't really truncate */
1110 
1111 	for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1112 	{
1113 		Path	   *innerpath;
1114 		List	   *newclauses = NIL;
1115 
1116 		/*
1117 		 * Look for an inner path ordered well enough for the first
1118 		 * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
1119 		 * destructively, which is why we made a copy...
1120 		 */
1121 		trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1122 		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1123 												   trialsortkeys,
1124 												   NULL,
1125 												   TOTAL_COST,
1126 												   is_partial);
1127 		if (innerpath != NULL &&
1128 			(cheapest_total_inner == NULL ||
1129 			 compare_path_costs(innerpath, cheapest_total_inner,
1130 								TOTAL_COST) < 0))
1131 		{
1132 			/* Found a cheap (or even-cheaper) sorted path */
1133 			/* Select the right mergeclauses, if we didn't already */
1134 			if (sortkeycnt < num_sortkeys)
1135 			{
1136 				newclauses =
1137 					trim_mergeclauses_for_inner_pathkeys(root,
1138 														 mergeclauses,
1139 														 trialsortkeys);
1140 				Assert(newclauses != NIL);
1141 			}
1142 			else
1143 				newclauses = mergeclauses;
1144 			try_mergejoin_path(root,
1145 							   joinrel,
1146 							   outerpath,
1147 							   innerpath,
1148 							   merge_pathkeys,
1149 							   newclauses,
1150 							   NIL,
1151 							   NIL,
1152 							   jointype,
1153 							   extra,
1154 							   is_partial);
1155 			cheapest_total_inner = innerpath;
1156 		}
1157 		/* Same on the basis of cheapest startup cost ... */
1158 		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1159 												   trialsortkeys,
1160 												   NULL,
1161 												   STARTUP_COST,
1162 												   is_partial);
1163 		if (innerpath != NULL &&
1164 			(cheapest_startup_inner == NULL ||
1165 			 compare_path_costs(innerpath, cheapest_startup_inner,
1166 								STARTUP_COST) < 0))
1167 		{
1168 			/* Found a cheap (or even-cheaper) sorted path */
1169 			if (innerpath != cheapest_total_inner)
1170 			{
1171 				/*
1172 				 * Avoid rebuilding clause list if we already made one; saves
1173 				 * memory in big join trees...
1174 				 */
1175 				if (newclauses == NIL)
1176 				{
1177 					if (sortkeycnt < num_sortkeys)
1178 					{
1179 						newclauses =
1180 							trim_mergeclauses_for_inner_pathkeys(root,
1181 																 mergeclauses,
1182 																 trialsortkeys);
1183 						Assert(newclauses != NIL);
1184 					}
1185 					else
1186 						newclauses = mergeclauses;
1187 				}
1188 				try_mergejoin_path(root,
1189 								   joinrel,
1190 								   outerpath,
1191 								   innerpath,
1192 								   merge_pathkeys,
1193 								   newclauses,
1194 								   NIL,
1195 								   NIL,
1196 								   jointype,
1197 								   extra,
1198 								   is_partial);
1199 			}
1200 			cheapest_startup_inner = innerpath;
1201 		}
1202 
1203 		/*
1204 		 * Don't consider truncated sortkeys if we need all clauses.
1205 		 */
1206 		if (useallclauses)
1207 			break;
1208 	}
1209 }
1210 
1211 /*
1212  * match_unsorted_outer
1213  *	  Creates possible join paths for processing a single join relation
1214  *	  'joinrel' by employing either iterative substitution or
1215  *	  mergejoining on each of its possible outer paths (considering
1216  *	  only outer paths that are already ordered well enough for merging).
1217  *
1218  * We always generate a nestloop path for each available outer path.
1219  * In fact we may generate as many as five: one on the cheapest-total-cost
1220  * inner path, one on the same with materialization, one on the
1221  * cheapest-startup-cost inner path (if different), one on the
1222  * cheapest-total inner-indexscan path (if any), and one on the
1223  * cheapest-startup inner-indexscan path (if different).
1224  *
1225  * We also consider mergejoins if mergejoin clauses are available.  See
1226  * detailed comments in generate_mergejoin_paths.
1227  *
1228  * 'joinrel' is the join relation
1229  * 'outerrel' is the outer join relation
1230  * 'innerrel' is the inner join relation
1231  * 'jointype' is the type of join to do
1232  * 'extra' contains additional input values
1233  */
1234 static void
match_unsorted_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1235 match_unsorted_outer(PlannerInfo *root,
1236 					 RelOptInfo *joinrel,
1237 					 RelOptInfo *outerrel,
1238 					 RelOptInfo *innerrel,
1239 					 JoinType jointype,
1240 					 JoinPathExtraData *extra)
1241 {
1242 	JoinType	save_jointype = jointype;
1243 	bool		nestjoinOK;
1244 	bool		useallclauses;
1245 	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
1246 	Path	   *matpath = NULL;
1247 	ListCell   *lc1;
1248 
1249 	/*
1250 	 * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
1251 	 * are doing a right or full mergejoin, we must use *all* the mergeclauses
1252 	 * as join clauses, else we will not have a valid plan.  (Although these
1253 	 * two flags are currently inverses, keep them separate for clarity and
1254 	 * possible future changes.)
1255 	 */
1256 	switch (jointype)
1257 	{
1258 		case JOIN_INNER:
1259 		case JOIN_LEFT:
1260 		case JOIN_SEMI:
1261 		case JOIN_ANTI:
1262 			nestjoinOK = true;
1263 			useallclauses = false;
1264 			break;
1265 		case JOIN_RIGHT:
1266 		case JOIN_FULL:
1267 			nestjoinOK = false;
1268 			useallclauses = true;
1269 			break;
1270 		case JOIN_UNIQUE_OUTER:
1271 		case JOIN_UNIQUE_INNER:
1272 			jointype = JOIN_INNER;
1273 			nestjoinOK = true;
1274 			useallclauses = false;
1275 			break;
1276 		default:
1277 			elog(ERROR, "unrecognized join type: %d",
1278 				 (int) jointype);
1279 			nestjoinOK = false; /* keep compiler quiet */
1280 			useallclauses = false;
1281 			break;
1282 	}
1283 
1284 	/*
1285 	 * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1286 	 * we will consider it below as a member of cheapest_parameterized_paths,
1287 	 * but the other possibilities considered in this routine aren't usable.
1288 	 */
1289 	if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1290 		inner_cheapest_total = NULL;
1291 
1292 	/*
1293 	 * If we need to unique-ify the inner path, we will consider only the
1294 	 * cheapest-total inner.
1295 	 */
1296 	if (save_jointype == JOIN_UNIQUE_INNER)
1297 	{
1298 		/* No way to do this with an inner path parameterized by outer rel */
1299 		if (inner_cheapest_total == NULL)
1300 			return;
1301 		inner_cheapest_total = (Path *)
1302 			create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1303 		Assert(inner_cheapest_total);
1304 	}
1305 	else if (nestjoinOK)
1306 	{
1307 		/*
1308 		 * Consider materializing the cheapest inner path, unless
1309 		 * enable_material is off or the path in question materializes its
1310 		 * output anyway.
1311 		 */
1312 		if (enable_material && inner_cheapest_total != NULL &&
1313 			!ExecMaterializesOutput(inner_cheapest_total->pathtype))
1314 			matpath = (Path *)
1315 				create_material_path(innerrel, inner_cheapest_total);
1316 	}
1317 
1318 	foreach(lc1, outerrel->pathlist)
1319 	{
1320 		Path	   *outerpath = (Path *) lfirst(lc1);
1321 		List	   *merge_pathkeys;
1322 
1323 		/*
1324 		 * We cannot use an outer path that is parameterized by the inner rel.
1325 		 */
1326 		if (PATH_PARAM_BY_REL(outerpath, innerrel))
1327 			continue;
1328 
1329 		/*
1330 		 * If we need to unique-ify the outer path, it's pointless to consider
1331 		 * any but the cheapest outer.  (XXX we don't consider parameterized
1332 		 * outers, nor inners, for unique-ified cases.  Should we?)
1333 		 */
1334 		if (save_jointype == JOIN_UNIQUE_OUTER)
1335 		{
1336 			if (outerpath != outerrel->cheapest_total_path)
1337 				continue;
1338 			outerpath = (Path *) create_unique_path(root, outerrel,
1339 													outerpath, extra->sjinfo);
1340 			Assert(outerpath);
1341 		}
1342 
1343 		/*
1344 		 * The result will have this sort order (even if it is implemented as
1345 		 * a nestloop, and even if some of the mergeclauses are implemented by
1346 		 * qpquals rather than as true mergeclauses):
1347 		 */
1348 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1349 											 outerpath->pathkeys);
1350 
1351 		if (save_jointype == JOIN_UNIQUE_INNER)
1352 		{
1353 			/*
1354 			 * Consider nestloop join, but only with the unique-ified cheapest
1355 			 * inner path
1356 			 */
1357 			try_nestloop_path(root,
1358 							  joinrel,
1359 							  outerpath,
1360 							  inner_cheapest_total,
1361 							  merge_pathkeys,
1362 							  jointype,
1363 							  extra);
1364 		}
1365 		else if (nestjoinOK)
1366 		{
1367 			/*
1368 			 * Consider nestloop joins using this outer path and various
1369 			 * available paths for the inner relation.  We consider the
1370 			 * cheapest-total paths for each available parameterization of the
1371 			 * inner relation, including the unparameterized case.
1372 			 */
1373 			ListCell   *lc2;
1374 
1375 			foreach(lc2, innerrel->cheapest_parameterized_paths)
1376 			{
1377 				Path	   *innerpath = (Path *) lfirst(lc2);
1378 
1379 				try_nestloop_path(root,
1380 								  joinrel,
1381 								  outerpath,
1382 								  innerpath,
1383 								  merge_pathkeys,
1384 								  jointype,
1385 								  extra);
1386 			}
1387 
1388 			/* Also consider materialized form of the cheapest inner path */
1389 			if (matpath != NULL)
1390 				try_nestloop_path(root,
1391 								  joinrel,
1392 								  outerpath,
1393 								  matpath,
1394 								  merge_pathkeys,
1395 								  jointype,
1396 								  extra);
1397 		}
1398 
1399 		/* Can't do anything else if outer path needs to be unique'd */
1400 		if (save_jointype == JOIN_UNIQUE_OUTER)
1401 			continue;
1402 
1403 		/* Can't do anything else if inner rel is parameterized by outer */
1404 		if (inner_cheapest_total == NULL)
1405 			continue;
1406 
1407 		/* Generate merge join paths */
1408 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1409 								 save_jointype, extra, useallclauses,
1410 								 inner_cheapest_total, merge_pathkeys,
1411 								 false);
1412 	}
1413 
1414 	/*
1415 	 * Consider partial nestloop and mergejoin plan if outerrel has any
1416 	 * partial path and the joinrel is parallel-safe.  However, we can't
1417 	 * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1418 	 * therefore we won't be able to properly guarantee uniqueness.  Nor can
1419 	 * we handle extra_lateral_rels, since partial paths must not be
1420 	 * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1421 	 * because they can produce false null extended rows.
1422 	 */
1423 	if (joinrel->consider_parallel &&
1424 		save_jointype != JOIN_UNIQUE_OUTER &&
1425 		save_jointype != JOIN_FULL &&
1426 		save_jointype != JOIN_RIGHT &&
1427 		outerrel->partial_pathlist != NIL &&
1428 		bms_is_empty(joinrel->lateral_relids))
1429 	{
1430 		if (nestjoinOK)
1431 			consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1432 									   save_jointype, extra);
1433 
1434 		/*
1435 		 * If inner_cheapest_total is NULL or non parallel-safe then find the
1436 		 * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
1437 		 * can't use any alternative inner path.
1438 		 */
1439 		if (inner_cheapest_total == NULL ||
1440 			!inner_cheapest_total->parallel_safe)
1441 		{
1442 			if (save_jointype == JOIN_UNIQUE_INNER)
1443 				return;
1444 
1445 			inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
1446 																		  innerrel->pathlist);
1447 		}
1448 
1449 		if (inner_cheapest_total)
1450 			consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1451 										save_jointype, extra,
1452 										inner_cheapest_total);
1453 	}
1454 }
1455 
1456 /*
1457  * consider_parallel_mergejoin
1458  *	  Try to build partial paths for a joinrel by joining a partial path
1459  *	  for the outer relation to a complete path for the inner relation.
1460  *
1461  * 'joinrel' is the join relation
1462  * 'outerrel' is the outer join relation
1463  * 'innerrel' is the inner join relation
1464  * 'jointype' is the type of join to do
1465  * 'extra' contains additional input values
1466  * 'inner_cheapest_total' cheapest total path for innerrel
1467  */
1468 static void
consider_parallel_mergejoin(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra,Path * inner_cheapest_total)1469 consider_parallel_mergejoin(PlannerInfo *root,
1470 							RelOptInfo *joinrel,
1471 							RelOptInfo *outerrel,
1472 							RelOptInfo *innerrel,
1473 							JoinType jointype,
1474 							JoinPathExtraData *extra,
1475 							Path *inner_cheapest_total)
1476 {
1477 	ListCell   *lc1;
1478 
1479 	/* generate merge join path for each partial outer path */
1480 	foreach(lc1, outerrel->partial_pathlist)
1481 	{
1482 		Path	   *outerpath = (Path *) lfirst(lc1);
1483 		List	   *merge_pathkeys;
1484 
1485 		/*
1486 		 * Figure out what useful ordering any paths we create will have.
1487 		 */
1488 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1489 											 outerpath->pathkeys);
1490 
1491 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1492 								 extra, false, inner_cheapest_total,
1493 								 merge_pathkeys, true);
1494 	}
1495 }
1496 
1497 /*
1498  * consider_parallel_nestloop
1499  *	  Try to build partial paths for a joinrel by joining a partial path for the
1500  *	  outer relation to a complete path for the inner relation.
1501  *
1502  * 'joinrel' is the join relation
1503  * 'outerrel' is the outer join relation
1504  * 'innerrel' is the inner join relation
1505  * 'jointype' is the type of join to do
1506  * 'extra' contains additional input values
1507  */
1508 static void
consider_parallel_nestloop(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1509 consider_parallel_nestloop(PlannerInfo *root,
1510 						   RelOptInfo *joinrel,
1511 						   RelOptInfo *outerrel,
1512 						   RelOptInfo *innerrel,
1513 						   JoinType jointype,
1514 						   JoinPathExtraData *extra)
1515 {
1516 	JoinType	save_jointype = jointype;
1517 	ListCell   *lc1;
1518 
1519 	if (jointype == JOIN_UNIQUE_INNER)
1520 		jointype = JOIN_INNER;
1521 
1522 	foreach(lc1, outerrel->partial_pathlist)
1523 	{
1524 		Path	   *outerpath = (Path *) lfirst(lc1);
1525 		List	   *pathkeys;
1526 		ListCell   *lc2;
1527 
1528 		/* Figure out what useful ordering any paths we create will have. */
1529 		pathkeys = build_join_pathkeys(root, joinrel, jointype,
1530 									   outerpath->pathkeys);
1531 
1532 		/*
1533 		 * Try the cheapest parameterized paths; only those which will produce
1534 		 * an unparameterized path when joined to this outerrel will survive
1535 		 * try_partial_nestloop_path.  The cheapest unparameterized path is
1536 		 * also in this list.
1537 		 */
1538 		foreach(lc2, innerrel->cheapest_parameterized_paths)
1539 		{
1540 			Path	   *innerpath = (Path *) lfirst(lc2);
1541 
1542 			/* Can't join to an inner path that is not parallel-safe */
1543 			if (!innerpath->parallel_safe)
1544 				continue;
1545 
1546 			/*
1547 			 * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1548 			 * cheapest_total_path, and we have to unique-ify it.  (We might
1549 			 * be able to relax this to allow other safe, unparameterized
1550 			 * inner paths, but right now create_unique_path is not on board
1551 			 * with that.)
1552 			 */
1553 			if (save_jointype == JOIN_UNIQUE_INNER)
1554 			{
1555 				if (innerpath != innerrel->cheapest_total_path)
1556 					continue;
1557 				innerpath = (Path *) create_unique_path(root, innerrel,
1558 														innerpath,
1559 														extra->sjinfo);
1560 				Assert(innerpath);
1561 			}
1562 
1563 			try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1564 									  pathkeys, jointype, extra);
1565 		}
1566 	}
1567 }
1568 
1569 /*
1570  * hash_inner_and_outer
1571  *	  Create hashjoin join paths by explicitly hashing both the outer and
1572  *	  inner keys of each available hash clause.
1573  *
1574  * 'joinrel' is the join relation
1575  * 'outerrel' is the outer join relation
1576  * 'innerrel' is the inner join relation
1577  * 'jointype' is the type of join to do
1578  * 'extra' contains additional input values
1579  */
1580 static void
hash_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1581 hash_inner_and_outer(PlannerInfo *root,
1582 					 RelOptInfo *joinrel,
1583 					 RelOptInfo *outerrel,
1584 					 RelOptInfo *innerrel,
1585 					 JoinType jointype,
1586 					 JoinPathExtraData *extra)
1587 {
1588 	JoinType	save_jointype = jointype;
1589 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
1590 	List	   *hashclauses;
1591 	ListCell   *l;
1592 
1593 	/*
1594 	 * We need to build only one hashclauses list for any given pair of outer
1595 	 * and inner relations; all of the hashable clauses will be used as keys.
1596 	 *
1597 	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
1598 	 * usable with this pair of sub-relations.
1599 	 */
1600 	hashclauses = NIL;
1601 	foreach(l, extra->restrictlist)
1602 	{
1603 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1604 
1605 		/*
1606 		 * If processing an outer join, only use its own join clauses for
1607 		 * hashing.  For inner joins we need not be so picky.
1608 		 */
1609 		if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1610 			continue;
1611 
1612 		if (!restrictinfo->can_join ||
1613 			restrictinfo->hashjoinoperator == InvalidOid)
1614 			continue;			/* not hashjoinable */
1615 
1616 		/*
1617 		 * Check if clause has the form "outer op inner" or "inner op outer".
1618 		 */
1619 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1620 			continue;			/* no good for these input relations */
1621 
1622 		hashclauses = lappend(hashclauses, restrictinfo);
1623 	}
1624 
1625 	/* If we found any usable hashclauses, make paths */
1626 	if (hashclauses)
1627 	{
1628 		/*
1629 		 * We consider both the cheapest-total-cost and cheapest-startup-cost
1630 		 * outer paths.  There's no need to consider any but the
1631 		 * cheapest-total-cost inner path, however.
1632 		 */
1633 		Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
1634 		Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
1635 		Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
1636 
1637 		/*
1638 		 * If either cheapest-total path is parameterized by the other rel, we
1639 		 * can't use a hashjoin.  (There's no use looking for alternative
1640 		 * input paths, since these should already be the least-parameterized
1641 		 * available paths.)
1642 		 */
1643 		if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1644 			PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1645 			return;
1646 
1647 		/* Unique-ify if need be; we ignore parameterized possibilities */
1648 		if (jointype == JOIN_UNIQUE_OUTER)
1649 		{
1650 			cheapest_total_outer = (Path *)
1651 				create_unique_path(root, outerrel,
1652 								   cheapest_total_outer, extra->sjinfo);
1653 			Assert(cheapest_total_outer);
1654 			jointype = JOIN_INNER;
1655 			try_hashjoin_path(root,
1656 							  joinrel,
1657 							  cheapest_total_outer,
1658 							  cheapest_total_inner,
1659 							  hashclauses,
1660 							  jointype,
1661 							  extra);
1662 			/* no possibility of cheap startup here */
1663 		}
1664 		else if (jointype == JOIN_UNIQUE_INNER)
1665 		{
1666 			cheapest_total_inner = (Path *)
1667 				create_unique_path(root, innerrel,
1668 								   cheapest_total_inner, extra->sjinfo);
1669 			Assert(cheapest_total_inner);
1670 			jointype = JOIN_INNER;
1671 			try_hashjoin_path(root,
1672 							  joinrel,
1673 							  cheapest_total_outer,
1674 							  cheapest_total_inner,
1675 							  hashclauses,
1676 							  jointype,
1677 							  extra);
1678 			if (cheapest_startup_outer != NULL &&
1679 				cheapest_startup_outer != cheapest_total_outer)
1680 				try_hashjoin_path(root,
1681 								  joinrel,
1682 								  cheapest_startup_outer,
1683 								  cheapest_total_inner,
1684 								  hashclauses,
1685 								  jointype,
1686 								  extra);
1687 		}
1688 		else
1689 		{
1690 			/*
1691 			 * For other jointypes, we consider the cheapest startup outer
1692 			 * together with the cheapest total inner, and then consider
1693 			 * pairings of cheapest-total paths including parameterized ones.
1694 			 * There is no use in generating parameterized paths on the basis
1695 			 * of possibly cheap startup cost, so this is sufficient.
1696 			 */
1697 			ListCell   *lc1;
1698 			ListCell   *lc2;
1699 
1700 			if (cheapest_startup_outer != NULL)
1701 				try_hashjoin_path(root,
1702 								  joinrel,
1703 								  cheapest_startup_outer,
1704 								  cheapest_total_inner,
1705 								  hashclauses,
1706 								  jointype,
1707 								  extra);
1708 
1709 			foreach(lc1, outerrel->cheapest_parameterized_paths)
1710 			{
1711 				Path	   *outerpath = (Path *) lfirst(lc1);
1712 
1713 				/*
1714 				 * We cannot use an outer path that is parameterized by the
1715 				 * inner rel.
1716 				 */
1717 				if (PATH_PARAM_BY_REL(outerpath, innerrel))
1718 					continue;
1719 
1720 				foreach(lc2, innerrel->cheapest_parameterized_paths)
1721 				{
1722 					Path	   *innerpath = (Path *) lfirst(lc2);
1723 
1724 					/*
1725 					 * We cannot use an inner path that is parameterized by
1726 					 * the outer rel, either.
1727 					 */
1728 					if (PATH_PARAM_BY_REL(innerpath, outerrel))
1729 						continue;
1730 
1731 					if (outerpath == cheapest_startup_outer &&
1732 						innerpath == cheapest_total_inner)
1733 						continue;	/* already tried it */
1734 
1735 					try_hashjoin_path(root,
1736 									  joinrel,
1737 									  outerpath,
1738 									  innerpath,
1739 									  hashclauses,
1740 									  jointype,
1741 									  extra);
1742 				}
1743 			}
1744 		}
1745 
1746 		/*
1747 		 * If the joinrel is parallel-safe, we may be able to consider a
1748 		 * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
1749 		 * because the outer path will be partial, and therefore we won't be
1750 		 * able to properly guarantee uniqueness.  Similarly, we can't handle
1751 		 * JOIN_FULL and JOIN_RIGHT, because they can produce false null
1752 		 * extended rows.  Also, the resulting path must not be parameterized.
1753 		 */
1754 		if (joinrel->consider_parallel &&
1755 			save_jointype != JOIN_UNIQUE_OUTER &&
1756 			save_jointype != JOIN_FULL &&
1757 			save_jointype != JOIN_RIGHT &&
1758 			outerrel->partial_pathlist != NIL &&
1759 			bms_is_empty(joinrel->lateral_relids))
1760 		{
1761 			Path	   *cheapest_partial_outer;
1762 			Path	   *cheapest_safe_inner = NULL;
1763 
1764 			cheapest_partial_outer =
1765 				(Path *) linitial(outerrel->partial_pathlist);
1766 
1767 			/*
1768 			 * Normally, given that the joinrel is parallel-safe, the cheapest
1769 			 * total inner path will also be parallel-safe, but if not, we'll
1770 			 * have to search for the cheapest safe, unparameterized inner
1771 			 * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
1772 			 * inner path.
1773 			 */
1774 			if (cheapest_total_inner->parallel_safe)
1775 				cheapest_safe_inner = cheapest_total_inner;
1776 			else if (save_jointype != JOIN_UNIQUE_INNER)
1777 				cheapest_safe_inner =
1778 					get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1779 
1780 			if (cheapest_safe_inner != NULL)
1781 				try_partial_hashjoin_path(root, joinrel,
1782 										  cheapest_partial_outer,
1783 										  cheapest_safe_inner,
1784 										  hashclauses, jointype, extra);
1785 		}
1786 	}
1787 }
1788 
1789 /*
1790  * select_mergejoin_clauses
1791  *	  Select mergejoin clauses that are usable for a particular join.
1792  *	  Returns a list of RestrictInfo nodes for those clauses.
1793  *
1794  * *mergejoin_allowed is normally set to TRUE, but it is set to FALSE if
1795  * this is a right/full join and there are nonmergejoinable join clauses.
1796  * The executor's mergejoin machinery cannot handle such cases, so we have
1797  * to avoid generating a mergejoin plan.  (Note that this flag does NOT
1798  * consider whether there are actually any mergejoinable clauses.  This is
1799  * correct because in some cases we need to build a clauseless mergejoin.
1800  * Simply returning NIL is therefore not enough to distinguish safe from
1801  * unsafe cases.)
1802  *
1803  * We also mark each selected RestrictInfo to show which side is currently
1804  * being considered as outer.  These are transient markings that are only
1805  * good for the duration of the current add_paths_to_joinrel() call!
1806  *
1807  * We examine each restrictinfo clause known for the join to see
1808  * if it is mergejoinable and involves vars from the two sub-relations
1809  * currently of interest.
1810  */
1811 static List *
select_mergejoin_clauses(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,List * restrictlist,JoinType jointype,bool * mergejoin_allowed)1812 select_mergejoin_clauses(PlannerInfo *root,
1813 						 RelOptInfo *joinrel,
1814 						 RelOptInfo *outerrel,
1815 						 RelOptInfo *innerrel,
1816 						 List *restrictlist,
1817 						 JoinType jointype,
1818 						 bool *mergejoin_allowed)
1819 {
1820 	List	   *result_list = NIL;
1821 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
1822 	bool		have_nonmergeable_joinclause = false;
1823 	ListCell   *l;
1824 
1825 	foreach(l, restrictlist)
1826 	{
1827 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1828 
1829 		/*
1830 		 * If processing an outer join, only use its own join clauses in the
1831 		 * merge.  For inner joins we can use pushed-down clauses too. (Note:
1832 		 * we don't set have_nonmergeable_joinclause here because pushed-down
1833 		 * clauses will become otherquals not joinquals.)
1834 		 */
1835 		if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1836 			continue;
1837 
1838 		/* Check that clause is a mergeable operator clause */
1839 		if (!restrictinfo->can_join ||
1840 			restrictinfo->mergeopfamilies == NIL)
1841 		{
1842 			/*
1843 			 * The executor can handle extra joinquals that are constants, but
1844 			 * not anything else, when doing right/full merge join.  (The
1845 			 * reason to support constants is so we can do FULL JOIN ON
1846 			 * FALSE.)
1847 			 */
1848 			if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
1849 				have_nonmergeable_joinclause = true;
1850 			continue;			/* not mergejoinable */
1851 		}
1852 
1853 		/*
1854 		 * Check if clause has the form "outer op inner" or "inner op outer".
1855 		 */
1856 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1857 		{
1858 			have_nonmergeable_joinclause = true;
1859 			continue;			/* no good for these input relations */
1860 		}
1861 
1862 		/*
1863 		 * Insist that each side have a non-redundant eclass.  This
1864 		 * restriction is needed because various bits of the planner expect
1865 		 * that each clause in a merge be associable with some pathkey in a
1866 		 * canonical pathkey list, but redundant eclasses can't appear in
1867 		 * canonical sort orderings.  (XXX it might be worth relaxing this,
1868 		 * but not enough time to address it for 8.3.)
1869 		 *
1870 		 * Note: it would be bad if this condition failed for an otherwise
1871 		 * mergejoinable FULL JOIN clause, since that would result in
1872 		 * undesirable planner failure.  I believe that is not possible
1873 		 * however; a variable involved in a full join could only appear in
1874 		 * below_outer_join eclasses, which aren't considered redundant.
1875 		 *
1876 		 * This case *can* happen for left/right join clauses: the outer-side
1877 		 * variable could be equated to a constant.  Because we will propagate
1878 		 * that constant across the join clause, the loss of ability to do a
1879 		 * mergejoin is not really all that big a deal, and so it's not clear
1880 		 * that improving this is important.
1881 		 */
1882 		update_mergeclause_eclasses(root, restrictinfo);
1883 
1884 		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1885 			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1886 		{
1887 			have_nonmergeable_joinclause = true;
1888 			continue;			/* can't handle redundant eclasses */
1889 		}
1890 
1891 		result_list = lappend(result_list, restrictinfo);
1892 	}
1893 
1894 	/*
1895 	 * Report whether mergejoin is allowed (see comment at top of function).
1896 	 */
1897 	switch (jointype)
1898 	{
1899 		case JOIN_RIGHT:
1900 		case JOIN_FULL:
1901 			*mergejoin_allowed = !have_nonmergeable_joinclause;
1902 			break;
1903 		default:
1904 			*mergejoin_allowed = true;
1905 			break;
1906 	}
1907 
1908 	return result_list;
1909 }
1910