1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  *	  Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/joinpath.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/paths.h"
24 #include "optimizer/planmain.h"
25 
26 /* Hook for plugins to get control in add_paths_to_joinrel() */
27 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
28 
29 /*
30  * Paths parameterized by the parent can be considered to be parameterized by
31  * any of its child.
32  */
33 #define PATH_PARAM_BY_PARENT(path, rel)	\
34 	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path),	\
35 									   (rel)->top_parent_relids))
36 #define PATH_PARAM_BY_REL_SELF(path, rel)  \
37 	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
38 
39 #define PATH_PARAM_BY_REL(path, rel)	\
40 	(PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
41 
42 static void try_partial_mergejoin_path(PlannerInfo *root,
43 									   RelOptInfo *joinrel,
44 									   Path *outer_path,
45 									   Path *inner_path,
46 									   List *pathkeys,
47 									   List *mergeclauses,
48 									   List *outersortkeys,
49 									   List *innersortkeys,
50 									   JoinType jointype,
51 									   JoinPathExtraData *extra);
52 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
53 								 RelOptInfo *outerrel, RelOptInfo *innerrel,
54 								 JoinType jointype, JoinPathExtraData *extra);
55 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
56 								 RelOptInfo *outerrel, RelOptInfo *innerrel,
57 								 JoinType jointype, JoinPathExtraData *extra);
58 static void consider_parallel_nestloop(PlannerInfo *root,
59 									   RelOptInfo *joinrel,
60 									   RelOptInfo *outerrel,
61 									   RelOptInfo *innerrel,
62 									   JoinType jointype,
63 									   JoinPathExtraData *extra);
64 static void consider_parallel_mergejoin(PlannerInfo *root,
65 										RelOptInfo *joinrel,
66 										RelOptInfo *outerrel,
67 										RelOptInfo *innerrel,
68 										JoinType jointype,
69 										JoinPathExtraData *extra,
70 										Path *inner_cheapest_total);
71 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
72 								 RelOptInfo *outerrel, RelOptInfo *innerrel,
73 								 JoinType jointype, JoinPathExtraData *extra);
74 static List *select_mergejoin_clauses(PlannerInfo *root,
75 									  RelOptInfo *joinrel,
76 									  RelOptInfo *outerrel,
77 									  RelOptInfo *innerrel,
78 									  List *restrictlist,
79 									  JoinType jointype,
80 									  bool *mergejoin_allowed);
81 static void generate_mergejoin_paths(PlannerInfo *root,
82 									 RelOptInfo *joinrel,
83 									 RelOptInfo *innerrel,
84 									 Path *outerpath,
85 									 JoinType jointype,
86 									 JoinPathExtraData *extra,
87 									 bool useallclauses,
88 									 Path *inner_cheapest_total,
89 									 List *merge_pathkeys,
90 									 bool is_partial);
91 
92 
93 /*
94  * add_paths_to_joinrel
95  *	  Given a join relation and two component rels from which it can be made,
96  *	  consider all possible paths that use the two component rels as outer
97  *	  and inner rel respectively.  Add these paths to the join rel's pathlist
98  *	  if they survive comparison with other paths (and remove any existing
99  *	  paths that are dominated by these paths).
100  *
101  * Modifies the pathlist field of the joinrel node to contain the best
102  * paths found so far.
103  *
104  * jointype is not necessarily the same as sjinfo->jointype; it might be
105  * "flipped around" if we are considering joining the rels in the opposite
106  * direction from what's indicated in sjinfo.
107  *
108  * Also, this routine and others in this module accept the special JoinTypes
109  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
110  * unique-ify the outer or inner relation and then apply a regular inner
111  * join.  These values are not allowed to propagate outside this module,
112  * however.  Path cost estimation code may need to recognize that it's
113  * dealing with such a case --- the combination of nominal jointype INNER
114  * with sjinfo->jointype == JOIN_SEMI indicates that.
115  */
116 void
add_paths_to_joinrel(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,SpecialJoinInfo * sjinfo,List * restrictlist)117 add_paths_to_joinrel(PlannerInfo *root,
118 					 RelOptInfo *joinrel,
119 					 RelOptInfo *outerrel,
120 					 RelOptInfo *innerrel,
121 					 JoinType jointype,
122 					 SpecialJoinInfo *sjinfo,
123 					 List *restrictlist)
124 {
125 	JoinPathExtraData extra;
126 	bool		mergejoin_allowed = true;
127 	ListCell   *lc;
128 	Relids		joinrelids;
129 
130 	/*
131 	 * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
132 	 * between child relations, even if there is a SpecialJoinInfo node for
133 	 * the join between the topmost parents. So, while calculating Relids set
134 	 * representing the restriction, consider relids of topmost parent of
135 	 * partitions.
136 	 */
137 	if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
138 		joinrelids = joinrel->top_parent_relids;
139 	else
140 		joinrelids = joinrel->relids;
141 
142 	extra.restrictlist = restrictlist;
143 	extra.mergeclause_list = NIL;
144 	extra.sjinfo = sjinfo;
145 	extra.param_source_rels = NULL;
146 
147 	/*
148 	 * See if the inner relation is provably unique for this outer rel.
149 	 *
150 	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
151 	 * matter since the executor can make the equivalent optimization anyway;
152 	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
153 	 * must be considering a semijoin whose inner side is not provably unique
154 	 * (else reduce_unique_semijoins would've simplified it), so there's no
155 	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
156 	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
157 	 * because the path produced by create_unique_path will be unique relative
158 	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
159 	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
160 	 * letting that value escape this module.
161 	 */
162 	switch (jointype)
163 	{
164 		case JOIN_SEMI:
165 		case JOIN_ANTI:
166 			extra.inner_unique = false; /* well, unproven */
167 			break;
168 		case JOIN_UNIQUE_INNER:
169 			extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
170 											   outerrel->relids);
171 			break;
172 		case JOIN_UNIQUE_OUTER:
173 			extra.inner_unique = innerrel_is_unique(root,
174 													joinrel->relids,
175 													outerrel->relids,
176 													innerrel,
177 													JOIN_INNER,
178 													restrictlist,
179 													false);
180 			break;
181 		default:
182 			extra.inner_unique = innerrel_is_unique(root,
183 													joinrel->relids,
184 													outerrel->relids,
185 													innerrel,
186 													jointype,
187 													restrictlist,
188 													false);
189 			break;
190 	}
191 
192 	/*
193 	 * Find potential mergejoin clauses.  We can skip this if we are not
194 	 * interested in doing a mergejoin.  However, mergejoin may be our only
195 	 * way of implementing a full outer join, so override enable_mergejoin if
196 	 * it's a full join.
197 	 */
198 	if (enable_mergejoin || jointype == JOIN_FULL)
199 		extra.mergeclause_list = select_mergejoin_clauses(root,
200 														  joinrel,
201 														  outerrel,
202 														  innerrel,
203 														  restrictlist,
204 														  jointype,
205 														  &mergejoin_allowed);
206 
207 	/*
208 	 * If it's SEMI, ANTI, or inner_unique join, compute correction factors
209 	 * for cost estimation.  These will be the same for all paths.
210 	 */
211 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
212 		compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
213 									   jointype, sjinfo, restrictlist,
214 									   &extra.semifactors);
215 
216 	/*
217 	 * Decide whether it's sensible to generate parameterized paths for this
218 	 * joinrel, and if so, which relations such paths should require.  There
219 	 * is usually no need to create a parameterized result path unless there
220 	 * is a join order restriction that prevents joining one of our input rels
221 	 * directly to the parameter source rel instead of joining to the other
222 	 * input rel.  (But see allow_star_schema_join().)	This restriction
223 	 * reduces the number of parameterized paths we have to deal with at
224 	 * higher join levels, without compromising the quality of the resulting
225 	 * plan.  We express the restriction as a Relids set that must overlap the
226 	 * parameterization of any proposed join path.
227 	 */
228 	foreach(lc, root->join_info_list)
229 	{
230 		SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
231 
232 		/*
233 		 * SJ is relevant to this join if we have some part of its RHS
234 		 * (possibly not all of it), and haven't yet joined to its LHS.  (This
235 		 * test is pretty simplistic, but should be sufficient considering the
236 		 * join has already been proven legal.)  If the SJ is relevant, it
237 		 * presents constraints for joining to anything not in its RHS.
238 		 */
239 		if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
240 			!bms_overlap(joinrelids, sjinfo2->min_lefthand))
241 			extra.param_source_rels = bms_join(extra.param_source_rels,
242 											   bms_difference(root->all_baserels,
243 															  sjinfo2->min_righthand));
244 
245 		/* full joins constrain both sides symmetrically */
246 		if (sjinfo2->jointype == JOIN_FULL &&
247 			bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
248 			!bms_overlap(joinrelids, sjinfo2->min_righthand))
249 			extra.param_source_rels = bms_join(extra.param_source_rels,
250 											   bms_difference(root->all_baserels,
251 															  sjinfo2->min_lefthand));
252 	}
253 
254 	/*
255 	 * However, when a LATERAL subquery is involved, there will simply not be
256 	 * any paths for the joinrel that aren't parameterized by whatever the
257 	 * subquery is parameterized by, unless its parameterization is resolved
258 	 * within the joinrel.  So we might as well allow additional dependencies
259 	 * on whatever residual lateral dependencies the joinrel will have.
260 	 */
261 	extra.param_source_rels = bms_add_members(extra.param_source_rels,
262 											  joinrel->lateral_relids);
263 
264 	/*
265 	 * 1. Consider mergejoin paths where both relations must be explicitly
266 	 * sorted.  Skip this if we can't mergejoin.
267 	 */
268 	if (mergejoin_allowed)
269 		sort_inner_and_outer(root, joinrel, outerrel, innerrel,
270 							 jointype, &extra);
271 
272 	/*
273 	 * 2. Consider paths where the outer relation need not be explicitly
274 	 * sorted. This includes both nestloops and mergejoins where the outer
275 	 * path is already ordered.  Again, skip this if we can't mergejoin.
276 	 * (That's okay because we know that nestloop can't handle right/full
277 	 * joins at all, so it wouldn't work in the prohibited cases either.)
278 	 */
279 	if (mergejoin_allowed)
280 		match_unsorted_outer(root, joinrel, outerrel, innerrel,
281 							 jointype, &extra);
282 
283 #ifdef NOT_USED
284 
285 	/*
286 	 * 3. Consider paths where the inner relation need not be explicitly
287 	 * sorted.  This includes mergejoins only (nestloops were already built in
288 	 * match_unsorted_outer).
289 	 *
290 	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
291 	 * significant difference between the inner and outer side of a mergejoin,
292 	 * so match_unsorted_inner creates no paths that aren't equivalent to
293 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
294 	 * invoked with the two rels given in the other order.
295 	 */
296 	if (mergejoin_allowed)
297 		match_unsorted_inner(root, joinrel, outerrel, innerrel,
298 							 jointype, &extra);
299 #endif
300 
301 	/*
302 	 * 4. Consider paths where both outer and inner relations must be hashed
303 	 * before being joined.  As above, disregard enable_hashjoin for full
304 	 * joins, because there may be no other alternative.
305 	 */
306 	if (enable_hashjoin || jointype == JOIN_FULL)
307 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
308 							 jointype, &extra);
309 
310 	/*
311 	 * 5. If inner and outer relations are foreign tables (or joins) belonging
312 	 * to the same server and assigned to the same user to check access
313 	 * permissions as, give the FDW a chance to push down joins.
314 	 */
315 	if (joinrel->fdwroutine &&
316 		joinrel->fdwroutine->GetForeignJoinPaths)
317 		joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
318 												 outerrel, innerrel,
319 												 jointype, &extra);
320 
321 	/*
322 	 * 6. Finally, give extensions a chance to manipulate the path list.
323 	 */
324 	if (set_join_pathlist_hook)
325 		set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
326 							   jointype, &extra);
327 }
328 
329 /*
330  * We override the param_source_rels heuristic to accept nestloop paths in
331  * which the outer rel satisfies some but not all of the inner path's
332  * parameterization.  This is necessary to get good plans for star-schema
333  * scenarios, in which a parameterized path for a large table may require
334  * parameters from multiple small tables that will not get joined directly to
335  * each other.  We can handle that by stacking nestloops that have the small
336  * tables on the outside; but this breaks the rule the param_source_rels
337  * heuristic is based on, namely that parameters should not be passed down
338  * across joins unless there's a join-order-constraint-based reason to do so.
339  * So we ignore the param_source_rels restriction when this case applies.
340  *
341  * allow_star_schema_join() returns true if the param_source_rels restriction
342  * should be overridden, ie, it's okay to perform this join.
343  */
344 static inline bool
allow_star_schema_join(PlannerInfo * root,Relids outerrelids,Relids inner_paramrels)345 allow_star_schema_join(PlannerInfo *root,
346 					   Relids outerrelids,
347 					   Relids inner_paramrels)
348 {
349 	/*
350 	 * It's a star-schema case if the outer rel provides some but not all of
351 	 * the inner rel's parameterization.
352 	 */
353 	return (bms_overlap(inner_paramrels, outerrelids) &&
354 			bms_nonempty_difference(inner_paramrels, outerrelids));
355 }
356 
357 /*
358  * try_nestloop_path
359  *	  Consider a nestloop join path; if it appears useful, push it into
360  *	  the joinrel's pathlist via add_path().
361  */
362 static void
try_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)363 try_nestloop_path(PlannerInfo *root,
364 				  RelOptInfo *joinrel,
365 				  Path *outer_path,
366 				  Path *inner_path,
367 				  List *pathkeys,
368 				  JoinType jointype,
369 				  JoinPathExtraData *extra)
370 {
371 	Relids		required_outer;
372 	JoinCostWorkspace workspace;
373 	RelOptInfo *innerrel = inner_path->parent;
374 	RelOptInfo *outerrel = outer_path->parent;
375 	Relids		innerrelids;
376 	Relids		outerrelids;
377 	Relids		inner_paramrels = PATH_REQ_OUTER(inner_path);
378 	Relids		outer_paramrels = PATH_REQ_OUTER(outer_path);
379 
380 	/*
381 	 * Paths are parameterized by top-level parents, so run parameterization
382 	 * tests on the parent relids.
383 	 */
384 	if (innerrel->top_parent_relids)
385 		innerrelids = innerrel->top_parent_relids;
386 	else
387 		innerrelids = innerrel->relids;
388 
389 	if (outerrel->top_parent_relids)
390 		outerrelids = outerrel->top_parent_relids;
391 	else
392 		outerrelids = outerrel->relids;
393 
394 	/*
395 	 * Check to see if proposed path is still parameterized, and reject if the
396 	 * parameterization wouldn't be sensible --- unless allow_star_schema_join
397 	 * says to allow it anyway.  Also, we must reject if have_dangerous_phv
398 	 * doesn't like the look of it, which could only happen if the nestloop is
399 	 * still parameterized.
400 	 */
401 	required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
402 												  innerrelids, inner_paramrels);
403 	if (required_outer &&
404 		((!bms_overlap(required_outer, extra->param_source_rels) &&
405 		  !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
406 		 have_dangerous_phv(root, outerrelids, inner_paramrels)))
407 	{
408 		/* Waste no memory when we reject a path here */
409 		bms_free(required_outer);
410 		return;
411 	}
412 
413 	/*
414 	 * Do a precheck to quickly eliminate obviously-inferior paths.  We
415 	 * calculate a cheap lower bound on the path's cost and then use
416 	 * add_path_precheck() to see if the path is clearly going to be dominated
417 	 * by some existing path for the joinrel.  If not, do the full pushup with
418 	 * creating a fully valid path structure and submitting it to add_path().
419 	 * The latter two steps are expensive enough to make this two-phase
420 	 * methodology worthwhile.
421 	 */
422 	initial_cost_nestloop(root, &workspace, jointype,
423 						  outer_path, inner_path, extra);
424 
425 	if (add_path_precheck(joinrel,
426 						  workspace.startup_cost, workspace.total_cost,
427 						  pathkeys, required_outer))
428 	{
429 		/*
430 		 * If the inner path is parameterized, it is parameterized by the
431 		 * topmost parent of the outer rel, not the outer rel itself.  Fix
432 		 * that.
433 		 */
434 		if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
435 		{
436 			inner_path = reparameterize_path_by_child(root, inner_path,
437 													  outer_path->parent);
438 
439 			/*
440 			 * If we could not translate the path, we can't create nest loop
441 			 * path.
442 			 */
443 			if (!inner_path)
444 			{
445 				bms_free(required_outer);
446 				return;
447 			}
448 		}
449 
450 		add_path(joinrel, (Path *)
451 				 create_nestloop_path(root,
452 									  joinrel,
453 									  jointype,
454 									  &workspace,
455 									  extra,
456 									  outer_path,
457 									  inner_path,
458 									  extra->restrictlist,
459 									  pathkeys,
460 									  required_outer));
461 	}
462 	else
463 	{
464 		/* Waste no memory when we reject a path here */
465 		bms_free(required_outer);
466 	}
467 }
468 
469 /*
470  * try_partial_nestloop_path
471  *	  Consider a partial nestloop join path; if it appears useful, push it into
472  *	  the joinrel's partial_pathlist via add_partial_path().
473  */
474 static void
try_partial_nestloop_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,JoinType jointype,JoinPathExtraData * extra)475 try_partial_nestloop_path(PlannerInfo *root,
476 						  RelOptInfo *joinrel,
477 						  Path *outer_path,
478 						  Path *inner_path,
479 						  List *pathkeys,
480 						  JoinType jointype,
481 						  JoinPathExtraData *extra)
482 {
483 	JoinCostWorkspace workspace;
484 
485 	/*
486 	 * If the inner path is parameterized, the parameterization must be fully
487 	 * satisfied by the proposed outer path.  Parameterized partial paths are
488 	 * not supported.  The caller should already have verified that no
489 	 * extra_lateral_rels are required here.
490 	 */
491 	Assert(bms_is_empty(joinrel->lateral_relids));
492 	if (inner_path->param_info != NULL)
493 	{
494 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
495 		RelOptInfo *outerrel = outer_path->parent;
496 		Relids		outerrelids;
497 
498 		/*
499 		 * The inner and outer paths are parameterized, if at all, by the top
500 		 * level parents, not the child relations, so we must use those relids
501 		 * for our parameterization tests.
502 		 */
503 		if (outerrel->top_parent_relids)
504 			outerrelids = outerrel->top_parent_relids;
505 		else
506 			outerrelids = outerrel->relids;
507 
508 		if (!bms_is_subset(inner_paramrels, outerrelids))
509 			return;
510 	}
511 
512 	/*
513 	 * Before creating a path, get a quick lower bound on what it is likely to
514 	 * cost.  Bail out right away if it looks terrible.
515 	 */
516 	initial_cost_nestloop(root, &workspace, jointype,
517 						  outer_path, inner_path, extra);
518 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
519 		return;
520 
521 	/*
522 	 * If the inner path is parameterized, it is parameterized by the topmost
523 	 * parent of the outer rel, not the outer rel itself.  Fix that.
524 	 */
525 	if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
526 	{
527 		inner_path = reparameterize_path_by_child(root, inner_path,
528 												  outer_path->parent);
529 
530 		/*
531 		 * If we could not translate the path, we can't create nest loop path.
532 		 */
533 		if (!inner_path)
534 			return;
535 	}
536 
537 	/* Might be good enough to be worth trying, so let's try it. */
538 	add_partial_path(joinrel, (Path *)
539 					 create_nestloop_path(root,
540 										  joinrel,
541 										  jointype,
542 										  &workspace,
543 										  extra,
544 										  outer_path,
545 										  inner_path,
546 										  extra->restrictlist,
547 										  pathkeys,
548 										  NULL));
549 }
550 
551 /*
552  * try_mergejoin_path
553  *	  Consider a merge join path; if it appears useful, push it into
554  *	  the joinrel's pathlist via add_path().
555  */
556 static void
try_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra,bool is_partial)557 try_mergejoin_path(PlannerInfo *root,
558 				   RelOptInfo *joinrel,
559 				   Path *outer_path,
560 				   Path *inner_path,
561 				   List *pathkeys,
562 				   List *mergeclauses,
563 				   List *outersortkeys,
564 				   List *innersortkeys,
565 				   JoinType jointype,
566 				   JoinPathExtraData *extra,
567 				   bool is_partial)
568 {
569 	Relids		required_outer;
570 	JoinCostWorkspace workspace;
571 
572 	if (is_partial)
573 	{
574 		try_partial_mergejoin_path(root,
575 								   joinrel,
576 								   outer_path,
577 								   inner_path,
578 								   pathkeys,
579 								   mergeclauses,
580 								   outersortkeys,
581 								   innersortkeys,
582 								   jointype,
583 								   extra);
584 		return;
585 	}
586 
587 	/*
588 	 * Check to see if proposed path is still parameterized, and reject if the
589 	 * parameterization wouldn't be sensible.
590 	 */
591 	required_outer = calc_non_nestloop_required_outer(outer_path,
592 													  inner_path);
593 	if (required_outer &&
594 		!bms_overlap(required_outer, extra->param_source_rels))
595 	{
596 		/* Waste no memory when we reject a path here */
597 		bms_free(required_outer);
598 		return;
599 	}
600 
601 	/*
602 	 * If the given paths are already well enough ordered, we can skip doing
603 	 * an explicit sort.
604 	 */
605 	if (outersortkeys &&
606 		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
607 		outersortkeys = NIL;
608 	if (innersortkeys &&
609 		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
610 		innersortkeys = NIL;
611 
612 	/*
613 	 * See comments in try_nestloop_path().
614 	 */
615 	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
616 						   outer_path, inner_path,
617 						   outersortkeys, innersortkeys,
618 						   extra);
619 
620 	if (add_path_precheck(joinrel,
621 						  workspace.startup_cost, workspace.total_cost,
622 						  pathkeys, required_outer))
623 	{
624 		add_path(joinrel, (Path *)
625 				 create_mergejoin_path(root,
626 									   joinrel,
627 									   jointype,
628 									   &workspace,
629 									   extra,
630 									   outer_path,
631 									   inner_path,
632 									   extra->restrictlist,
633 									   pathkeys,
634 									   required_outer,
635 									   mergeclauses,
636 									   outersortkeys,
637 									   innersortkeys));
638 	}
639 	else
640 	{
641 		/* Waste no memory when we reject a path here */
642 		bms_free(required_outer);
643 	}
644 }
645 
646 /*
647  * try_partial_mergejoin_path
648  *	  Consider a partial merge join path; if it appears useful, push it into
649  *	  the joinrel's pathlist via add_partial_path().
650  */
651 static void
try_partial_mergejoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * pathkeys,List * mergeclauses,List * outersortkeys,List * innersortkeys,JoinType jointype,JoinPathExtraData * extra)652 try_partial_mergejoin_path(PlannerInfo *root,
653 						   RelOptInfo *joinrel,
654 						   Path *outer_path,
655 						   Path *inner_path,
656 						   List *pathkeys,
657 						   List *mergeclauses,
658 						   List *outersortkeys,
659 						   List *innersortkeys,
660 						   JoinType jointype,
661 						   JoinPathExtraData *extra)
662 {
663 	JoinCostWorkspace workspace;
664 
665 	/*
666 	 * See comments in try_partial_hashjoin_path().
667 	 */
668 	Assert(bms_is_empty(joinrel->lateral_relids));
669 	if (inner_path->param_info != NULL)
670 	{
671 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
672 
673 		if (!bms_is_empty(inner_paramrels))
674 			return;
675 	}
676 
677 	/*
678 	 * If the given paths are already well enough ordered, we can skip doing
679 	 * an explicit sort.
680 	 */
681 	if (outersortkeys &&
682 		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
683 		outersortkeys = NIL;
684 	if (innersortkeys &&
685 		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
686 		innersortkeys = NIL;
687 
688 	/*
689 	 * See comments in try_partial_nestloop_path().
690 	 */
691 	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
692 						   outer_path, inner_path,
693 						   outersortkeys, innersortkeys,
694 						   extra);
695 
696 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
697 		return;
698 
699 	/* Might be good enough to be worth trying, so let's try it. */
700 	add_partial_path(joinrel, (Path *)
701 					 create_mergejoin_path(root,
702 										   joinrel,
703 										   jointype,
704 										   &workspace,
705 										   extra,
706 										   outer_path,
707 										   inner_path,
708 										   extra->restrictlist,
709 										   pathkeys,
710 										   NULL,
711 										   mergeclauses,
712 										   outersortkeys,
713 										   innersortkeys));
714 }
715 
716 /*
717  * try_hashjoin_path
718  *	  Consider a hash join path; if it appears useful, push it into
719  *	  the joinrel's pathlist via add_path().
720  */
721 static void
try_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra)722 try_hashjoin_path(PlannerInfo *root,
723 				  RelOptInfo *joinrel,
724 				  Path *outer_path,
725 				  Path *inner_path,
726 				  List *hashclauses,
727 				  JoinType jointype,
728 				  JoinPathExtraData *extra)
729 {
730 	Relids		required_outer;
731 	JoinCostWorkspace workspace;
732 
733 	/*
734 	 * Check to see if proposed path is still parameterized, and reject if the
735 	 * parameterization wouldn't be sensible.
736 	 */
737 	required_outer = calc_non_nestloop_required_outer(outer_path,
738 													  inner_path);
739 	if (required_outer &&
740 		!bms_overlap(required_outer, extra->param_source_rels))
741 	{
742 		/* Waste no memory when we reject a path here */
743 		bms_free(required_outer);
744 		return;
745 	}
746 
747 	/*
748 	 * See comments in try_nestloop_path().  Also note that hashjoin paths
749 	 * never have any output pathkeys, per comments in create_hashjoin_path.
750 	 */
751 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
752 						  outer_path, inner_path, extra, false);
753 
754 	if (add_path_precheck(joinrel,
755 						  workspace.startup_cost, workspace.total_cost,
756 						  NIL, required_outer))
757 	{
758 		add_path(joinrel, (Path *)
759 				 create_hashjoin_path(root,
760 									  joinrel,
761 									  jointype,
762 									  &workspace,
763 									  extra,
764 									  outer_path,
765 									  inner_path,
766 									  false,	/* parallel_hash */
767 									  extra->restrictlist,
768 									  required_outer,
769 									  hashclauses));
770 	}
771 	else
772 	{
773 		/* Waste no memory when we reject a path here */
774 		bms_free(required_outer);
775 	}
776 }
777 
778 /*
779  * try_partial_hashjoin_path
780  *	  Consider a partial hashjoin join path; if it appears useful, push it into
781  *	  the joinrel's partial_pathlist via add_partial_path().
782  *	  The outer side is partial.  If parallel_hash is true, then the inner path
783  *	  must be partial and will be run in parallel to create one or more shared
784  *	  hash tables; otherwise the inner path must be complete and a copy of it
785  *	  is run in every process to create separate identical private hash tables.
786  */
787 static void
try_partial_hashjoin_path(PlannerInfo * root,RelOptInfo * joinrel,Path * outer_path,Path * inner_path,List * hashclauses,JoinType jointype,JoinPathExtraData * extra,bool parallel_hash)788 try_partial_hashjoin_path(PlannerInfo *root,
789 						  RelOptInfo *joinrel,
790 						  Path *outer_path,
791 						  Path *inner_path,
792 						  List *hashclauses,
793 						  JoinType jointype,
794 						  JoinPathExtraData *extra,
795 						  bool parallel_hash)
796 {
797 	JoinCostWorkspace workspace;
798 
799 	/*
800 	 * If the inner path is parameterized, the parameterization must be fully
801 	 * satisfied by the proposed outer path.  Parameterized partial paths are
802 	 * not supported.  The caller should already have verified that no
803 	 * extra_lateral_rels are required here.
804 	 */
805 	Assert(bms_is_empty(joinrel->lateral_relids));
806 	if (inner_path->param_info != NULL)
807 	{
808 		Relids		inner_paramrels = inner_path->param_info->ppi_req_outer;
809 
810 		if (!bms_is_empty(inner_paramrels))
811 			return;
812 	}
813 
814 	/*
815 	 * Before creating a path, get a quick lower bound on what it is likely to
816 	 * cost.  Bail out right away if it looks terrible.
817 	 */
818 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
819 						  outer_path, inner_path, extra, parallel_hash);
820 	if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
821 		return;
822 
823 	/* Might be good enough to be worth trying, so let's try it. */
824 	add_partial_path(joinrel, (Path *)
825 					 create_hashjoin_path(root,
826 										  joinrel,
827 										  jointype,
828 										  &workspace,
829 										  extra,
830 										  outer_path,
831 										  inner_path,
832 										  parallel_hash,
833 										  extra->restrictlist,
834 										  NULL,
835 										  hashclauses));
836 }
837 
838 /*
839  * clause_sides_match_join
840  *	  Determine whether a join clause is of the right form to use in this join.
841  *
842  * We already know that the clause is a binary opclause referencing only the
843  * rels in the current join.  The point here is to check whether it has the
844  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
845  * rather than mixing outer and inner vars on either side.  If it matches,
846  * we set the transient flag outer_is_left to identify which side is which.
847  */
848 static inline bool
clause_sides_match_join(RestrictInfo * rinfo,RelOptInfo * outerrel,RelOptInfo * innerrel)849 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
850 						RelOptInfo *innerrel)
851 {
852 	if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
853 		bms_is_subset(rinfo->right_relids, innerrel->relids))
854 	{
855 		/* lefthand side is outer */
856 		rinfo->outer_is_left = true;
857 		return true;
858 	}
859 	else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
860 			 bms_is_subset(rinfo->right_relids, outerrel->relids))
861 	{
862 		/* righthand side is outer */
863 		rinfo->outer_is_left = false;
864 		return true;
865 	}
866 	return false;				/* no good for these input relations */
867 }
868 
869 /*
870  * sort_inner_and_outer
871  *	  Create mergejoin join paths by explicitly sorting both the outer and
872  *	  inner join relations on each available merge ordering.
873  *
874  * 'joinrel' is the join relation
875  * 'outerrel' is the outer join relation
876  * 'innerrel' is the inner join relation
877  * 'jointype' is the type of join to do
878  * 'extra' contains additional input values
879  */
880 static void
sort_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)881 sort_inner_and_outer(PlannerInfo *root,
882 					 RelOptInfo *joinrel,
883 					 RelOptInfo *outerrel,
884 					 RelOptInfo *innerrel,
885 					 JoinType jointype,
886 					 JoinPathExtraData *extra)
887 {
888 	JoinType	save_jointype = jointype;
889 	Path	   *outer_path;
890 	Path	   *inner_path;
891 	Path	   *cheapest_partial_outer = NULL;
892 	Path	   *cheapest_safe_inner = NULL;
893 	List	   *all_pathkeys;
894 	ListCell   *l;
895 
896 	/*
897 	 * We only consider the cheapest-total-cost input paths, since we are
898 	 * assuming here that a sort is required.  We will consider
899 	 * cheapest-startup-cost input paths later, and only if they don't need a
900 	 * sort.
901 	 *
902 	 * This function intentionally does not consider parameterized input
903 	 * paths, except when the cheapest-total is parameterized.  If we did so,
904 	 * we'd have a combinatorial explosion of mergejoin paths of dubious
905 	 * value.  This interacts with decisions elsewhere that also discriminate
906 	 * against mergejoins with parameterized inputs; see comments in
907 	 * src/backend/optimizer/README.
908 	 */
909 	outer_path = outerrel->cheapest_total_path;
910 	inner_path = innerrel->cheapest_total_path;
911 
912 	/*
913 	 * If either cheapest-total path is parameterized by the other rel, we
914 	 * can't use a mergejoin.  (There's no use looking for alternative input
915 	 * paths, since these should already be the least-parameterized available
916 	 * paths.)
917 	 */
918 	if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
919 		PATH_PARAM_BY_REL(inner_path, outerrel))
920 		return;
921 
922 	/*
923 	 * If unique-ification is requested, do it and then handle as a plain
924 	 * inner join.
925 	 */
926 	if (jointype == JOIN_UNIQUE_OUTER)
927 	{
928 		outer_path = (Path *) create_unique_path(root, outerrel,
929 												 outer_path, extra->sjinfo);
930 		Assert(outer_path);
931 		jointype = JOIN_INNER;
932 	}
933 	else if (jointype == JOIN_UNIQUE_INNER)
934 	{
935 		inner_path = (Path *) create_unique_path(root, innerrel,
936 												 inner_path, extra->sjinfo);
937 		Assert(inner_path);
938 		jointype = JOIN_INNER;
939 	}
940 
941 	/*
942 	 * If the joinrel is parallel-safe, we may be able to consider a partial
943 	 * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
944 	 * outer path will be partial, and therefore we won't be able to properly
945 	 * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL and
946 	 * JOIN_RIGHT, because they can produce false null extended rows.  Also,
947 	 * the resulting path must not be parameterized.
948 	 */
949 	if (joinrel->consider_parallel &&
950 		save_jointype != JOIN_UNIQUE_OUTER &&
951 		save_jointype != JOIN_FULL &&
952 		save_jointype != JOIN_RIGHT &&
953 		outerrel->partial_pathlist != NIL &&
954 		bms_is_empty(joinrel->lateral_relids))
955 	{
956 		cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
957 
958 		if (inner_path->parallel_safe)
959 			cheapest_safe_inner = inner_path;
960 		else if (save_jointype != JOIN_UNIQUE_INNER)
961 			cheapest_safe_inner =
962 				get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
963 	}
964 
965 	/*
966 	 * Each possible ordering of the available mergejoin clauses will generate
967 	 * a differently-sorted result path at essentially the same cost.  We have
968 	 * no basis for choosing one over another at this level of joining, but
969 	 * some sort orders may be more useful than others for higher-level
970 	 * mergejoins, so it's worth considering multiple orderings.
971 	 *
972 	 * Actually, it's not quite true that every mergeclause ordering will
973 	 * generate a different path order, because some of the clauses may be
974 	 * partially redundant (refer to the same EquivalenceClasses).  Therefore,
975 	 * what we do is convert the mergeclause list to a list of canonical
976 	 * pathkeys, and then consider different orderings of the pathkeys.
977 	 *
978 	 * Generating a path for *every* permutation of the pathkeys doesn't seem
979 	 * like a winning strategy; the cost in planning time is too high. For
980 	 * now, we generate one path for each pathkey, listing that pathkey first
981 	 * and the rest in random order.  This should allow at least a one-clause
982 	 * mergejoin without re-sorting against any other possible mergejoin
983 	 * partner path.  But if we've not guessed the right ordering of secondary
984 	 * keys, we may end up evaluating clauses as qpquals when they could have
985 	 * been done as mergeclauses.  (In practice, it's rare that there's more
986 	 * than two or three mergeclauses, so expending a huge amount of thought
987 	 * on that is probably not worth it.)
988 	 *
989 	 * The pathkey order returned by select_outer_pathkeys_for_merge() has
990 	 * some heuristics behind it (see that function), so be sure to try it
991 	 * exactly as-is as well as making variants.
992 	 */
993 	all_pathkeys = select_outer_pathkeys_for_merge(root,
994 												   extra->mergeclause_list,
995 												   joinrel);
996 
997 	foreach(l, all_pathkeys)
998 	{
999 		List	   *front_pathkey = (List *) lfirst(l);
1000 		List	   *cur_mergeclauses;
1001 		List	   *outerkeys;
1002 		List	   *innerkeys;
1003 		List	   *merge_pathkeys;
1004 
1005 		/* Make a pathkey list with this guy first */
1006 		if (l != list_head(all_pathkeys))
1007 			outerkeys = lcons(front_pathkey,
1008 							  list_delete_ptr(list_copy(all_pathkeys),
1009 											  front_pathkey));
1010 		else
1011 			outerkeys = all_pathkeys;	/* no work at first one... */
1012 
1013 		/* Sort the mergeclauses into the corresponding ordering */
1014 		cur_mergeclauses =
1015 			find_mergeclauses_for_outer_pathkeys(root,
1016 												 outerkeys,
1017 												 extra->mergeclause_list);
1018 
1019 		/* Should have used them all... */
1020 		Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1021 
1022 		/* Build sort pathkeys for the inner side */
1023 		innerkeys = make_inner_pathkeys_for_merge(root,
1024 												  cur_mergeclauses,
1025 												  outerkeys);
1026 
1027 		/* Build pathkeys representing output sort order */
1028 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1029 											 outerkeys);
1030 
1031 		/*
1032 		 * And now we can make the path.
1033 		 *
1034 		 * Note: it's possible that the cheapest paths will already be sorted
1035 		 * properly.  try_mergejoin_path will detect that case and suppress an
1036 		 * explicit sort step, so we needn't do so here.
1037 		 */
1038 		try_mergejoin_path(root,
1039 						   joinrel,
1040 						   outer_path,
1041 						   inner_path,
1042 						   merge_pathkeys,
1043 						   cur_mergeclauses,
1044 						   outerkeys,
1045 						   innerkeys,
1046 						   jointype,
1047 						   extra,
1048 						   false);
1049 
1050 		/*
1051 		 * If we have partial outer and parallel safe inner path then try
1052 		 * partial mergejoin path.
1053 		 */
1054 		if (cheapest_partial_outer && cheapest_safe_inner)
1055 			try_partial_mergejoin_path(root,
1056 									   joinrel,
1057 									   cheapest_partial_outer,
1058 									   cheapest_safe_inner,
1059 									   merge_pathkeys,
1060 									   cur_mergeclauses,
1061 									   outerkeys,
1062 									   innerkeys,
1063 									   jointype,
1064 									   extra);
1065 	}
1066 }
1067 
1068 /*
1069  * generate_mergejoin_paths
1070  *	Creates possible mergejoin paths for input outerpath.
1071  *
1072  * We generate mergejoins if mergejoin clauses are available.  We have
1073  * two ways to generate the inner path for a mergejoin: sort the cheapest
1074  * inner path, or use an inner path that is already suitably ordered for the
1075  * merge.  If we have several mergeclauses, it could be that there is no inner
1076  * path (or only a very expensive one) for the full list of mergeclauses, but
1077  * better paths exist if we truncate the mergeclause list (thereby discarding
1078  * some sort key requirements).  So, we consider truncations of the
1079  * mergeclause list as well as the full list.  (Ideally we'd consider all
1080  * subsets of the mergeclause list, but that seems way too expensive.)
1081  */
1082 static void
generate_mergejoin_paths(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * innerrel,Path * outerpath,JoinType jointype,JoinPathExtraData * extra,bool useallclauses,Path * inner_cheapest_total,List * merge_pathkeys,bool is_partial)1083 generate_mergejoin_paths(PlannerInfo *root,
1084 						 RelOptInfo *joinrel,
1085 						 RelOptInfo *innerrel,
1086 						 Path *outerpath,
1087 						 JoinType jointype,
1088 						 JoinPathExtraData *extra,
1089 						 bool useallclauses,
1090 						 Path *inner_cheapest_total,
1091 						 List *merge_pathkeys,
1092 						 bool is_partial)
1093 {
1094 	List	   *mergeclauses;
1095 	List	   *innersortkeys;
1096 	List	   *trialsortkeys;
1097 	Path	   *cheapest_startup_inner;
1098 	Path	   *cheapest_total_inner;
1099 	JoinType	save_jointype = jointype;
1100 	int			num_sortkeys;
1101 	int			sortkeycnt;
1102 
1103 	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1104 		jointype = JOIN_INNER;
1105 
1106 	/* Look for useful mergeclauses (if any) */
1107 	mergeclauses =
1108 		find_mergeclauses_for_outer_pathkeys(root,
1109 											 outerpath->pathkeys,
1110 											 extra->mergeclause_list);
1111 
1112 	/*
1113 	 * Done with this outer path if no chance for a mergejoin.
1114 	 *
1115 	 * Special corner case: for "x FULL JOIN y ON true", there will be no join
1116 	 * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
1117 	 * but since mergejoin is our only join type that supports FULL JOIN
1118 	 * without any join clauses, it's necessary to generate a clauseless
1119 	 * mergejoin path instead.
1120 	 */
1121 	if (mergeclauses == NIL)
1122 	{
1123 		if (jointype == JOIN_FULL)
1124 			 /* okay to try for mergejoin */ ;
1125 		else
1126 			return;
1127 	}
1128 	if (useallclauses &&
1129 		list_length(mergeclauses) != list_length(extra->mergeclause_list))
1130 		return;
1131 
1132 	/* Compute the required ordering of the inner path */
1133 	innersortkeys = make_inner_pathkeys_for_merge(root,
1134 												  mergeclauses,
1135 												  outerpath->pathkeys);
1136 
1137 	/*
1138 	 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1139 	 * a sort will be needed, only cheapest total cost matters. (But
1140 	 * try_mergejoin_path will do the right thing if inner_cheapest_total is
1141 	 * already correctly sorted.)
1142 	 */
1143 	try_mergejoin_path(root,
1144 					   joinrel,
1145 					   outerpath,
1146 					   inner_cheapest_total,
1147 					   merge_pathkeys,
1148 					   mergeclauses,
1149 					   NIL,
1150 					   innersortkeys,
1151 					   jointype,
1152 					   extra,
1153 					   is_partial);
1154 
1155 	/* Can't do anything else if inner path needs to be unique'd */
1156 	if (save_jointype == JOIN_UNIQUE_INNER)
1157 		return;
1158 
1159 	/*
1160 	 * Look for presorted inner paths that satisfy the innersortkey list ---
1161 	 * or any truncation thereof, if we are allowed to build a mergejoin using
1162 	 * a subset of the merge clauses.  Here, we consider both cheap startup
1163 	 * cost and cheap total cost.
1164 	 *
1165 	 * Currently we do not consider parameterized inner paths here. This
1166 	 * interacts with decisions elsewhere that also discriminate against
1167 	 * mergejoins with parameterized inputs; see comments in
1168 	 * src/backend/optimizer/README.
1169 	 *
1170 	 * As we shorten the sortkey list, we should consider only paths that are
1171 	 * strictly cheaper than (in particular, not the same as) any path found
1172 	 * in an earlier iteration.  Otherwise we'd be intentionally using fewer
1173 	 * merge keys than a given path allows (treating the rest as plain
1174 	 * joinquals), which is unlikely to be a good idea.  Also, eliminating
1175 	 * paths here on the basis of compare_path_costs is a lot cheaper than
1176 	 * building the mergejoin path only to throw it away.
1177 	 *
1178 	 * If inner_cheapest_total is well enough sorted to have not required a
1179 	 * sort in the path made above, we shouldn't make a duplicate path with
1180 	 * it, either.  We handle that case with the same logic that handles the
1181 	 * previous consideration, by initializing the variables that track
1182 	 * cheapest-so-far properly.  Note that we do NOT reject
1183 	 * inner_cheapest_total if we find it matches some shorter set of
1184 	 * pathkeys.  That case corresponds to using fewer mergekeys to avoid
1185 	 * sorting inner_cheapest_total, whereas we did sort it above, so the
1186 	 * plans being considered are different.
1187 	 */
1188 	if (pathkeys_contained_in(innersortkeys,
1189 							  inner_cheapest_total->pathkeys))
1190 	{
1191 		/* inner_cheapest_total didn't require a sort */
1192 		cheapest_startup_inner = inner_cheapest_total;
1193 		cheapest_total_inner = inner_cheapest_total;
1194 	}
1195 	else
1196 	{
1197 		/* it did require a sort, at least for the full set of keys */
1198 		cheapest_startup_inner = NULL;
1199 		cheapest_total_inner = NULL;
1200 	}
1201 	num_sortkeys = list_length(innersortkeys);
1202 	if (num_sortkeys > 1 && !useallclauses)
1203 		trialsortkeys = list_copy(innersortkeys);	/* need modifiable copy */
1204 	else
1205 		trialsortkeys = innersortkeys;	/* won't really truncate */
1206 
1207 	for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1208 	{
1209 		Path	   *innerpath;
1210 		List	   *newclauses = NIL;
1211 
1212 		/*
1213 		 * Look for an inner path ordered well enough for the first
1214 		 * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
1215 		 * destructively, which is why we made a copy...
1216 		 */
1217 		trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1218 		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1219 												   trialsortkeys,
1220 												   NULL,
1221 												   TOTAL_COST,
1222 												   is_partial);
1223 		if (innerpath != NULL &&
1224 			(cheapest_total_inner == NULL ||
1225 			 compare_path_costs(innerpath, cheapest_total_inner,
1226 								TOTAL_COST) < 0))
1227 		{
1228 			/* Found a cheap (or even-cheaper) sorted path */
1229 			/* Select the right mergeclauses, if we didn't already */
1230 			if (sortkeycnt < num_sortkeys)
1231 			{
1232 				newclauses =
1233 					trim_mergeclauses_for_inner_pathkeys(root,
1234 														 mergeclauses,
1235 														 trialsortkeys);
1236 				Assert(newclauses != NIL);
1237 			}
1238 			else
1239 				newclauses = mergeclauses;
1240 			try_mergejoin_path(root,
1241 							   joinrel,
1242 							   outerpath,
1243 							   innerpath,
1244 							   merge_pathkeys,
1245 							   newclauses,
1246 							   NIL,
1247 							   NIL,
1248 							   jointype,
1249 							   extra,
1250 							   is_partial);
1251 			cheapest_total_inner = innerpath;
1252 		}
1253 		/* Same on the basis of cheapest startup cost ... */
1254 		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1255 												   trialsortkeys,
1256 												   NULL,
1257 												   STARTUP_COST,
1258 												   is_partial);
1259 		if (innerpath != NULL &&
1260 			(cheapest_startup_inner == NULL ||
1261 			 compare_path_costs(innerpath, cheapest_startup_inner,
1262 								STARTUP_COST) < 0))
1263 		{
1264 			/* Found a cheap (or even-cheaper) sorted path */
1265 			if (innerpath != cheapest_total_inner)
1266 			{
1267 				/*
1268 				 * Avoid rebuilding clause list if we already made one; saves
1269 				 * memory in big join trees...
1270 				 */
1271 				if (newclauses == NIL)
1272 				{
1273 					if (sortkeycnt < num_sortkeys)
1274 					{
1275 						newclauses =
1276 							trim_mergeclauses_for_inner_pathkeys(root,
1277 																 mergeclauses,
1278 																 trialsortkeys);
1279 						Assert(newclauses != NIL);
1280 					}
1281 					else
1282 						newclauses = mergeclauses;
1283 				}
1284 				try_mergejoin_path(root,
1285 								   joinrel,
1286 								   outerpath,
1287 								   innerpath,
1288 								   merge_pathkeys,
1289 								   newclauses,
1290 								   NIL,
1291 								   NIL,
1292 								   jointype,
1293 								   extra,
1294 								   is_partial);
1295 			}
1296 			cheapest_startup_inner = innerpath;
1297 		}
1298 
1299 		/*
1300 		 * Don't consider truncated sortkeys if we need all clauses.
1301 		 */
1302 		if (useallclauses)
1303 			break;
1304 	}
1305 }
1306 
1307 /*
1308  * match_unsorted_outer
1309  *	  Creates possible join paths for processing a single join relation
1310  *	  'joinrel' by employing either iterative substitution or
1311  *	  mergejoining on each of its possible outer paths (considering
1312  *	  only outer paths that are already ordered well enough for merging).
1313  *
1314  * We always generate a nestloop path for each available outer path.
1315  * In fact we may generate as many as five: one on the cheapest-total-cost
1316  * inner path, one on the same with materialization, one on the
1317  * cheapest-startup-cost inner path (if different), one on the
1318  * cheapest-total inner-indexscan path (if any), and one on the
1319  * cheapest-startup inner-indexscan path (if different).
1320  *
1321  * We also consider mergejoins if mergejoin clauses are available.  See
1322  * detailed comments in generate_mergejoin_paths.
1323  *
1324  * 'joinrel' is the join relation
1325  * 'outerrel' is the outer join relation
1326  * 'innerrel' is the inner join relation
1327  * 'jointype' is the type of join to do
1328  * 'extra' contains additional input values
1329  */
1330 static void
match_unsorted_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1331 match_unsorted_outer(PlannerInfo *root,
1332 					 RelOptInfo *joinrel,
1333 					 RelOptInfo *outerrel,
1334 					 RelOptInfo *innerrel,
1335 					 JoinType jointype,
1336 					 JoinPathExtraData *extra)
1337 {
1338 	JoinType	save_jointype = jointype;
1339 	bool		nestjoinOK;
1340 	bool		useallclauses;
1341 	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
1342 	Path	   *matpath = NULL;
1343 	ListCell   *lc1;
1344 
1345 	/*
1346 	 * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
1347 	 * are doing a right or full mergejoin, we must use *all* the mergeclauses
1348 	 * as join clauses, else we will not have a valid plan.  (Although these
1349 	 * two flags are currently inverses, keep them separate for clarity and
1350 	 * possible future changes.)
1351 	 */
1352 	switch (jointype)
1353 	{
1354 		case JOIN_INNER:
1355 		case JOIN_LEFT:
1356 		case JOIN_SEMI:
1357 		case JOIN_ANTI:
1358 			nestjoinOK = true;
1359 			useallclauses = false;
1360 			break;
1361 		case JOIN_RIGHT:
1362 		case JOIN_FULL:
1363 			nestjoinOK = false;
1364 			useallclauses = true;
1365 			break;
1366 		case JOIN_UNIQUE_OUTER:
1367 		case JOIN_UNIQUE_INNER:
1368 			jointype = JOIN_INNER;
1369 			nestjoinOK = true;
1370 			useallclauses = false;
1371 			break;
1372 		default:
1373 			elog(ERROR, "unrecognized join type: %d",
1374 				 (int) jointype);
1375 			nestjoinOK = false; /* keep compiler quiet */
1376 			useallclauses = false;
1377 			break;
1378 	}
1379 
1380 	/*
1381 	 * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1382 	 * we will consider it below as a member of cheapest_parameterized_paths,
1383 	 * but the other possibilities considered in this routine aren't usable.
1384 	 */
1385 	if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1386 		inner_cheapest_total = NULL;
1387 
1388 	/*
1389 	 * If we need to unique-ify the inner path, we will consider only the
1390 	 * cheapest-total inner.
1391 	 */
1392 	if (save_jointype == JOIN_UNIQUE_INNER)
1393 	{
1394 		/* No way to do this with an inner path parameterized by outer rel */
1395 		if (inner_cheapest_total == NULL)
1396 			return;
1397 		inner_cheapest_total = (Path *)
1398 			create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1399 		Assert(inner_cheapest_total);
1400 	}
1401 	else if (nestjoinOK)
1402 	{
1403 		/*
1404 		 * Consider materializing the cheapest inner path, unless
1405 		 * enable_material is off or the path in question materializes its
1406 		 * output anyway.
1407 		 */
1408 		if (enable_material && inner_cheapest_total != NULL &&
1409 			!ExecMaterializesOutput(inner_cheapest_total->pathtype))
1410 			matpath = (Path *)
1411 				create_material_path(innerrel, inner_cheapest_total);
1412 	}
1413 
1414 	foreach(lc1, outerrel->pathlist)
1415 	{
1416 		Path	   *outerpath = (Path *) lfirst(lc1);
1417 		List	   *merge_pathkeys;
1418 
1419 		/*
1420 		 * We cannot use an outer path that is parameterized by the inner rel.
1421 		 */
1422 		if (PATH_PARAM_BY_REL(outerpath, innerrel))
1423 			continue;
1424 
1425 		/*
1426 		 * If we need to unique-ify the outer path, it's pointless to consider
1427 		 * any but the cheapest outer.  (XXX we don't consider parameterized
1428 		 * outers, nor inners, for unique-ified cases.  Should we?)
1429 		 */
1430 		if (save_jointype == JOIN_UNIQUE_OUTER)
1431 		{
1432 			if (outerpath != outerrel->cheapest_total_path)
1433 				continue;
1434 			outerpath = (Path *) create_unique_path(root, outerrel,
1435 													outerpath, extra->sjinfo);
1436 			Assert(outerpath);
1437 		}
1438 
1439 		/*
1440 		 * The result will have this sort order (even if it is implemented as
1441 		 * a nestloop, and even if some of the mergeclauses are implemented by
1442 		 * qpquals rather than as true mergeclauses):
1443 		 */
1444 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1445 											 outerpath->pathkeys);
1446 
1447 		if (save_jointype == JOIN_UNIQUE_INNER)
1448 		{
1449 			/*
1450 			 * Consider nestloop join, but only with the unique-ified cheapest
1451 			 * inner path
1452 			 */
1453 			try_nestloop_path(root,
1454 							  joinrel,
1455 							  outerpath,
1456 							  inner_cheapest_total,
1457 							  merge_pathkeys,
1458 							  jointype,
1459 							  extra);
1460 		}
1461 		else if (nestjoinOK)
1462 		{
1463 			/*
1464 			 * Consider nestloop joins using this outer path and various
1465 			 * available paths for the inner relation.  We consider the
1466 			 * cheapest-total paths for each available parameterization of the
1467 			 * inner relation, including the unparameterized case.
1468 			 */
1469 			ListCell   *lc2;
1470 
1471 			foreach(lc2, innerrel->cheapest_parameterized_paths)
1472 			{
1473 				Path	   *innerpath = (Path *) lfirst(lc2);
1474 
1475 				try_nestloop_path(root,
1476 								  joinrel,
1477 								  outerpath,
1478 								  innerpath,
1479 								  merge_pathkeys,
1480 								  jointype,
1481 								  extra);
1482 			}
1483 
1484 			/* Also consider materialized form of the cheapest inner path */
1485 			if (matpath != NULL)
1486 				try_nestloop_path(root,
1487 								  joinrel,
1488 								  outerpath,
1489 								  matpath,
1490 								  merge_pathkeys,
1491 								  jointype,
1492 								  extra);
1493 		}
1494 
1495 		/* Can't do anything else if outer path needs to be unique'd */
1496 		if (save_jointype == JOIN_UNIQUE_OUTER)
1497 			continue;
1498 
1499 		/* Can't do anything else if inner rel is parameterized by outer */
1500 		if (inner_cheapest_total == NULL)
1501 			continue;
1502 
1503 		/* Generate merge join paths */
1504 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1505 								 save_jointype, extra, useallclauses,
1506 								 inner_cheapest_total, merge_pathkeys,
1507 								 false);
1508 	}
1509 
1510 	/*
1511 	 * Consider partial nestloop and mergejoin plan if outerrel has any
1512 	 * partial path and the joinrel is parallel-safe.  However, we can't
1513 	 * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1514 	 * therefore we won't be able to properly guarantee uniqueness.  Nor can
1515 	 * we handle extra_lateral_rels, since partial paths must not be
1516 	 * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1517 	 * because they can produce false null extended rows.
1518 	 */
1519 	if (joinrel->consider_parallel &&
1520 		save_jointype != JOIN_UNIQUE_OUTER &&
1521 		save_jointype != JOIN_FULL &&
1522 		save_jointype != JOIN_RIGHT &&
1523 		outerrel->partial_pathlist != NIL &&
1524 		bms_is_empty(joinrel->lateral_relids))
1525 	{
1526 		if (nestjoinOK)
1527 			consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1528 									   save_jointype, extra);
1529 
1530 		/*
1531 		 * If inner_cheapest_total is NULL or non parallel-safe then find the
1532 		 * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
1533 		 * can't use any alternative inner path.
1534 		 */
1535 		if (inner_cheapest_total == NULL ||
1536 			!inner_cheapest_total->parallel_safe)
1537 		{
1538 			if (save_jointype == JOIN_UNIQUE_INNER)
1539 				return;
1540 
1541 			inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
1542 																		  innerrel->pathlist);
1543 		}
1544 
1545 		if (inner_cheapest_total)
1546 			consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1547 										save_jointype, extra,
1548 										inner_cheapest_total);
1549 	}
1550 }
1551 
1552 /*
1553  * consider_parallel_mergejoin
1554  *	  Try to build partial paths for a joinrel by joining a partial path
1555  *	  for the outer relation to a complete path for the inner relation.
1556  *
1557  * 'joinrel' is the join relation
1558  * 'outerrel' is the outer join relation
1559  * 'innerrel' is the inner join relation
1560  * 'jointype' is the type of join to do
1561  * 'extra' contains additional input values
1562  * 'inner_cheapest_total' cheapest total path for innerrel
1563  */
1564 static void
consider_parallel_mergejoin(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra,Path * inner_cheapest_total)1565 consider_parallel_mergejoin(PlannerInfo *root,
1566 							RelOptInfo *joinrel,
1567 							RelOptInfo *outerrel,
1568 							RelOptInfo *innerrel,
1569 							JoinType jointype,
1570 							JoinPathExtraData *extra,
1571 							Path *inner_cheapest_total)
1572 {
1573 	ListCell   *lc1;
1574 
1575 	/* generate merge join path for each partial outer path */
1576 	foreach(lc1, outerrel->partial_pathlist)
1577 	{
1578 		Path	   *outerpath = (Path *) lfirst(lc1);
1579 		List	   *merge_pathkeys;
1580 
1581 		/*
1582 		 * Figure out what useful ordering any paths we create will have.
1583 		 */
1584 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1585 											 outerpath->pathkeys);
1586 
1587 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1588 								 extra, false, inner_cheapest_total,
1589 								 merge_pathkeys, true);
1590 	}
1591 }
1592 
1593 /*
1594  * consider_parallel_nestloop
1595  *	  Try to build partial paths for a joinrel by joining a partial path for the
1596  *	  outer relation to a complete path for the inner relation.
1597  *
1598  * 'joinrel' is the join relation
1599  * 'outerrel' is the outer join relation
1600  * 'innerrel' is the inner join relation
1601  * 'jointype' is the type of join to do
1602  * 'extra' contains additional input values
1603  */
1604 static void
consider_parallel_nestloop(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1605 consider_parallel_nestloop(PlannerInfo *root,
1606 						   RelOptInfo *joinrel,
1607 						   RelOptInfo *outerrel,
1608 						   RelOptInfo *innerrel,
1609 						   JoinType jointype,
1610 						   JoinPathExtraData *extra)
1611 {
1612 	JoinType	save_jointype = jointype;
1613 	ListCell   *lc1;
1614 
1615 	if (jointype == JOIN_UNIQUE_INNER)
1616 		jointype = JOIN_INNER;
1617 
1618 	foreach(lc1, outerrel->partial_pathlist)
1619 	{
1620 		Path	   *outerpath = (Path *) lfirst(lc1);
1621 		List	   *pathkeys;
1622 		ListCell   *lc2;
1623 
1624 		/* Figure out what useful ordering any paths we create will have. */
1625 		pathkeys = build_join_pathkeys(root, joinrel, jointype,
1626 									   outerpath->pathkeys);
1627 
1628 		/*
1629 		 * Try the cheapest parameterized paths; only those which will produce
1630 		 * an unparameterized path when joined to this outerrel will survive
1631 		 * try_partial_nestloop_path.  The cheapest unparameterized path is
1632 		 * also in this list.
1633 		 */
1634 		foreach(lc2, innerrel->cheapest_parameterized_paths)
1635 		{
1636 			Path	   *innerpath = (Path *) lfirst(lc2);
1637 
1638 			/* Can't join to an inner path that is not parallel-safe */
1639 			if (!innerpath->parallel_safe)
1640 				continue;
1641 
1642 			/*
1643 			 * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1644 			 * cheapest_total_path, and we have to unique-ify it.  (We might
1645 			 * be able to relax this to allow other safe, unparameterized
1646 			 * inner paths, but right now create_unique_path is not on board
1647 			 * with that.)
1648 			 */
1649 			if (save_jointype == JOIN_UNIQUE_INNER)
1650 			{
1651 				if (innerpath != innerrel->cheapest_total_path)
1652 					continue;
1653 				innerpath = (Path *) create_unique_path(root, innerrel,
1654 														innerpath,
1655 														extra->sjinfo);
1656 				Assert(innerpath);
1657 			}
1658 
1659 			try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1660 									  pathkeys, jointype, extra);
1661 		}
1662 	}
1663 }
1664 
1665 /*
1666  * hash_inner_and_outer
1667  *	  Create hashjoin join paths by explicitly hashing both the outer and
1668  *	  inner keys of each available hash clause.
1669  *
1670  * 'joinrel' is the join relation
1671  * 'outerrel' is the outer join relation
1672  * 'innerrel' is the inner join relation
1673  * 'jointype' is the type of join to do
1674  * 'extra' contains additional input values
1675  */
1676 static void
hash_inner_and_outer(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,JoinType jointype,JoinPathExtraData * extra)1677 hash_inner_and_outer(PlannerInfo *root,
1678 					 RelOptInfo *joinrel,
1679 					 RelOptInfo *outerrel,
1680 					 RelOptInfo *innerrel,
1681 					 JoinType jointype,
1682 					 JoinPathExtraData *extra)
1683 {
1684 	JoinType	save_jointype = jointype;
1685 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
1686 	List	   *hashclauses;
1687 	ListCell   *l;
1688 
1689 	/*
1690 	 * We need to build only one hashclauses list for any given pair of outer
1691 	 * and inner relations; all of the hashable clauses will be used as keys.
1692 	 *
1693 	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
1694 	 * usable with this pair of sub-relations.
1695 	 */
1696 	hashclauses = NIL;
1697 	foreach(l, extra->restrictlist)
1698 	{
1699 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1700 
1701 		/*
1702 		 * If processing an outer join, only use its own join clauses for
1703 		 * hashing.  For inner joins we need not be so picky.
1704 		 */
1705 		if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1706 			continue;
1707 
1708 		if (!restrictinfo->can_join ||
1709 			restrictinfo->hashjoinoperator == InvalidOid)
1710 			continue;			/* not hashjoinable */
1711 
1712 		/*
1713 		 * Check if clause has the form "outer op inner" or "inner op outer".
1714 		 */
1715 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1716 			continue;			/* no good for these input relations */
1717 
1718 		hashclauses = lappend(hashclauses, restrictinfo);
1719 	}
1720 
1721 	/* If we found any usable hashclauses, make paths */
1722 	if (hashclauses)
1723 	{
1724 		/*
1725 		 * We consider both the cheapest-total-cost and cheapest-startup-cost
1726 		 * outer paths.  There's no need to consider any but the
1727 		 * cheapest-total-cost inner path, however.
1728 		 */
1729 		Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
1730 		Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
1731 		Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
1732 
1733 		/*
1734 		 * If either cheapest-total path is parameterized by the other rel, we
1735 		 * can't use a hashjoin.  (There's no use looking for alternative
1736 		 * input paths, since these should already be the least-parameterized
1737 		 * available paths.)
1738 		 */
1739 		if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1740 			PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1741 			return;
1742 
1743 		/* Unique-ify if need be; we ignore parameterized possibilities */
1744 		if (jointype == JOIN_UNIQUE_OUTER)
1745 		{
1746 			cheapest_total_outer = (Path *)
1747 				create_unique_path(root, outerrel,
1748 								   cheapest_total_outer, extra->sjinfo);
1749 			Assert(cheapest_total_outer);
1750 			jointype = JOIN_INNER;
1751 			try_hashjoin_path(root,
1752 							  joinrel,
1753 							  cheapest_total_outer,
1754 							  cheapest_total_inner,
1755 							  hashclauses,
1756 							  jointype,
1757 							  extra);
1758 			/* no possibility of cheap startup here */
1759 		}
1760 		else if (jointype == JOIN_UNIQUE_INNER)
1761 		{
1762 			cheapest_total_inner = (Path *)
1763 				create_unique_path(root, innerrel,
1764 								   cheapest_total_inner, extra->sjinfo);
1765 			Assert(cheapest_total_inner);
1766 			jointype = JOIN_INNER;
1767 			try_hashjoin_path(root,
1768 							  joinrel,
1769 							  cheapest_total_outer,
1770 							  cheapest_total_inner,
1771 							  hashclauses,
1772 							  jointype,
1773 							  extra);
1774 			if (cheapest_startup_outer != NULL &&
1775 				cheapest_startup_outer != cheapest_total_outer)
1776 				try_hashjoin_path(root,
1777 								  joinrel,
1778 								  cheapest_startup_outer,
1779 								  cheapest_total_inner,
1780 								  hashclauses,
1781 								  jointype,
1782 								  extra);
1783 		}
1784 		else
1785 		{
1786 			/*
1787 			 * For other jointypes, we consider the cheapest startup outer
1788 			 * together with the cheapest total inner, and then consider
1789 			 * pairings of cheapest-total paths including parameterized ones.
1790 			 * There is no use in generating parameterized paths on the basis
1791 			 * of possibly cheap startup cost, so this is sufficient.
1792 			 */
1793 			ListCell   *lc1;
1794 			ListCell   *lc2;
1795 
1796 			if (cheapest_startup_outer != NULL)
1797 				try_hashjoin_path(root,
1798 								  joinrel,
1799 								  cheapest_startup_outer,
1800 								  cheapest_total_inner,
1801 								  hashclauses,
1802 								  jointype,
1803 								  extra);
1804 
1805 			foreach(lc1, outerrel->cheapest_parameterized_paths)
1806 			{
1807 				Path	   *outerpath = (Path *) lfirst(lc1);
1808 
1809 				/*
1810 				 * We cannot use an outer path that is parameterized by the
1811 				 * inner rel.
1812 				 */
1813 				if (PATH_PARAM_BY_REL(outerpath, innerrel))
1814 					continue;
1815 
1816 				foreach(lc2, innerrel->cheapest_parameterized_paths)
1817 				{
1818 					Path	   *innerpath = (Path *) lfirst(lc2);
1819 
1820 					/*
1821 					 * We cannot use an inner path that is parameterized by
1822 					 * the outer rel, either.
1823 					 */
1824 					if (PATH_PARAM_BY_REL(innerpath, outerrel))
1825 						continue;
1826 
1827 					if (outerpath == cheapest_startup_outer &&
1828 						innerpath == cheapest_total_inner)
1829 						continue;	/* already tried it */
1830 
1831 					try_hashjoin_path(root,
1832 									  joinrel,
1833 									  outerpath,
1834 									  innerpath,
1835 									  hashclauses,
1836 									  jointype,
1837 									  extra);
1838 				}
1839 			}
1840 		}
1841 
1842 		/*
1843 		 * If the joinrel is parallel-safe, we may be able to consider a
1844 		 * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
1845 		 * because the outer path will be partial, and therefore we won't be
1846 		 * able to properly guarantee uniqueness.  Similarly, we can't handle
1847 		 * JOIN_FULL and JOIN_RIGHT, because they can produce false null
1848 		 * extended rows.  Also, the resulting path must not be parameterized.
1849 		 * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
1850 		 * Hash, since in that case we're back to a single hash table with a
1851 		 * single set of match bits for each batch, but that will require
1852 		 * figuring out a deadlock-free way to wait for the probe to finish.
1853 		 */
1854 		if (joinrel->consider_parallel &&
1855 			save_jointype != JOIN_UNIQUE_OUTER &&
1856 			save_jointype != JOIN_FULL &&
1857 			save_jointype != JOIN_RIGHT &&
1858 			outerrel->partial_pathlist != NIL &&
1859 			bms_is_empty(joinrel->lateral_relids))
1860 		{
1861 			Path	   *cheapest_partial_outer;
1862 			Path	   *cheapest_partial_inner = NULL;
1863 			Path	   *cheapest_safe_inner = NULL;
1864 
1865 			cheapest_partial_outer =
1866 				(Path *) linitial(outerrel->partial_pathlist);
1867 
1868 			/*
1869 			 * Can we use a partial inner plan too, so that we can build a
1870 			 * shared hash table in parallel?  We can't handle
1871 			 * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
1872 			 */
1873 			if (innerrel->partial_pathlist != NIL &&
1874 				save_jointype != JOIN_UNIQUE_INNER &&
1875 				enable_parallel_hash)
1876 			{
1877 				cheapest_partial_inner =
1878 					(Path *) linitial(innerrel->partial_pathlist);
1879 				try_partial_hashjoin_path(root, joinrel,
1880 										  cheapest_partial_outer,
1881 										  cheapest_partial_inner,
1882 										  hashclauses, jointype, extra,
1883 										  true /* parallel_hash */ );
1884 			}
1885 
1886 			/*
1887 			 * Normally, given that the joinrel is parallel-safe, the cheapest
1888 			 * total inner path will also be parallel-safe, but if not, we'll
1889 			 * have to search for the cheapest safe, unparameterized inner
1890 			 * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
1891 			 * inner path.
1892 			 */
1893 			if (cheapest_total_inner->parallel_safe)
1894 				cheapest_safe_inner = cheapest_total_inner;
1895 			else if (save_jointype != JOIN_UNIQUE_INNER)
1896 				cheapest_safe_inner =
1897 					get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1898 
1899 			if (cheapest_safe_inner != NULL)
1900 				try_partial_hashjoin_path(root, joinrel,
1901 										  cheapest_partial_outer,
1902 										  cheapest_safe_inner,
1903 										  hashclauses, jointype, extra,
1904 										  false /* parallel_hash */ );
1905 		}
1906 	}
1907 }
1908 
1909 /*
1910  * select_mergejoin_clauses
1911  *	  Select mergejoin clauses that are usable for a particular join.
1912  *	  Returns a list of RestrictInfo nodes for those clauses.
1913  *
1914  * *mergejoin_allowed is normally set to true, but it is set to false if
1915  * this is a right/full join and there are nonmergejoinable join clauses.
1916  * The executor's mergejoin machinery cannot handle such cases, so we have
1917  * to avoid generating a mergejoin plan.  (Note that this flag does NOT
1918  * consider whether there are actually any mergejoinable clauses.  This is
1919  * correct because in some cases we need to build a clauseless mergejoin.
1920  * Simply returning NIL is therefore not enough to distinguish safe from
1921  * unsafe cases.)
1922  *
1923  * We also mark each selected RestrictInfo to show which side is currently
1924  * being considered as outer.  These are transient markings that are only
1925  * good for the duration of the current add_paths_to_joinrel() call!
1926  *
1927  * We examine each restrictinfo clause known for the join to see
1928  * if it is mergejoinable and involves vars from the two sub-relations
1929  * currently of interest.
1930  */
1931 static List *
select_mergejoin_clauses(PlannerInfo * root,RelOptInfo * joinrel,RelOptInfo * outerrel,RelOptInfo * innerrel,List * restrictlist,JoinType jointype,bool * mergejoin_allowed)1932 select_mergejoin_clauses(PlannerInfo *root,
1933 						 RelOptInfo *joinrel,
1934 						 RelOptInfo *outerrel,
1935 						 RelOptInfo *innerrel,
1936 						 List *restrictlist,
1937 						 JoinType jointype,
1938 						 bool *mergejoin_allowed)
1939 {
1940 	List	   *result_list = NIL;
1941 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
1942 	bool		have_nonmergeable_joinclause = false;
1943 	ListCell   *l;
1944 
1945 	foreach(l, restrictlist)
1946 	{
1947 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1948 
1949 		/*
1950 		 * If processing an outer join, only use its own join clauses in the
1951 		 * merge.  For inner joins we can use pushed-down clauses too. (Note:
1952 		 * we don't set have_nonmergeable_joinclause here because pushed-down
1953 		 * clauses will become otherquals not joinquals.)
1954 		 */
1955 		if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1956 			continue;
1957 
1958 		/* Check that clause is a mergeable operator clause */
1959 		if (!restrictinfo->can_join ||
1960 			restrictinfo->mergeopfamilies == NIL)
1961 		{
1962 			/*
1963 			 * The executor can handle extra joinquals that are constants, but
1964 			 * not anything else, when doing right/full merge join.  (The
1965 			 * reason to support constants is so we can do FULL JOIN ON
1966 			 * FALSE.)
1967 			 */
1968 			if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
1969 				have_nonmergeable_joinclause = true;
1970 			continue;			/* not mergejoinable */
1971 		}
1972 
1973 		/*
1974 		 * Check if clause has the form "outer op inner" or "inner op outer".
1975 		 */
1976 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1977 		{
1978 			have_nonmergeable_joinclause = true;
1979 			continue;			/* no good for these input relations */
1980 		}
1981 
1982 		/*
1983 		 * Insist that each side have a non-redundant eclass.  This
1984 		 * restriction is needed because various bits of the planner expect
1985 		 * that each clause in a merge be associable with some pathkey in a
1986 		 * canonical pathkey list, but redundant eclasses can't appear in
1987 		 * canonical sort orderings.  (XXX it might be worth relaxing this,
1988 		 * but not enough time to address it for 8.3.)
1989 		 *
1990 		 * Note: it would be bad if this condition failed for an otherwise
1991 		 * mergejoinable FULL JOIN clause, since that would result in
1992 		 * undesirable planner failure.  I believe that is not possible
1993 		 * however; a variable involved in a full join could only appear in
1994 		 * below_outer_join eclasses, which aren't considered redundant.
1995 		 *
1996 		 * This case *can* happen for left/right join clauses: the outer-side
1997 		 * variable could be equated to a constant.  Because we will propagate
1998 		 * that constant across the join clause, the loss of ability to do a
1999 		 * mergejoin is not really all that big a deal, and so it's not clear
2000 		 * that improving this is important.
2001 		 */
2002 		update_mergeclause_eclasses(root, restrictinfo);
2003 
2004 		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2005 			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2006 		{
2007 			have_nonmergeable_joinclause = true;
2008 			continue;			/* can't handle redundant eclasses */
2009 		}
2010 
2011 		result_list = lappend(result_list, restrictinfo);
2012 	}
2013 
2014 	/*
2015 	 * Report whether mergejoin is allowed (see comment at top of function).
2016 	 */
2017 	switch (jointype)
2018 	{
2019 		case JOIN_RIGHT:
2020 		case JOIN_FULL:
2021 			*mergejoin_allowed = !have_nonmergeable_joinclause;
2022 			break;
2023 		default:
2024 			*mergejoin_allowed = true;
2025 			break;
2026 	}
2027 
2028 	return result_list;
2029 }
2030