1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *	  Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/allpaths.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/sysattr.h"
22 #include "access/tsmapi.h"
23 #include "catalog/pg_class.h"
24 #include "catalog/pg_operator.h"
25 #include "catalog/pg_proc.h"
26 #include "foreign/fdwapi.h"
27 #include "miscadmin.h"
28 #include "nodes/makefuncs.h"
29 #include "nodes/nodeFuncs.h"
30 #ifdef OPTIMIZER_DEBUG
31 #include "nodes/print.h"
32 #endif
33 #include "optimizer/appendinfo.h"
34 #include "optimizer/clauses.h"
35 #include "optimizer/cost.h"
36 #include "optimizer/geqo.h"
37 #include "optimizer/inherit.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/plancat.h"
42 #include "optimizer/planner.h"
43 #include "optimizer/restrictinfo.h"
44 #include "optimizer/tlist.h"
45 #include "parser/parse_clause.h"
46 #include "parser/parsetree.h"
47 #include "partitioning/partbounds.h"
48 #include "partitioning/partprune.h"
49 #include "rewrite/rewriteManip.h"
50 #include "utils/lsyscache.h"
51 
52 
/*
 * pushdown_safety_info
 *	  Output of subquery_is_pushdown_safe: records which kinds of quals, and
 *	  which subquery output columns, must not be pushed down.
 */
typedef struct pushdown_safety_info {
	/* which output columns are unsafe to use */
	bool	   *unsafeColumns;
	/* don't push down volatile quals */
	bool		unsafeVolatile;
	/* don't push down leaky quals */
	bool		unsafeLeaky;
} pushdown_safety_info;
60 
61 /* These parameters are set by GUC */
62 bool		enable_geqo = false;	/* just in case GUC doesn't set it */
63 int			geqo_threshold;
64 int			min_parallel_table_scan_size;
65 int			min_parallel_index_scan_size;
66 
67 /* Hook for plugins to get control in set_rel_pathlist() */
68 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
69 
70 /* Hook for plugins to replace standard_join_search() */
71 join_search_hook_type join_search_hook = NULL;
72 
73 
74 static void set_base_rel_consider_startup(PlannerInfo *root);
75 static void set_base_rel_sizes(PlannerInfo *root);
76 static void set_base_rel_pathlists(PlannerInfo *root);
77 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
78 						 Index rti, RangeTblEntry *rte);
79 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
80 							 Index rti, RangeTblEntry *rte);
81 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
82 							   RangeTblEntry *rte);
83 static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
84 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
85 									  RangeTblEntry *rte);
86 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
87 								   RangeTblEntry *rte);
88 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
89 									 RangeTblEntry *rte);
90 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
91 										 RangeTblEntry *rte);
92 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
93 							 RangeTblEntry *rte);
94 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
95 								 RangeTblEntry *rte);
96 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
97 								Index rti, RangeTblEntry *rte);
98 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
99 									Index rti, RangeTblEntry *rte);
100 static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
101 										 List *live_childrels,
102 										 List *all_child_pathkeys,
103 										 List *partitioned_rels);
104 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
105 												   RelOptInfo *rel,
106 												   Relids required_outer);
107 static void accumulate_append_subpath(Path *path,
108 									  List **subpaths, List **special_subpaths);
109 static Path *get_singleton_append_subpath(Path *path);
110 static void set_dummy_rel_pathlist(RelOptInfo *rel);
111 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
112 								  Index rti, RangeTblEntry *rte);
113 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
114 								  RangeTblEntry *rte);
115 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
116 								RangeTblEntry *rte);
117 static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
118 								   RangeTblEntry *rte);
119 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
120 							 RangeTblEntry *rte);
121 static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
122 										 RangeTblEntry *rte);
123 static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
124 								RangeTblEntry *rte);
125 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
126 								   RangeTblEntry *rte);
127 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
128 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
129 									  pushdown_safety_info *safetyInfo);
130 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
131 								  pushdown_safety_info *safetyInfo);
132 static void check_output_expressions(Query *subquery,
133 									 pushdown_safety_info *safetyInfo);
134 static void compare_tlist_datatypes(List *tlist, List *colTypes,
135 									pushdown_safety_info *safetyInfo);
136 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
137 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
138 								  pushdown_safety_info *safetyInfo);
139 static void subquery_push_qual(Query *subquery,
140 							   RangeTblEntry *rte, Index rti, Node *qual);
141 static void recurse_push_qual(Node *setOp, Query *topquery,
142 							  RangeTblEntry *rte, Index rti, Node *qual);
143 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
144 
145 
146 /*
147  * make_one_rel
148  *	  Finds all possible access paths for executing a query, returning a
149  *	  single rel that represents the join of all base rels in the query.
150  */
151 RelOptInfo *
make_one_rel(PlannerInfo * root,List * joinlist)152 make_one_rel(PlannerInfo *root, List *joinlist)
153 {
154 	RelOptInfo *rel;
155 	Index		rti;
156 	double		total_pages;
157 
158 	/*
159 	 * Construct the all_baserels Relids set.
160 	 */
161 	root->all_baserels = NULL;
162 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
163 	{
164 		RelOptInfo *brel = root->simple_rel_array[rti];
165 
166 		/* there may be empty slots corresponding to non-baserel RTEs */
167 		if (brel == NULL)
168 			continue;
169 
170 		Assert(brel->relid == rti); /* sanity check on array */
171 
172 		/* ignore RTEs that are "other rels" */
173 		if (brel->reloptkind != RELOPT_BASEREL)
174 			continue;
175 
176 		root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
177 	}
178 
179 	/* Mark base rels as to whether we care about fast-start plans */
180 	set_base_rel_consider_startup(root);
181 
182 	/*
183 	 * Compute size estimates and consider_parallel flags for each base rel.
184 	 */
185 	set_base_rel_sizes(root);
186 
187 	/*
188 	 * We should now have size estimates for every actual table involved in
189 	 * the query, and we also know which if any have been deleted from the
190 	 * query by join removal, pruned by partition pruning, or eliminated by
191 	 * constraint exclusion.  So we can now compute total_table_pages.
192 	 *
193 	 * Note that appendrels are not double-counted here, even though we don't
194 	 * bother to distinguish RelOptInfos for appendrel parents, because the
195 	 * parents will have pages = 0.
196 	 *
197 	 * XXX if a table is self-joined, we will count it once per appearance,
198 	 * which perhaps is the wrong thing ... but that's not completely clear,
199 	 * and detecting self-joins here is difficult, so ignore it for now.
200 	 */
201 	total_pages = 0;
202 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
203 	{
204 		RelOptInfo *brel = root->simple_rel_array[rti];
205 
206 		if (brel == NULL)
207 			continue;
208 
209 		Assert(brel->relid == rti); /* sanity check on array */
210 
211 		if (IS_DUMMY_REL(brel))
212 			continue;
213 
214 		if (IS_SIMPLE_REL(brel))
215 			total_pages += (double) brel->pages;
216 	}
217 	root->total_table_pages = total_pages;
218 
219 	/*
220 	 * Generate access paths for each base rel.
221 	 */
222 	set_base_rel_pathlists(root);
223 
224 	/*
225 	 * Generate access paths for the entire join tree.
226 	 */
227 	rel = make_rel_from_joinlist(root, joinlist);
228 
229 	/*
230 	 * The result should join all and only the query's base rels.
231 	 */
232 	Assert(bms_equal(rel->relids, root->all_baserels));
233 
234 	return rel;
235 }
236 
237 /*
238  * set_base_rel_consider_startup
239  *	  Set the consider_[param_]startup flags for each base-relation entry.
240  *
241  * For the moment, we only deal with consider_param_startup here; because the
242  * logic for consider_startup is pretty trivial and is the same for every base
243  * relation, we just let build_simple_rel() initialize that flag correctly to
244  * start with.  If that logic ever gets more complicated it would probably
245  * be better to move it here.
246  */
247 static void
set_base_rel_consider_startup(PlannerInfo * root)248 set_base_rel_consider_startup(PlannerInfo *root)
249 {
250 	/*
251 	 * Since parameterized paths can only be used on the inside of a nestloop
252 	 * join plan, there is usually little value in considering fast-start
253 	 * plans for them.  However, for relations that are on the RHS of a SEMI
254 	 * or ANTI join, a fast-start plan can be useful because we're only going
255 	 * to care about fetching one tuple anyway.
256 	 *
257 	 * To minimize growth of planning time, we currently restrict this to
258 	 * cases where the RHS is a single base relation, not a join; there is no
259 	 * provision for consider_param_startup to get set at all on joinrels.
260 	 * Also we don't worry about appendrels.  costsize.c's costing rules for
261 	 * nestloop semi/antijoins don't consider such cases either.
262 	 */
263 	ListCell   *lc;
264 
265 	foreach(lc, root->join_info_list)
266 	{
267 		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
268 		int			varno;
269 
270 		if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
271 			bms_get_singleton_member(sjinfo->syn_righthand, &varno))
272 		{
273 			RelOptInfo *rel = find_base_rel(root, varno);
274 
275 			rel->consider_param_startup = true;
276 		}
277 	}
278 }
279 
280 /*
281  * set_base_rel_sizes
282  *	  Set the size estimates (rows and widths) for each base-relation entry.
283  *	  Also determine whether to consider parallel paths for base relations.
284  *
285  * We do this in a separate pass over the base rels so that rowcount
286  * estimates are available for parameterized path generation, and also so
287  * that each rel's consider_parallel flag is set correctly before we begin to
288  * generate paths.
289  */
290 static void
set_base_rel_sizes(PlannerInfo * root)291 set_base_rel_sizes(PlannerInfo *root)
292 {
293 	Index		rti;
294 
295 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
296 	{
297 		RelOptInfo *rel = root->simple_rel_array[rti];
298 		RangeTblEntry *rte;
299 
300 		/* there may be empty slots corresponding to non-baserel RTEs */
301 		if (rel == NULL)
302 			continue;
303 
304 		Assert(rel->relid == rti);	/* sanity check on array */
305 
306 		/* ignore RTEs that are "other rels" */
307 		if (rel->reloptkind != RELOPT_BASEREL)
308 			continue;
309 
310 		rte = root->simple_rte_array[rti];
311 
312 		/*
313 		 * If parallelism is allowable for this query in general, see whether
314 		 * it's allowable for this rel in particular.  We have to do this
315 		 * before set_rel_size(), because (a) if this rel is an inheritance
316 		 * parent, set_append_rel_size() will use and perhaps change the rel's
317 		 * consider_parallel flag, and (b) for some RTE types, set_rel_size()
318 		 * goes ahead and makes paths immediately.
319 		 */
320 		if (root->glob->parallelModeOK)
321 			set_rel_consider_parallel(root, rel, rte);
322 
323 		set_rel_size(root, rel, rti, rte);
324 	}
325 }
326 
327 /*
328  * set_base_rel_pathlists
329  *	  Finds all paths available for scanning each base-relation entry.
330  *	  Sequential scan and any available indices are considered.
331  *	  Each useful path is attached to its relation's 'pathlist' field.
332  */
333 static void
set_base_rel_pathlists(PlannerInfo * root)334 set_base_rel_pathlists(PlannerInfo *root)
335 {
336 	Index		rti;
337 
338 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
339 	{
340 		RelOptInfo *rel = root->simple_rel_array[rti];
341 
342 		/* there may be empty slots corresponding to non-baserel RTEs */
343 		if (rel == NULL)
344 			continue;
345 
346 		Assert(rel->relid == rti);	/* sanity check on array */
347 
348 		/* ignore RTEs that are "other rels" */
349 		if (rel->reloptkind != RELOPT_BASEREL)
350 			continue;
351 
352 		set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
353 	}
354 }
355 
356 /*
357  * set_rel_size
358  *	  Set size estimates for a base relation
359  */
360 static void
set_rel_size(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)361 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
362 			 Index rti, RangeTblEntry *rte)
363 {
364 	if (rel->reloptkind == RELOPT_BASEREL &&
365 		relation_excluded_by_constraints(root, rel, rte))
366 	{
367 		/*
368 		 * We proved we don't need to scan the rel via constraint exclusion,
369 		 * so set up a single dummy path for it.  Here we only check this for
370 		 * regular baserels; if it's an otherrel, CE was already checked in
371 		 * set_append_rel_size().
372 		 *
373 		 * In this case, we go ahead and set up the relation's path right away
374 		 * instead of leaving it for set_rel_pathlist to do.  This is because
375 		 * we don't have a convention for marking a rel as dummy except by
376 		 * assigning a dummy path to it.
377 		 */
378 		set_dummy_rel_pathlist(rel);
379 	}
380 	else if (rte->inh)
381 	{
382 		/* It's an "append relation", process accordingly */
383 		set_append_rel_size(root, rel, rti, rte);
384 	}
385 	else
386 	{
387 		switch (rel->rtekind)
388 		{
389 			case RTE_RELATION:
390 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
391 				{
392 					/* Foreign table */
393 					set_foreign_size(root, rel, rte);
394 				}
395 				else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
396 				{
397 					/*
398 					 * We could get here if asked to scan a partitioned table
399 					 * with ONLY.  In that case we shouldn't scan any of the
400 					 * partitions, so mark it as a dummy rel.
401 					 */
402 					set_dummy_rel_pathlist(rel);
403 				}
404 				else if (rte->tablesample != NULL)
405 				{
406 					/* Sampled relation */
407 					set_tablesample_rel_size(root, rel, rte);
408 				}
409 				else
410 				{
411 					/* Plain relation */
412 					set_plain_rel_size(root, rel, rte);
413 				}
414 				break;
415 			case RTE_SUBQUERY:
416 
417 				/*
418 				 * Subqueries don't support making a choice between
419 				 * parameterized and unparameterized paths, so just go ahead
420 				 * and build their paths immediately.
421 				 */
422 				set_subquery_pathlist(root, rel, rti, rte);
423 				break;
424 			case RTE_FUNCTION:
425 				set_function_size_estimates(root, rel);
426 				break;
427 			case RTE_TABLEFUNC:
428 				set_tablefunc_size_estimates(root, rel);
429 				break;
430 			case RTE_VALUES:
431 				set_values_size_estimates(root, rel);
432 				break;
433 			case RTE_CTE:
434 
435 				/*
436 				 * CTEs don't support making a choice between parameterized
437 				 * and unparameterized paths, so just go ahead and build their
438 				 * paths immediately.
439 				 */
440 				if (rte->self_reference)
441 					set_worktable_pathlist(root, rel, rte);
442 				else
443 					set_cte_pathlist(root, rel, rte);
444 				break;
445 			case RTE_NAMEDTUPLESTORE:
446 				/* Might as well just build the path immediately */
447 				set_namedtuplestore_pathlist(root, rel, rte);
448 				break;
449 			case RTE_RESULT:
450 				/* Might as well just build the path immediately */
451 				set_result_pathlist(root, rel, rte);
452 				break;
453 			default:
454 				elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
455 				break;
456 		}
457 	}
458 
459 	/*
460 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
461 	 */
462 	Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
463 }
464 
465 /*
466  * set_rel_pathlist
467  *	  Build access paths for a base relation
468  */
469 static void
set_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)470 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
471 				 Index rti, RangeTblEntry *rte)
472 {
473 	if (IS_DUMMY_REL(rel))
474 	{
475 		/* We already proved the relation empty, so nothing more to do */
476 	}
477 	else if (rte->inh)
478 	{
479 		/* It's an "append relation", process accordingly */
480 		set_append_rel_pathlist(root, rel, rti, rte);
481 	}
482 	else
483 	{
484 		switch (rel->rtekind)
485 		{
486 			case RTE_RELATION:
487 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
488 				{
489 					/* Foreign table */
490 					set_foreign_pathlist(root, rel, rte);
491 				}
492 				else if (rte->tablesample != NULL)
493 				{
494 					/* Sampled relation */
495 					set_tablesample_rel_pathlist(root, rel, rte);
496 				}
497 				else
498 				{
499 					/* Plain relation */
500 					set_plain_rel_pathlist(root, rel, rte);
501 				}
502 				break;
503 			case RTE_SUBQUERY:
504 				/* Subquery --- fully handled during set_rel_size */
505 				break;
506 			case RTE_FUNCTION:
507 				/* RangeFunction */
508 				set_function_pathlist(root, rel, rte);
509 				break;
510 			case RTE_TABLEFUNC:
511 				/* Table Function */
512 				set_tablefunc_pathlist(root, rel, rte);
513 				break;
514 			case RTE_VALUES:
515 				/* Values list */
516 				set_values_pathlist(root, rel, rte);
517 				break;
518 			case RTE_CTE:
519 				/* CTE reference --- fully handled during set_rel_size */
520 				break;
521 			case RTE_NAMEDTUPLESTORE:
522 				/* tuplestore reference --- fully handled during set_rel_size */
523 				break;
524 			case RTE_RESULT:
525 				/* simple Result --- fully handled during set_rel_size */
526 				break;
527 			default:
528 				elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
529 				break;
530 		}
531 	}
532 
533 	/*
534 	 * Allow a plugin to editorialize on the set of Paths for this base
535 	 * relation.  It could add new paths (such as CustomPaths) by calling
536 	 * add_path(), or add_partial_path() if parallel aware.  It could also
537 	 * delete or modify paths added by the core code.
538 	 */
539 	if (set_rel_pathlist_hook)
540 		(*set_rel_pathlist_hook) (root, rel, rti, rte);
541 
542 	/*
543 	 * If this is a baserel, we should normally consider gathering any partial
544 	 * paths we may have created for it.  We have to do this after calling the
545 	 * set_rel_pathlist_hook, else it cannot add partial paths to be included
546 	 * here.
547 	 *
548 	 * However, if this is an inheritance child, skip it.  Otherwise, we could
549 	 * end up with a very large number of gather nodes, each trying to grab
550 	 * its own pool of workers.  Instead, we'll consider gathering partial
551 	 * paths for the parent appendrel.
552 	 *
553 	 * Also, if this is the topmost scan/join rel (that is, the only baserel),
554 	 * we postpone gathering until the final scan/join targetlist is available
555 	 * (see grouping_planner).
556 	 */
557 	if (rel->reloptkind == RELOPT_BASEREL &&
558 		bms_membership(root->all_baserels) != BMS_SINGLETON)
559 		generate_useful_gather_paths(root, rel, false);
560 
561 	/* Now find the cheapest of the paths for this rel */
562 	set_cheapest(rel);
563 
564 #ifdef OPTIMIZER_DEBUG
565 	debug_print_rel(root, rel);
566 #endif
567 }
568 
569 /*
570  * set_plain_rel_size
571  *	  Set size estimates for a plain relation (no subquery, no inheritance)
572  */
573 static void
set_plain_rel_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)574 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
575 {
576 	/*
577 	 * Test any partial indexes of rel for applicability.  We must do this
578 	 * first since partial unique indexes can affect size estimates.
579 	 */
580 	check_index_predicates(root, rel);
581 
582 	/* Mark rel with estimated output rows, width, etc */
583 	set_baserel_size_estimates(root, rel);
584 }
585 
586 /*
587  * If this relation could possibly be scanned from within a worker, then set
588  * its consider_parallel flag.
589  */
590 static void
set_rel_consider_parallel(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)591 set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
592 						  RangeTblEntry *rte)
593 {
594 	/*
595 	 * The flag has previously been initialized to false, so we can just
596 	 * return if it becomes clear that we can't safely set it.
597 	 */
598 	Assert(!rel->consider_parallel);
599 
600 	/* Don't call this if parallelism is disallowed for the entire query. */
601 	Assert(root->glob->parallelModeOK);
602 
603 	/* This should only be called for baserels and appendrel children. */
604 	Assert(IS_SIMPLE_REL(rel));
605 
606 	/* Assorted checks based on rtekind. */
607 	switch (rte->rtekind)
608 	{
609 		case RTE_RELATION:
610 
611 			/*
612 			 * Currently, parallel workers can't access the leader's temporary
613 			 * tables.  We could possibly relax this if we wrote all of its
614 			 * local buffers at the start of the query and made no changes
615 			 * thereafter (maybe we could allow hint bit changes), and if we
616 			 * taught the workers to read them.  Writing a large number of
617 			 * temporary buffers could be expensive, though, and we don't have
618 			 * the rest of the necessary infrastructure right now anyway.  So
619 			 * for now, bail out if we see a temporary table.
620 			 */
621 			if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
622 				return;
623 
624 			/*
625 			 * Table sampling can be pushed down to workers if the sample
626 			 * function and its arguments are safe.
627 			 */
628 			if (rte->tablesample != NULL)
629 			{
630 				char		proparallel = func_parallel(rte->tablesample->tsmhandler);
631 
632 				if (proparallel != PROPARALLEL_SAFE)
633 					return;
634 				if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
635 					return;
636 			}
637 
638 			/*
639 			 * Ask FDWs whether they can support performing a ForeignScan
640 			 * within a worker.  Most often, the answer will be no.  For
641 			 * example, if the nature of the FDW is such that it opens a TCP
642 			 * connection with a remote server, each parallel worker would end
643 			 * up with a separate connection, and these connections might not
644 			 * be appropriately coordinated between workers and the leader.
645 			 */
646 			if (rte->relkind == RELKIND_FOREIGN_TABLE)
647 			{
648 				Assert(rel->fdwroutine);
649 				if (!rel->fdwroutine->IsForeignScanParallelSafe)
650 					return;
651 				if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
652 					return;
653 			}
654 
655 			/*
656 			 * There are additional considerations for appendrels, which we'll
657 			 * deal with in set_append_rel_size and set_append_rel_pathlist.
658 			 * For now, just set consider_parallel based on the rel's own
659 			 * quals and targetlist.
660 			 */
661 			break;
662 
663 		case RTE_SUBQUERY:
664 
665 			/*
666 			 * There's no intrinsic problem with scanning a subquery-in-FROM
667 			 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
668 			 * If the subquery doesn't happen to have any parallel-safe paths,
669 			 * then flagging it as consider_parallel won't change anything,
670 			 * but that's true for plain tables, too.  We must set
671 			 * consider_parallel based on the rel's own quals and targetlist,
672 			 * so that if a subquery path is parallel-safe but the quals and
673 			 * projection we're sticking onto it are not, we correctly mark
674 			 * the SubqueryScanPath as not parallel-safe.  (Note that
675 			 * set_subquery_pathlist() might push some of these quals down
676 			 * into the subquery itself, but that doesn't change anything.)
677 			 *
678 			 * We can't push sub-select containing LIMIT/OFFSET to workers as
679 			 * there is no guarantee that the row order will be fully
680 			 * deterministic, and applying LIMIT/OFFSET will lead to
681 			 * inconsistent results at the top-level.  (In some cases, where
682 			 * the result is ordered, we could relax this restriction.  But it
683 			 * doesn't currently seem worth expending extra effort to do so.)
684 			 */
685 			{
686 				Query	   *subquery = castNode(Query, rte->subquery);
687 
688 				if (limit_needed(subquery))
689 					return;
690 			}
691 			break;
692 
693 		case RTE_JOIN:
694 			/* Shouldn't happen; we're only considering baserels here. */
695 			Assert(false);
696 			return;
697 
698 		case RTE_FUNCTION:
699 			/* Check for parallel-restricted functions. */
700 			if (!is_parallel_safe(root, (Node *) rte->functions))
701 				return;
702 			break;
703 
704 		case RTE_TABLEFUNC:
705 			/* not parallel safe */
706 			return;
707 
708 		case RTE_VALUES:
709 			/* Check for parallel-restricted functions. */
710 			if (!is_parallel_safe(root, (Node *) rte->values_lists))
711 				return;
712 			break;
713 
714 		case RTE_CTE:
715 
716 			/*
717 			 * CTE tuplestores aren't shared among parallel workers, so we
718 			 * force all CTE scans to happen in the leader.  Also, populating
719 			 * the CTE would require executing a subplan that's not available
720 			 * in the worker, might be parallel-restricted, and must get
721 			 * executed only once.
722 			 */
723 			return;
724 
725 		case RTE_NAMEDTUPLESTORE:
726 
727 			/*
728 			 * tuplestore cannot be shared, at least without more
729 			 * infrastructure to support that.
730 			 */
731 			return;
732 
733 		case RTE_RESULT:
734 			/* RESULT RTEs, in themselves, are no problem. */
735 			break;
736 	}
737 
738 	/*
739 	 * If there's anything in baserestrictinfo that's parallel-restricted, we
740 	 * give up on parallelizing access to this relation.  We could consider
741 	 * instead postponing application of the restricted quals until we're
742 	 * above all the parallelism in the plan tree, but it's not clear that
743 	 * that would be a win in very many cases, and it might be tricky to make
744 	 * outer join clauses work correctly.  It would likely break equivalence
745 	 * classes, too.
746 	 */
747 	if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
748 		return;
749 
750 	/*
751 	 * Likewise, if the relation's outputs are not parallel-safe, give up.
752 	 * (Usually, they're just Vars, but sometimes they're not.)
753 	 */
754 	if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
755 		return;
756 
757 	/* We have a winner. */
758 	rel->consider_parallel = true;
759 }
760 
761 /*
762  * set_plain_rel_pathlist
763  *	  Build access paths for a plain relation (no subquery, no inheritance)
764  */
765 static void
set_plain_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)766 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
767 {
768 	Relids		required_outer;
769 
770 	/*
771 	 * We don't support pushing join clauses into the quals of a seqscan, but
772 	 * it could still have required parameterization due to LATERAL refs in
773 	 * its tlist.
774 	 */
775 	required_outer = rel->lateral_relids;
776 
777 	/* Consider sequential scan */
778 	add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
779 
780 	/* If appropriate, consider parallel sequential scan */
781 	if (rel->consider_parallel && required_outer == NULL)
782 		create_plain_partial_paths(root, rel);
783 
784 	/* Consider index scans */
785 	create_index_paths(root, rel);
786 
787 	/* Consider TID scans */
788 	create_tidscan_paths(root, rel);
789 }
790 
791 /*
792  * create_plain_partial_paths
793  *	  Build partial access paths for parallel scan of a plain relation
794  */
795 static void
create_plain_partial_paths(PlannerInfo * root,RelOptInfo * rel)796 create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
797 {
798 	int			parallel_workers;
799 
800 	parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
801 											   max_parallel_workers_per_gather);
802 
803 	/* If any limit was set to zero, the user doesn't want a parallel scan. */
804 	if (parallel_workers <= 0)
805 		return;
806 
807 	/* Add an unordered partial path based on a parallel sequential scan. */
808 	add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
809 }
810 
811 /*
812  * set_tablesample_rel_size
813  *	  Set size estimates for a sampled relation
814  */
815 static void
set_tablesample_rel_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)816 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
817 {
818 	TableSampleClause *tsc = rte->tablesample;
819 	TsmRoutine *tsm;
820 	BlockNumber pages;
821 	double		tuples;
822 
823 	/*
824 	 * Test any partial indexes of rel for applicability.  We must do this
825 	 * first since partial unique indexes can affect size estimates.
826 	 */
827 	check_index_predicates(root, rel);
828 
829 	/*
830 	 * Call the sampling method's estimation function to estimate the number
831 	 * of pages it will read and the number of tuples it will return.  (Note:
832 	 * we assume the function returns sane values.)
833 	 */
834 	tsm = GetTsmRoutine(tsc->tsmhandler);
835 	tsm->SampleScanGetSampleSize(root, rel, tsc->args,
836 								 &pages, &tuples);
837 
838 	/*
839 	 * For the moment, because we will only consider a SampleScan path for the
840 	 * rel, it's okay to just overwrite the pages and tuples estimates for the
841 	 * whole relation.  If we ever consider multiple path types for sampled
842 	 * rels, we'll need more complication.
843 	 */
844 	rel->pages = pages;
845 	rel->tuples = tuples;
846 
847 	/* Mark rel with estimated output rows, width, etc */
848 	set_baserel_size_estimates(root, rel);
849 }
850 
851 /*
852  * set_tablesample_rel_pathlist
853  *	  Build access paths for a sampled relation
854  */
855 static void
set_tablesample_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)856 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
857 {
858 	Relids		required_outer;
859 	Path	   *path;
860 
861 	/*
862 	 * We don't support pushing join clauses into the quals of a samplescan,
863 	 * but it could still have required parameterization due to LATERAL refs
864 	 * in its tlist or TABLESAMPLE arguments.
865 	 */
866 	required_outer = rel->lateral_relids;
867 
868 	/* Consider sampled scan */
869 	path = create_samplescan_path(root, rel, required_outer);
870 
871 	/*
872 	 * If the sampling method does not support repeatable scans, we must avoid
873 	 * plans that would scan the rel multiple times.  Ideally, we'd simply
874 	 * avoid putting the rel on the inside of a nestloop join; but adding such
875 	 * a consideration to the planner seems like a great deal of complication
876 	 * to support an uncommon usage of second-rate sampling methods.  Instead,
877 	 * if there is a risk that the query might perform an unsafe join, just
878 	 * wrap the SampleScan in a Materialize node.  We can check for joins by
879 	 * counting the membership of all_baserels (note that this correctly
880 	 * counts inheritance trees as single rels).  If we're inside a subquery,
881 	 * we can't easily check whether a join might occur in the outer query, so
882 	 * just assume one is possible.
883 	 *
884 	 * GetTsmRoutine is relatively expensive compared to the other tests here,
885 	 * so check repeatable_across_scans last, even though that's a bit odd.
886 	 */
887 	if ((root->query_level > 1 ||
888 		 bms_membership(root->all_baserels) != BMS_SINGLETON) &&
889 		!(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
890 	{
891 		path = (Path *) create_material_path(rel, path);
892 	}
893 
894 	add_path(rel, path);
895 
896 	/* For the moment, at least, there are no other paths to consider */
897 }
898 
899 /*
900  * set_foreign_size
901  *		Set size estimates for a foreign table RTE
902  */
903 static void
set_foreign_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)904 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
905 {
906 	/* Mark rel with estimated output rows, width, etc */
907 	set_foreign_size_estimates(root, rel);
908 
909 	/* Let FDW adjust the size estimates, if it can */
910 	rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
911 
912 	/* ... but do not let it set the rows estimate to zero */
913 	rel->rows = clamp_row_est(rel->rows);
914 
915 	/* also, make sure rel->tuples is not insane relative to rel->rows */
916 	rel->tuples = Max(rel->tuples, rel->rows);
917 }
918 
919 /*
920  * set_foreign_pathlist
921  *		Build access paths for a foreign table RTE
922  */
923 static void
set_foreign_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)924 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
925 {
926 	/* Call the FDW's GetForeignPaths function to generate path(s) */
927 	rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
928 }
929 
930 /*
931  * set_append_rel_size
932  *	  Set size estimates for a simple "append relation"
933  *
934  * The passed-in rel and RTE represent the entire append relation.  The
935  * relation's contents are computed by appending together the output of the
936  * individual member relations.  Note that in the non-partitioned inheritance
937  * case, the first member relation is actually the same table as is mentioned
938  * in the parent RTE ... but it has a different RTE and RelOptInfo.  This is
939  * a good thing because their outputs are not the same size.
940  */
941 static void
set_append_rel_size(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)942 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
943 					Index rti, RangeTblEntry *rte)
944 {
945 	int			parentRTindex = rti;
946 	bool		has_live_children;
947 	double		parent_rows;
948 	double		parent_size;
949 	double	   *parent_attrsizes;
950 	int			nattrs;
951 	ListCell   *l;
952 
953 	/* Guard against stack overflow due to overly deep inheritance tree. */
954 	check_stack_depth();
955 
956 	Assert(IS_SIMPLE_REL(rel));
957 
958 	/*
959 	 * Initialize partitioned_child_rels to contain this RT index.
960 	 *
961 	 * Note that during the set_append_rel_pathlist() phase, we will bubble up
962 	 * the indexes of partitioned relations that appear down in the tree, so
963 	 * that when we've created Paths for all the children, the root
964 	 * partitioned table's list will contain all such indexes.
965 	 */
966 	if (rte->relkind == RELKIND_PARTITIONED_TABLE)
967 		rel->partitioned_child_rels = list_make1_int(rti);
968 
969 	/*
970 	 * If this is a partitioned baserel, set the consider_partitionwise_join
971 	 * flag; currently, we only consider partitionwise joins with the baserel
972 	 * if its targetlist doesn't contain a whole-row Var.
973 	 */
974 	if (enable_partitionwise_join &&
975 		rel->reloptkind == RELOPT_BASEREL &&
976 		rte->relkind == RELKIND_PARTITIONED_TABLE &&
977 		rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
978 		rel->consider_partitionwise_join = true;
979 
980 	/*
981 	 * Initialize to compute size estimates for whole append relation.
982 	 *
983 	 * We handle width estimates by weighting the widths of different child
984 	 * rels proportionally to their number of rows.  This is sensible because
985 	 * the use of width estimates is mainly to compute the total relation
986 	 * "footprint" if we have to sort or hash it.  To do this, we sum the
987 	 * total equivalent size (in "double" arithmetic) and then divide by the
988 	 * total rowcount estimate.  This is done separately for the total rel
989 	 * width and each attribute.
990 	 *
991 	 * Note: if you consider changing this logic, beware that child rels could
992 	 * have zero rows and/or width, if they were excluded by constraints.
993 	 */
994 	has_live_children = false;
995 	parent_rows = 0;
996 	parent_size = 0;
997 	nattrs = rel->max_attr - rel->min_attr + 1;
998 	parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
999 
1000 	foreach(l, root->append_rel_list)
1001 	{
1002 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1003 		int			childRTindex;
1004 		RangeTblEntry *childRTE;
1005 		RelOptInfo *childrel;
1006 		ListCell   *parentvars;
1007 		ListCell   *childvars;
1008 
1009 		/* append_rel_list contains all append rels; ignore others */
1010 		if (appinfo->parent_relid != parentRTindex)
1011 			continue;
1012 
1013 		childRTindex = appinfo->child_relid;
1014 		childRTE = root->simple_rte_array[childRTindex];
1015 
1016 		/*
1017 		 * The child rel's RelOptInfo was already created during
1018 		 * add_other_rels_to_query.
1019 		 */
1020 		childrel = find_base_rel(root, childRTindex);
1021 		Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1022 
1023 		/* We may have already proven the child to be dummy. */
1024 		if (IS_DUMMY_REL(childrel))
1025 			continue;
1026 
1027 		/*
1028 		 * We have to copy the parent's targetlist and quals to the child,
1029 		 * with appropriate substitution of variables.  However, the
1030 		 * baserestrictinfo quals were already copied/substituted when the
1031 		 * child RelOptInfo was built.  So we don't need any additional setup
1032 		 * before applying constraint exclusion.
1033 		 */
1034 		if (relation_excluded_by_constraints(root, childrel, childRTE))
1035 		{
1036 			/*
1037 			 * This child need not be scanned, so we can omit it from the
1038 			 * appendrel.
1039 			 */
1040 			set_dummy_rel_pathlist(childrel);
1041 			continue;
1042 		}
1043 
1044 		/*
1045 		 * Constraint exclusion failed, so copy the parent's join quals and
1046 		 * targetlist to the child, with appropriate variable substitutions.
1047 		 *
1048 		 * NB: the resulting childrel->reltarget->exprs may contain arbitrary
1049 		 * expressions, which otherwise would not occur in a rel's targetlist.
1050 		 * Code that might be looking at an appendrel child must cope with
1051 		 * such.  (Normally, a rel's targetlist would only include Vars and
1052 		 * PlaceHolderVars.)  XXX we do not bother to update the cost or width
1053 		 * fields of childrel->reltarget; not clear if that would be useful.
1054 		 */
1055 		childrel->joininfo = (List *)
1056 			adjust_appendrel_attrs(root,
1057 								   (Node *) rel->joininfo,
1058 								   1, &appinfo);
1059 		childrel->reltarget->exprs = (List *)
1060 			adjust_appendrel_attrs(root,
1061 								   (Node *) rel->reltarget->exprs,
1062 								   1, &appinfo);
1063 
1064 		/*
1065 		 * We have to make child entries in the EquivalenceClass data
1066 		 * structures as well.  This is needed either if the parent
1067 		 * participates in some eclass joins (because we will want to consider
1068 		 * inner-indexscan joins on the individual children) or if the parent
1069 		 * has useful pathkeys (because we should try to build MergeAppend
1070 		 * paths that produce those sort orderings).
1071 		 */
1072 		if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
1073 			add_child_rel_equivalences(root, appinfo, rel, childrel);
1074 		childrel->has_eclass_joins = rel->has_eclass_joins;
1075 
1076 		/*
1077 		 * Note: we could compute appropriate attr_needed data for the child's
1078 		 * variables, by transforming the parent's attr_needed through the
1079 		 * translated_vars mapping.  However, currently there's no need
1080 		 * because attr_needed is only examined for base relations not
1081 		 * otherrels.  So we just leave the child's attr_needed empty.
1082 		 */
1083 
1084 		/*
1085 		 * If we consider partitionwise joins with the parent rel, do the same
1086 		 * for partitioned child rels.
1087 		 *
1088 		 * Note: here we abuse the consider_partitionwise_join flag by setting
1089 		 * it for child rels that are not themselves partitioned.  We do so to
1090 		 * tell try_partitionwise_join() that the child rel is sufficiently
1091 		 * valid to be used as a per-partition input, even if it later gets
1092 		 * proven to be dummy.  (It's not usable until we've set up the
1093 		 * reltarget and EC entries, which we just did.)
1094 		 */
1095 		if (rel->consider_partitionwise_join)
1096 			childrel->consider_partitionwise_join = true;
1097 
1098 		/*
1099 		 * If parallelism is allowable for this query in general, see whether
1100 		 * it's allowable for this childrel in particular.  But if we've
1101 		 * already decided the appendrel is not parallel-safe as a whole,
1102 		 * there's no point in considering parallelism for this child.  For
1103 		 * consistency, do this before calling set_rel_size() for the child.
1104 		 */
1105 		if (root->glob->parallelModeOK && rel->consider_parallel)
1106 			set_rel_consider_parallel(root, childrel, childRTE);
1107 
1108 		/*
1109 		 * Compute the child's size.
1110 		 */
1111 		set_rel_size(root, childrel, childRTindex, childRTE);
1112 
1113 		/*
1114 		 * It is possible that constraint exclusion detected a contradiction
1115 		 * within a child subquery, even though we didn't prove one above. If
1116 		 * so, we can skip this child.
1117 		 */
1118 		if (IS_DUMMY_REL(childrel))
1119 			continue;
1120 
1121 		/* We have at least one live child. */
1122 		has_live_children = true;
1123 
1124 		/*
1125 		 * If any live child is not parallel-safe, treat the whole appendrel
1126 		 * as not parallel-safe.  In future we might be able to generate plans
1127 		 * in which some children are farmed out to workers while others are
1128 		 * not; but we don't have that today, so it's a waste to consider
1129 		 * partial paths anywhere in the appendrel unless it's all safe.
1130 		 * (Child rels visited before this one will be unmarked in
1131 		 * set_append_rel_pathlist().)
1132 		 */
1133 		if (!childrel->consider_parallel)
1134 			rel->consider_parallel = false;
1135 
1136 		/*
1137 		 * Accumulate size information from each live child.
1138 		 */
1139 		Assert(childrel->rows > 0);
1140 
1141 		parent_rows += childrel->rows;
1142 		parent_size += childrel->reltarget->width * childrel->rows;
1143 
1144 		/*
1145 		 * Accumulate per-column estimates too.  We need not do anything for
1146 		 * PlaceHolderVars in the parent list.  If child expression isn't a
1147 		 * Var, or we didn't record a width estimate for it, we have to fall
1148 		 * back on a datatype-based estimate.
1149 		 *
1150 		 * By construction, child's targetlist is 1-to-1 with parent's.
1151 		 */
1152 		forboth(parentvars, rel->reltarget->exprs,
1153 				childvars, childrel->reltarget->exprs)
1154 		{
1155 			Var		   *parentvar = (Var *) lfirst(parentvars);
1156 			Node	   *childvar = (Node *) lfirst(childvars);
1157 
1158 			if (IsA(parentvar, Var))
1159 			{
1160 				int			pndx = parentvar->varattno - rel->min_attr;
1161 				int32		child_width = 0;
1162 
1163 				if (IsA(childvar, Var) &&
1164 					((Var *) childvar)->varno == childrel->relid)
1165 				{
1166 					int			cndx = ((Var *) childvar)->varattno - childrel->min_attr;
1167 
1168 					child_width = childrel->attr_widths[cndx];
1169 				}
1170 				if (child_width <= 0)
1171 					child_width = get_typavgwidth(exprType(childvar),
1172 												  exprTypmod(childvar));
1173 				Assert(child_width > 0);
1174 				parent_attrsizes[pndx] += child_width * childrel->rows;
1175 			}
1176 		}
1177 	}
1178 
1179 	if (has_live_children)
1180 	{
1181 		/*
1182 		 * Save the finished size estimates.
1183 		 */
1184 		int			i;
1185 
1186 		Assert(parent_rows > 0);
1187 		rel->rows = parent_rows;
1188 		rel->reltarget->width = rint(parent_size / parent_rows);
1189 		for (i = 0; i < nattrs; i++)
1190 			rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
1191 
1192 		/*
1193 		 * Set "raw tuples" count equal to "rows" for the appendrel; needed
1194 		 * because some places assume rel->tuples is valid for any baserel.
1195 		 */
1196 		rel->tuples = parent_rows;
1197 
1198 		/*
1199 		 * Note that we leave rel->pages as zero; this is important to avoid
1200 		 * double-counting the appendrel tree in total_table_pages.
1201 		 */
1202 	}
1203 	else
1204 	{
1205 		/*
1206 		 * All children were excluded by constraints, so mark the whole
1207 		 * appendrel dummy.  We must do this in this phase so that the rel's
1208 		 * dummy-ness is visible when we generate paths for other rels.
1209 		 */
1210 		set_dummy_rel_pathlist(rel);
1211 	}
1212 
1213 	pfree(parent_attrsizes);
1214 }
1215 
1216 /*
1217  * set_append_rel_pathlist
1218  *	  Build access paths for an "append relation"
1219  */
1220 static void
set_append_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)1221 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
1222 						Index rti, RangeTblEntry *rte)
1223 {
1224 	int			parentRTindex = rti;
1225 	List	   *live_childrels = NIL;
1226 	ListCell   *l;
1227 
1228 	/*
1229 	 * Generate access paths for each member relation, and remember the
1230 	 * non-dummy children.
1231 	 */
1232 	foreach(l, root->append_rel_list)
1233 	{
1234 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1235 		int			childRTindex;
1236 		RangeTblEntry *childRTE;
1237 		RelOptInfo *childrel;
1238 
1239 		/* append_rel_list contains all append rels; ignore others */
1240 		if (appinfo->parent_relid != parentRTindex)
1241 			continue;
1242 
1243 		/* Re-locate the child RTE and RelOptInfo */
1244 		childRTindex = appinfo->child_relid;
1245 		childRTE = root->simple_rte_array[childRTindex];
1246 		childrel = root->simple_rel_array[childRTindex];
1247 
1248 		/*
1249 		 * If set_append_rel_size() decided the parent appendrel was
1250 		 * parallel-unsafe at some point after visiting this child rel, we
1251 		 * need to propagate the unsafety marking down to the child, so that
1252 		 * we don't generate useless partial paths for it.
1253 		 */
1254 		if (!rel->consider_parallel)
1255 			childrel->consider_parallel = false;
1256 
1257 		/*
1258 		 * Compute the child's access paths.
1259 		 */
1260 		set_rel_pathlist(root, childrel, childRTindex, childRTE);
1261 
1262 		/*
1263 		 * If child is dummy, ignore it.
1264 		 */
1265 		if (IS_DUMMY_REL(childrel))
1266 			continue;
1267 
1268 		/* Bubble up childrel's partitioned children. */
1269 		if (rel->part_scheme)
1270 			rel->partitioned_child_rels =
1271 				list_concat(rel->partitioned_child_rels,
1272 							childrel->partitioned_child_rels);
1273 
1274 		/*
1275 		 * Child is live, so add it to the live_childrels list for use below.
1276 		 */
1277 		live_childrels = lappend(live_childrels, childrel);
1278 	}
1279 
1280 	/* Add paths to the append relation. */
1281 	add_paths_to_append_rel(root, rel, live_childrels);
1282 }
1283 
1284 
1285 /*
1286  * add_paths_to_append_rel
1287  *		Generate paths for the given append relation given the set of non-dummy
1288  *		child rels.
1289  *
1290  * The function collects all parameterizations and orderings supported by the
1291  * non-dummy children. For every such parameterization or ordering, it creates
1292  * an append path collecting one path from each non-dummy child with given
1293  * parameterization or ordering. Similarly it collects partial paths from
1294  * non-dummy children to create partial append paths.
1295  */
1296 void
add_paths_to_append_rel(PlannerInfo * root,RelOptInfo * rel,List * live_childrels)1297 add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
1298 						List *live_childrels)
1299 {
1300 	List	   *subpaths = NIL;
1301 	bool		subpaths_valid = true;
1302 	List	   *partial_subpaths = NIL;
1303 	List	   *pa_partial_subpaths = NIL;
1304 	List	   *pa_nonpartial_subpaths = NIL;
1305 	bool		partial_subpaths_valid = true;
1306 	bool		pa_subpaths_valid;
1307 	List	   *all_child_pathkeys = NIL;
1308 	List	   *all_child_outers = NIL;
1309 	ListCell   *l;
1310 	List	   *partitioned_rels = NIL;
1311 	double		partial_rows = -1;
1312 
1313 	/* If appropriate, consider parallel append */
1314 	pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
1315 
1316 	/*
1317 	 * AppendPath generated for partitioned tables must record the RT indexes
1318 	 * of partitioned tables that are direct or indirect children of this
1319 	 * Append rel.
1320 	 *
1321 	 * AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
1322 	 * itself does not represent a partitioned relation, but the child sub-
1323 	 * queries may contain references to partitioned relations.  The loop
1324 	 * below will look for such children and collect them in a list to be
1325 	 * passed to the path creation function.  (This assumes that we don't need
1326 	 * to look through multiple levels of subquery RTEs; if we ever do, we
1327 	 * could consider stuffing the list we generate here into sub-query RTE's
1328 	 * RelOptInfo, just like we do for partitioned rels, which would be used
1329 	 * when populating our parent rel with paths.  For the present, that
1330 	 * appears to be unnecessary.)
1331 	 */
1332 	if (rel->part_scheme != NULL)
1333 	{
1334 		if (IS_SIMPLE_REL(rel))
1335 			partitioned_rels = list_make1(rel->partitioned_child_rels);
1336 		else if (IS_JOIN_REL(rel))
1337 		{
1338 			int			relid = -1;
1339 			List	   *partrels = NIL;
1340 
1341 			/*
1342 			 * For a partitioned joinrel, concatenate the component rels'
1343 			 * partitioned_child_rels lists.
1344 			 */
1345 			while ((relid = bms_next_member(rel->relids, relid)) >= 0)
1346 			{
1347 				RelOptInfo *component;
1348 
1349 				Assert(relid >= 1 && relid < root->simple_rel_array_size);
1350 				component = root->simple_rel_array[relid];
1351 				Assert(component->part_scheme != NULL);
1352 				Assert(list_length(component->partitioned_child_rels) >= 1);
1353 				partrels = list_concat(partrels,
1354 									   component->partitioned_child_rels);
1355 			}
1356 
1357 			partitioned_rels = list_make1(partrels);
1358 		}
1359 
1360 		Assert(list_length(partitioned_rels) >= 1);
1361 	}
1362 
1363 	/*
1364 	 * For every non-dummy child, remember the cheapest path.  Also, identify
1365 	 * all pathkeys (orderings) and parameterizations (required_outer sets)
1366 	 * available for the non-dummy member relations.
1367 	 */
1368 	foreach(l, live_childrels)
1369 	{
1370 		RelOptInfo *childrel = lfirst(l);
1371 		ListCell   *lcp;
1372 		Path	   *cheapest_partial_path = NULL;
1373 
1374 		/*
1375 		 * For UNION ALLs with non-empty partitioned_child_rels, accumulate
1376 		 * the Lists of child relations.
1377 		 */
1378 		if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
1379 			partitioned_rels = lappend(partitioned_rels,
1380 									   childrel->partitioned_child_rels);
1381 
1382 		/*
1383 		 * If child has an unparameterized cheapest-total path, add that to
1384 		 * the unparameterized Append path we are constructing for the parent.
1385 		 * If not, there's no workable unparameterized path.
1386 		 *
1387 		 * With partitionwise aggregates, the child rel's pathlist may be
1388 		 * empty, so don't assume that a path exists here.
1389 		 */
1390 		if (childrel->pathlist != NIL &&
1391 			childrel->cheapest_total_path->param_info == NULL)
1392 			accumulate_append_subpath(childrel->cheapest_total_path,
1393 									  &subpaths, NULL);
1394 		else
1395 			subpaths_valid = false;
1396 
1397 		/* Same idea, but for a partial plan. */
1398 		if (childrel->partial_pathlist != NIL)
1399 		{
1400 			cheapest_partial_path = linitial(childrel->partial_pathlist);
1401 			accumulate_append_subpath(cheapest_partial_path,
1402 									  &partial_subpaths, NULL);
1403 		}
1404 		else
1405 			partial_subpaths_valid = false;
1406 
1407 		/*
1408 		 * Same idea, but for a parallel append mixing partial and non-partial
1409 		 * paths.
1410 		 */
1411 		if (pa_subpaths_valid)
1412 		{
1413 			Path	   *nppath = NULL;
1414 
1415 			nppath =
1416 				get_cheapest_parallel_safe_total_inner(childrel->pathlist);
1417 
1418 			if (cheapest_partial_path == NULL && nppath == NULL)
1419 			{
1420 				/* Neither a partial nor a parallel-safe path?  Forget it. */
1421 				pa_subpaths_valid = false;
1422 			}
1423 			else if (nppath == NULL ||
1424 					 (cheapest_partial_path != NULL &&
1425 					  cheapest_partial_path->total_cost < nppath->total_cost))
1426 			{
1427 				/* Partial path is cheaper or the only option. */
1428 				Assert(cheapest_partial_path != NULL);
1429 				accumulate_append_subpath(cheapest_partial_path,
1430 										  &pa_partial_subpaths,
1431 										  &pa_nonpartial_subpaths);
1432 
1433 			}
1434 			else
1435 			{
1436 				/*
1437 				 * Either we've got only a non-partial path, or we think that
1438 				 * a single backend can execute the best non-partial path
1439 				 * faster than all the parallel backends working together can
1440 				 * execute the best partial path.
1441 				 *
1442 				 * It might make sense to be more aggressive here.  Even if
1443 				 * the best non-partial path is more expensive than the best
1444 				 * partial path, it could still be better to choose the
1445 				 * non-partial path if there are several such paths that can
1446 				 * be given to different workers.  For now, we don't try to
1447 				 * figure that out.
1448 				 */
1449 				accumulate_append_subpath(nppath,
1450 										  &pa_nonpartial_subpaths,
1451 										  NULL);
1452 			}
1453 		}
1454 
1455 		/*
1456 		 * Collect lists of all the available path orderings and
1457 		 * parameterizations for all the children.  We use these as a
1458 		 * heuristic to indicate which sort orderings and parameterizations we
1459 		 * should build Append and MergeAppend paths for.
1460 		 */
1461 		foreach(lcp, childrel->pathlist)
1462 		{
1463 			Path	   *childpath = (Path *) lfirst(lcp);
1464 			List	   *childkeys = childpath->pathkeys;
1465 			Relids		childouter = PATH_REQ_OUTER(childpath);
1466 
1467 			/* Unsorted paths don't contribute to pathkey list */
1468 			if (childkeys != NIL)
1469 			{
1470 				ListCell   *lpk;
1471 				bool		found = false;
1472 
1473 				/* Have we already seen this ordering? */
1474 				foreach(lpk, all_child_pathkeys)
1475 				{
1476 					List	   *existing_pathkeys = (List *) lfirst(lpk);
1477 
1478 					if (compare_pathkeys(existing_pathkeys,
1479 										 childkeys) == PATHKEYS_EQUAL)
1480 					{
1481 						found = true;
1482 						break;
1483 					}
1484 				}
1485 				if (!found)
1486 				{
1487 					/* No, so add it to all_child_pathkeys */
1488 					all_child_pathkeys = lappend(all_child_pathkeys,
1489 												 childkeys);
1490 				}
1491 			}
1492 
1493 			/* Unparameterized paths don't contribute to param-set list */
1494 			if (childouter)
1495 			{
1496 				ListCell   *lco;
1497 				bool		found = false;
1498 
1499 				/* Have we already seen this param set? */
1500 				foreach(lco, all_child_outers)
1501 				{
1502 					Relids		existing_outers = (Relids) lfirst(lco);
1503 
1504 					if (bms_equal(existing_outers, childouter))
1505 					{
1506 						found = true;
1507 						break;
1508 					}
1509 				}
1510 				if (!found)
1511 				{
1512 					/* No, so add it to all_child_outers */
1513 					all_child_outers = lappend(all_child_outers,
1514 											   childouter);
1515 				}
1516 			}
1517 		}
1518 	}
1519 
1520 	/*
1521 	 * If we found unparameterized paths for all children, build an unordered,
1522 	 * unparameterized Append path for the rel.  (Note: this is correct even
1523 	 * if we have zero or one live subpath due to constraint exclusion.)
1524 	 */
1525 	if (subpaths_valid)
1526 		add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
1527 												  NIL, NULL, 0, false,
1528 												  partitioned_rels, -1));
1529 
1530 	/*
1531 	 * Consider an append of unordered, unparameterized partial paths.  Make
1532 	 * it parallel-aware if possible.
1533 	 */
1534 	if (partial_subpaths_valid && partial_subpaths != NIL)
1535 	{
1536 		AppendPath *appendpath;
1537 		ListCell   *lc;
1538 		int			parallel_workers = 0;
1539 
1540 		/* Find the highest number of workers requested for any subpath. */
1541 		foreach(lc, partial_subpaths)
1542 		{
1543 			Path	   *path = lfirst(lc);
1544 
1545 			parallel_workers = Max(parallel_workers, path->parallel_workers);
1546 		}
1547 		Assert(parallel_workers > 0);
1548 
1549 		/*
1550 		 * If the use of parallel append is permitted, always request at least
1551 		 * log2(# of children) workers.  We assume it can be useful to have
1552 		 * extra workers in this case because they will be spread out across
1553 		 * the children.  The precise formula is just a guess, but we don't
1554 		 * want to end up with a radically different answer for a table with N
1555 		 * partitions vs. an unpartitioned table with the same data, so the
1556 		 * use of some kind of log-scaling here seems to make some sense.
1557 		 */
1558 		if (enable_parallel_append)
1559 		{
1560 			parallel_workers = Max(parallel_workers,
1561 								   fls(list_length(live_childrels)));
1562 			parallel_workers = Min(parallel_workers,
1563 								   max_parallel_workers_per_gather);
1564 		}
1565 		Assert(parallel_workers > 0);
1566 
1567 		/* Generate a partial append path. */
1568 		appendpath = create_append_path(root, rel, NIL, partial_subpaths,
1569 										NIL, NULL, parallel_workers,
1570 										enable_parallel_append,
1571 										partitioned_rels, -1);
1572 
1573 		/*
1574 		 * Make sure any subsequent partial paths use the same row count
1575 		 * estimate.
1576 		 */
1577 		partial_rows = appendpath->path.rows;
1578 
1579 		/* Add the path. */
1580 		add_partial_path(rel, (Path *) appendpath);
1581 	}
1582 
1583 	/*
1584 	 * Consider a parallel-aware append using a mix of partial and non-partial
1585 	 * paths.  (This only makes sense if there's at least one child which has
1586 	 * a non-partial path that is substantially cheaper than any partial path;
1587 	 * otherwise, we should use the append path added in the previous step.)
1588 	 */
1589 	if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
1590 	{
1591 		AppendPath *appendpath;
1592 		ListCell   *lc;
1593 		int			parallel_workers = 0;
1594 
1595 		/*
1596 		 * Find the highest number of workers requested for any partial
1597 		 * subpath.
1598 		 */
1599 		foreach(lc, pa_partial_subpaths)
1600 		{
1601 			Path	   *path = lfirst(lc);
1602 
1603 			parallel_workers = Max(parallel_workers, path->parallel_workers);
1604 		}
1605 
1606 		/*
1607 		 * Same formula here as above.  It's even more important in this
1608 		 * instance because the non-partial paths won't contribute anything to
1609 		 * the planned number of parallel workers.
1610 		 */
1611 		parallel_workers = Max(parallel_workers,
1612 							   fls(list_length(live_childrels)));
1613 		parallel_workers = Min(parallel_workers,
1614 							   max_parallel_workers_per_gather);
1615 		Assert(parallel_workers > 0);
1616 
1617 		appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
1618 										pa_partial_subpaths,
1619 										NIL, NULL, parallel_workers, true,
1620 										partitioned_rels, partial_rows);
1621 		add_partial_path(rel, (Path *) appendpath);
1622 	}
1623 
1624 	/*
1625 	 * Also build unparameterized ordered append paths based on the collected
1626 	 * list of child pathkeys.
1627 	 */
1628 	if (subpaths_valid)
1629 		generate_orderedappend_paths(root, rel, live_childrels,
1630 									 all_child_pathkeys,
1631 									 partitioned_rels);
1632 
1633 	/*
1634 	 * Build Append paths for each parameterization seen among the child rels.
1635 	 * (This may look pretty expensive, but in most cases of practical
1636 	 * interest, the child rels will expose mostly the same parameterizations,
1637 	 * so that not that many cases actually get considered here.)
1638 	 *
1639 	 * The Append node itself cannot enforce quals, so all qual checking must
1640 	 * be done in the child paths.  This means that to have a parameterized
1641 	 * Append path, we must have the exact same parameterization for each
1642 	 * child path; otherwise some children might be failing to check the
1643 	 * moved-down quals.  To make them match up, we can try to increase the
1644 	 * parameterization of lesser-parameterized paths.
1645 	 */
1646 	foreach(l, all_child_outers)
1647 	{
1648 		Relids		required_outer = (Relids) lfirst(l);
1649 		ListCell   *lcr;
1650 
1651 		/* Select the child paths for an Append with this parameterization */
1652 		subpaths = NIL;
1653 		subpaths_valid = true;
1654 		foreach(lcr, live_childrels)
1655 		{
1656 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1657 			Path	   *subpath;
1658 
1659 			if (childrel->pathlist == NIL)
1660 			{
1661 				/* failed to make a suitable path for this child */
1662 				subpaths_valid = false;
1663 				break;
1664 			}
1665 
1666 			subpath = get_cheapest_parameterized_child_path(root,
1667 															childrel,
1668 															required_outer);
1669 			if (subpath == NULL)
1670 			{
1671 				/* failed to make a suitable path for this child */
1672 				subpaths_valid = false;
1673 				break;
1674 			}
1675 			accumulate_append_subpath(subpath, &subpaths, NULL);
1676 		}
1677 
1678 		if (subpaths_valid)
1679 			add_path(rel, (Path *)
1680 					 create_append_path(root, rel, subpaths, NIL,
1681 										NIL, required_outer, 0, false,
1682 										partitioned_rels, -1));
1683 	}
1684 
1685 	/*
1686 	 * When there is only a single child relation, the Append path can inherit
1687 	 * any ordering available for the child rel's path, so that it's useful to
1688 	 * consider ordered partial paths.  Above we only considered the cheapest
1689 	 * partial path for each child, but let's also make paths using any
1690 	 * partial paths that have pathkeys.
1691 	 */
1692 	if (list_length(live_childrels) == 1)
1693 	{
1694 		RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);
1695 
1696 		foreach(l, childrel->partial_pathlist)
1697 		{
1698 			Path	   *path = (Path *) lfirst(l);
1699 			AppendPath *appendpath;
1700 
1701 			/*
1702 			 * Skip paths with no pathkeys.  Also skip the cheapest partial
1703 			 * path, since we already used that above.
1704 			 */
1705 			if (path->pathkeys == NIL ||
1706 				path == linitial(childrel->partial_pathlist))
1707 				continue;
1708 
1709 			appendpath = create_append_path(root, rel, NIL, list_make1(path),
1710 											NIL, NULL,
1711 											path->parallel_workers, true,
1712 											partitioned_rels, partial_rows);
1713 			add_partial_path(rel, (Path *) appendpath);
1714 		}
1715 	}
1716 }
1717 
1718 /*
1719  * generate_orderedappend_paths
1720  *		Generate ordered append paths for an append relation
1721  *
1722  * Usually we generate MergeAppend paths here, but there are some special
1723  * cases where we can generate simple Append paths, because the subpaths
1724  * can provide tuples in the required order already.
1725  *
1726  * We generate a path for each ordering (pathkey list) appearing in
1727  * all_child_pathkeys.
1728  *
1729  * We consider both cheapest-startup and cheapest-total cases, ie, for each
1730  * interesting ordering, collect all the cheapest startup subpaths and all the
1731  * cheapest total paths, and build a suitable path for each case.
1732  *
1733  * We don't currently generate any parameterized ordered paths here.  While
1734  * it would not take much more code here to do so, it's very unclear that it
1735  * is worth the planning cycles to investigate such paths: there's little
1736  * use for an ordered path on the inside of a nestloop.  In fact, it's likely
1737  * that the current coding of add_path would reject such paths out of hand,
1738  * because add_path gives no credit for sort ordering of parameterized paths,
1739  * and a parameterized MergeAppend is going to be more expensive than the
1740  * corresponding parameterized Append path.  If we ever try harder to support
1741  * parameterized mergejoin plans, it might be worth adding support for
1742  * parameterized paths here to feed such joins.  (See notes in
1743  * optimizer/README for why that might not ever happen, though.)
1744  */
1745 static void
generate_orderedappend_paths(PlannerInfo * root,RelOptInfo * rel,List * live_childrels,List * all_child_pathkeys,List * partitioned_rels)1746 generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
1747 							 List *live_childrels,
1748 							 List *all_child_pathkeys,
1749 							 List *partitioned_rels)
1750 {
1751 	ListCell   *lcp;
1752 	List	   *partition_pathkeys = NIL;
1753 	List	   *partition_pathkeys_desc = NIL;
1754 	bool		partition_pathkeys_partial = true;
1755 	bool		partition_pathkeys_desc_partial = true;
1756 
1757 	/*
1758 	 * Some partitioned table setups may allow us to use an Append node
1759 	 * instead of a MergeAppend.  This is possible in cases such as RANGE
1760 	 * partitioned tables where it's guaranteed that an earlier partition must
1761 	 * contain rows which come earlier in the sort order.  To detect whether
1762 	 * this is relevant, build pathkey descriptions of the partition ordering,
1763 	 * for both forward and reverse scans.
1764 	 */
1765 	if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
1766 		partitions_are_ordered(rel->boundinfo, rel->nparts))
1767 	{
1768 		partition_pathkeys = build_partition_pathkeys(root, rel,
1769 													  ForwardScanDirection,
1770 													  &partition_pathkeys_partial);
1771 
1772 		partition_pathkeys_desc = build_partition_pathkeys(root, rel,
1773 														   BackwardScanDirection,
1774 														   &partition_pathkeys_desc_partial);
1775 
1776 		/*
1777 		 * You might think we should truncate_useless_pathkeys here, but
1778 		 * allowing partition keys which are a subset of the query's pathkeys
1779 		 * can often be useful.  For example, consider a table partitioned by
1780 		 * RANGE (a, b), and a query with ORDER BY a, b, c.  If we have child
1781 		 * paths that can produce the a, b, c ordering (perhaps via indexes on
1782 		 * (a, b, c)) then it works to consider the appendrel output as
1783 		 * ordered by a, b, c.
1784 		 */
1785 	}
1786 
1787 	/* Now consider each interesting sort ordering */
1788 	foreach(lcp, all_child_pathkeys)
1789 	{
1790 		List	   *pathkeys = (List *) lfirst(lcp);
1791 		List	   *startup_subpaths = NIL;
1792 		List	   *total_subpaths = NIL;
1793 		bool		startup_neq_total = false;
1794 		ListCell   *lcr;
1795 		bool		match_partition_order;
1796 		bool		match_partition_order_desc;
1797 
1798 		/*
1799 		 * Determine if this sort ordering matches any partition pathkeys we
1800 		 * have, for both ascending and descending partition order.  If the
1801 		 * partition pathkeys happen to be contained in pathkeys then it still
1802 		 * works, as described above, providing that the partition pathkeys
1803 		 * are complete and not just a prefix of the partition keys.  (In such
1804 		 * cases we'll be relying on the child paths to have sorted the
1805 		 * lower-order columns of the required pathkeys.)
1806 		 */
1807 		match_partition_order =
1808 			pathkeys_contained_in(pathkeys, partition_pathkeys) ||
1809 			(!partition_pathkeys_partial &&
1810 			 pathkeys_contained_in(partition_pathkeys, pathkeys));
1811 
1812 		match_partition_order_desc = !match_partition_order &&
1813 			(pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
1814 			 (!partition_pathkeys_desc_partial &&
1815 			  pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
1816 
1817 		/* Select the child paths for this ordering... */
1818 		foreach(lcr, live_childrels)
1819 		{
1820 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1821 			Path	   *cheapest_startup,
1822 					   *cheapest_total;
1823 
1824 			/* Locate the right paths, if they are available. */
1825 			cheapest_startup =
1826 				get_cheapest_path_for_pathkeys(childrel->pathlist,
1827 											   pathkeys,
1828 											   NULL,
1829 											   STARTUP_COST,
1830 											   false);
1831 			cheapest_total =
1832 				get_cheapest_path_for_pathkeys(childrel->pathlist,
1833 											   pathkeys,
1834 											   NULL,
1835 											   TOTAL_COST,
1836 											   false);
1837 
1838 			/*
1839 			 * If we can't find any paths with the right order just use the
1840 			 * cheapest-total path; we'll have to sort it later.
1841 			 */
1842 			if (cheapest_startup == NULL || cheapest_total == NULL)
1843 			{
1844 				cheapest_startup = cheapest_total =
1845 					childrel->cheapest_total_path;
1846 				/* Assert we do have an unparameterized path for this child */
1847 				Assert(cheapest_total->param_info == NULL);
1848 			}
1849 
1850 			/*
1851 			 * Notice whether we actually have different paths for the
1852 			 * "cheapest" and "total" cases; frequently there will be no point
1853 			 * in two create_merge_append_path() calls.
1854 			 */
1855 			if (cheapest_startup != cheapest_total)
1856 				startup_neq_total = true;
1857 
1858 			/*
1859 			 * Collect the appropriate child paths.  The required logic varies
1860 			 * for the Append and MergeAppend cases.
1861 			 */
1862 			if (match_partition_order)
1863 			{
1864 				/*
1865 				 * We're going to make a plain Append path.  We don't need
1866 				 * most of what accumulate_append_subpath would do, but we do
1867 				 * want to cut out child Appends or MergeAppends if they have
1868 				 * just a single subpath (and hence aren't doing anything
1869 				 * useful).
1870 				 */
1871 				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1872 				cheapest_total = get_singleton_append_subpath(cheapest_total);
1873 
1874 				startup_subpaths = lappend(startup_subpaths, cheapest_startup);
1875 				total_subpaths = lappend(total_subpaths, cheapest_total);
1876 			}
1877 			else if (match_partition_order_desc)
1878 			{
1879 				/*
1880 				 * As above, but we need to reverse the order of the children,
1881 				 * because nodeAppend.c doesn't know anything about reverse
1882 				 * ordering and will scan the children in the order presented.
1883 				 */
1884 				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1885 				cheapest_total = get_singleton_append_subpath(cheapest_total);
1886 
1887 				startup_subpaths = lcons(cheapest_startup, startup_subpaths);
1888 				total_subpaths = lcons(cheapest_total, total_subpaths);
1889 			}
1890 			else
1891 			{
1892 				/*
1893 				 * Otherwise, rely on accumulate_append_subpath to collect the
1894 				 * child paths for the MergeAppend.
1895 				 */
1896 				accumulate_append_subpath(cheapest_startup,
1897 										  &startup_subpaths, NULL);
1898 				accumulate_append_subpath(cheapest_total,
1899 										  &total_subpaths, NULL);
1900 			}
1901 		}
1902 
1903 		/* ... and build the Append or MergeAppend paths */
1904 		if (match_partition_order || match_partition_order_desc)
1905 		{
1906 			/* We only need Append */
1907 			add_path(rel, (Path *) create_append_path(root,
1908 													  rel,
1909 													  startup_subpaths,
1910 													  NIL,
1911 													  pathkeys,
1912 													  NULL,
1913 													  0,
1914 													  false,
1915 													  partitioned_rels,
1916 													  -1));
1917 			if (startup_neq_total)
1918 				add_path(rel, (Path *) create_append_path(root,
1919 														  rel,
1920 														  total_subpaths,
1921 														  NIL,
1922 														  pathkeys,
1923 														  NULL,
1924 														  0,
1925 														  false,
1926 														  partitioned_rels,
1927 														  -1));
1928 		}
1929 		else
1930 		{
1931 			/* We need MergeAppend */
1932 			add_path(rel, (Path *) create_merge_append_path(root,
1933 															rel,
1934 															startup_subpaths,
1935 															pathkeys,
1936 															NULL,
1937 															partitioned_rels));
1938 			if (startup_neq_total)
1939 				add_path(rel, (Path *) create_merge_append_path(root,
1940 																rel,
1941 																total_subpaths,
1942 																pathkeys,
1943 																NULL,
1944 																partitioned_rels));
1945 		}
1946 	}
1947 }
1948 
1949 /*
1950  * get_cheapest_parameterized_child_path
1951  *		Get cheapest path for this relation that has exactly the requested
1952  *		parameterization.
1953  *
1954  * Returns NULL if unable to create such a path.
1955  */
1956 static Path *
get_cheapest_parameterized_child_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1957 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1958 									  Relids required_outer)
1959 {
1960 	Path	   *cheapest;
1961 	ListCell   *lc;
1962 
1963 	/*
1964 	 * Look up the cheapest existing path with no more than the needed
1965 	 * parameterization.  If it has exactly the needed parameterization, we're
1966 	 * done.
1967 	 */
1968 	cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1969 											  NIL,
1970 											  required_outer,
1971 											  TOTAL_COST,
1972 											  false);
1973 	Assert(cheapest != NULL);
1974 	if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1975 		return cheapest;
1976 
1977 	/*
1978 	 * Otherwise, we can "reparameterize" an existing path to match the given
1979 	 * parameterization, which effectively means pushing down additional
1980 	 * joinquals to be checked within the path's scan.  However, some existing
1981 	 * paths might check the available joinquals already while others don't;
1982 	 * therefore, it's not clear which existing path will be cheapest after
1983 	 * reparameterization.  We have to go through them all and find out.
1984 	 */
1985 	cheapest = NULL;
1986 	foreach(lc, rel->pathlist)
1987 	{
1988 		Path	   *path = (Path *) lfirst(lc);
1989 
1990 		/* Can't use it if it needs more than requested parameterization */
1991 		if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1992 			continue;
1993 
1994 		/*
1995 		 * Reparameterization can only increase the path's cost, so if it's
1996 		 * already more expensive than the current cheapest, forget it.
1997 		 */
1998 		if (cheapest != NULL &&
1999 			compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2000 			continue;
2001 
2002 		/* Reparameterize if needed, then recheck cost */
2003 		if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
2004 		{
2005 			path = reparameterize_path(root, path, required_outer, 1.0);
2006 			if (path == NULL)
2007 				continue;		/* failed to reparameterize this one */
2008 			Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
2009 
2010 			if (cheapest != NULL &&
2011 				compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2012 				continue;
2013 		}
2014 
2015 		/* We have a new best path */
2016 		cheapest = path;
2017 	}
2018 
2019 	/* Return the best path, or NULL if we found no suitable candidate */
2020 	return cheapest;
2021 }
2022 
2023 /*
2024  * accumulate_append_subpath
2025  *		Add a subpath to the list being built for an Append or MergeAppend.
2026  *
2027  * It's possible that the child is itself an Append or MergeAppend path, in
2028  * which case we can "cut out the middleman" and just add its child paths to
2029  * our own list.  (We don't try to do this earlier because we need to apply
2030  * both levels of transformation to the quals.)
2031  *
2032  * Note that if we omit a child MergeAppend in this way, we are effectively
2033  * omitting a sort step, which seems fine: if the parent is to be an Append,
2034  * its result would be unsorted anyway, while if the parent is to be a
2035  * MergeAppend, there's no point in a separate sort on a child.
2036  *
2037  * Normally, either path is a partial path and subpaths is a list of partial
2038  * paths, or else path is a non-partial plan and subpaths is a list of those.
2039  * However, if path is a parallel-aware Append, then we add its partial path
2040  * children to subpaths and the rest to special_subpaths.  If the latter is
2041  * NULL, we don't flatten the path at all (unless it contains only partial
2042  * paths).
2043  */
2044 static void
accumulate_append_subpath(Path * path,List ** subpaths,List ** special_subpaths)2045 accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
2046 {
2047 	if (IsA(path, AppendPath))
2048 	{
2049 		AppendPath *apath = (AppendPath *) path;
2050 
2051 		if (!apath->path.parallel_aware || apath->first_partial_path == 0)
2052 		{
2053 			*subpaths = list_concat(*subpaths, apath->subpaths);
2054 			return;
2055 		}
2056 		else if (special_subpaths != NULL)
2057 		{
2058 			List	   *new_special_subpaths;
2059 
2060 			/* Split Parallel Append into partial and non-partial subpaths */
2061 			*subpaths = list_concat(*subpaths,
2062 									list_copy_tail(apath->subpaths,
2063 												   apath->first_partial_path));
2064 			new_special_subpaths =
2065 				list_truncate(list_copy(apath->subpaths),
2066 							  apath->first_partial_path);
2067 			*special_subpaths = list_concat(*special_subpaths,
2068 											new_special_subpaths);
2069 			return;
2070 		}
2071 	}
2072 	else if (IsA(path, MergeAppendPath))
2073 	{
2074 		MergeAppendPath *mpath = (MergeAppendPath *) path;
2075 
2076 		*subpaths = list_concat(*subpaths, mpath->subpaths);
2077 		return;
2078 	}
2079 
2080 	*subpaths = lappend(*subpaths, path);
2081 }
2082 
2083 /*
2084  * get_singleton_append_subpath
2085  *		Returns the single subpath of an Append/MergeAppend, or just
2086  *		return 'path' if it's not a single sub-path Append/MergeAppend.
2087  *
2088  * Note: 'path' must not be a parallel-aware path.
2089  */
2090 static Path *
get_singleton_append_subpath(Path * path)2091 get_singleton_append_subpath(Path *path)
2092 {
2093 	Assert(!path->parallel_aware);
2094 
2095 	if (IsA(path, AppendPath))
2096 	{
2097 		AppendPath *apath = (AppendPath *) path;
2098 
2099 		if (list_length(apath->subpaths) == 1)
2100 			return (Path *) linitial(apath->subpaths);
2101 	}
2102 	else if (IsA(path, MergeAppendPath))
2103 	{
2104 		MergeAppendPath *mpath = (MergeAppendPath *) path;
2105 
2106 		if (list_length(mpath->subpaths) == 1)
2107 			return (Path *) linitial(mpath->subpaths);
2108 	}
2109 
2110 	return path;
2111 }
2112 
2113 /*
2114  * set_dummy_rel_pathlist
2115  *	  Build a dummy path for a relation that's been excluded by constraints
2116  *
2117  * Rather than inventing a special "dummy" path type, we represent this as an
2118  * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2119  *
2120  * (See also mark_dummy_rel, which does basically the same thing, but is
2121  * typically used to change a rel into dummy state after we already made
2122  * paths for it.)
2123  */
2124 static void
set_dummy_rel_pathlist(RelOptInfo * rel)2125 set_dummy_rel_pathlist(RelOptInfo *rel)
2126 {
2127 	/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2128 	rel->rows = 0;
2129 	rel->reltarget->width = 0;
2130 
2131 	/* Discard any pre-existing paths; no further need for them */
2132 	rel->pathlist = NIL;
2133 	rel->partial_pathlist = NIL;
2134 
2135 	/* Set up the dummy path */
2136 	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
2137 											  NIL, rel->lateral_relids,
2138 											  0, false, NIL, -1));
2139 
2140 	/*
2141 	 * We set the cheapest-path fields immediately, just in case they were
2142 	 * pointing at some discarded path.  This is redundant when we're called
2143 	 * from set_rel_size(), but not when called from elsewhere, and doing it
2144 	 * twice is harmless anyway.
2145 	 */
2146 	set_cheapest(rel);
2147 }
2148 
2149 /* quick-and-dirty test to see if any joining is needed */
2150 static bool
has_multiple_baserels(PlannerInfo * root)2151 has_multiple_baserels(PlannerInfo *root)
2152 {
2153 	int			num_base_rels = 0;
2154 	Index		rti;
2155 
2156 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
2157 	{
2158 		RelOptInfo *brel = root->simple_rel_array[rti];
2159 
2160 		if (brel == NULL)
2161 			continue;
2162 
2163 		/* ignore RTEs that are "other rels" */
2164 		if (brel->reloptkind == RELOPT_BASEREL)
2165 			if (++num_base_rels > 1)
2166 				return true;
2167 	}
2168 	return false;
2169 }
2170 
2171 /*
2172  * set_subquery_pathlist
2173  *		Generate SubqueryScan access paths for a subquery RTE
2174  *
2175  * We don't currently support generating parameterized paths for subqueries
2176  * by pushing join clauses down into them; it seems too expensive to re-plan
2177  * the subquery multiple times to consider different alternatives.
2178  * (XXX that could stand to be reconsidered, now that we use Paths.)
2179  * So the paths made here will be parameterized if the subquery contains
2180  * LATERAL references, otherwise not.  As long as that's true, there's no need
2181  * for a separate set_subquery_size phase: just make the paths right away.
2182  */
2183 static void
set_subquery_pathlist(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)2184 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
2185 					  Index rti, RangeTblEntry *rte)
2186 {
2187 	Query	   *parse = root->parse;
2188 	Query	   *subquery = rte->subquery;
2189 	Relids		required_outer;
2190 	pushdown_safety_info safetyInfo;
2191 	double		tuple_fraction;
2192 	RelOptInfo *sub_final_rel;
2193 	ListCell   *lc;
2194 
2195 	/*
2196 	 * Must copy the Query so that planning doesn't mess up the RTE contents
2197 	 * (really really need to fix the planner to not scribble on its input,
2198 	 * someday ... but see remove_unused_subquery_outputs to start with).
2199 	 */
2200 	subquery = copyObject(subquery);
2201 
2202 	/*
2203 	 * If it's a LATERAL subquery, it might contain some Vars of the current
2204 	 * query level, requiring it to be treated as parameterized, even though
2205 	 * we don't support pushing down join quals into subqueries.
2206 	 */
2207 	required_outer = rel->lateral_relids;
2208 
2209 	/*
2210 	 * Zero out result area for subquery_is_pushdown_safe, so that it can set
2211 	 * flags as needed while recursing.  In particular, we need a workspace
2212 	 * for keeping track of unsafe-to-reference columns.  unsafeColumns[i]
2213 	 * will be set true if we find that output column i of the subquery is
2214 	 * unsafe to use in a pushed-down qual.
2215 	 */
2216 	memset(&safetyInfo, 0, sizeof(safetyInfo));
2217 	safetyInfo.unsafeColumns = (bool *)
2218 		palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
2219 
2220 	/*
2221 	 * If the subquery has the "security_barrier" flag, it means the subquery
2222 	 * originated from a view that must enforce row level security.  Then we
2223 	 * must not push down quals that contain leaky functions.  (Ideally this
2224 	 * would be checked inside subquery_is_pushdown_safe, but since we don't
2225 	 * currently pass the RTE to that function, we must do it here.)
2226 	 */
2227 	safetyInfo.unsafeLeaky = rte->security_barrier;
2228 
2229 	/*
2230 	 * If there are any restriction clauses that have been attached to the
2231 	 * subquery relation, consider pushing them down to become WHERE or HAVING
2232 	 * quals of the subquery itself.  This transformation is useful because it
2233 	 * may allow us to generate a better plan for the subquery than evaluating
2234 	 * all the subquery output rows and then filtering them.
2235 	 *
2236 	 * There are several cases where we cannot push down clauses. Restrictions
2237 	 * involving the subquery are checked by subquery_is_pushdown_safe().
2238 	 * Restrictions on individual clauses are checked by
2239 	 * qual_is_pushdown_safe().  Also, we don't want to push down
2240 	 * pseudoconstant clauses; better to have the gating node above the
2241 	 * subquery.
2242 	 *
2243 	 * Non-pushed-down clauses will get evaluated as qpquals of the
2244 	 * SubqueryScan node.
2245 	 *
2246 	 * XXX Are there any cases where we want to make a policy decision not to
2247 	 * push down a pushable qual, because it'd result in a worse plan?
2248 	 */
2249 	if (rel->baserestrictinfo != NIL &&
2250 		subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
2251 	{
2252 		/* OK to consider pushing down individual quals */
2253 		List	   *upperrestrictlist = NIL;
2254 		ListCell   *l;
2255 
2256 		foreach(l, rel->baserestrictinfo)
2257 		{
2258 			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
2259 			Node	   *clause = (Node *) rinfo->clause;
2260 
2261 			if (!rinfo->pseudoconstant &&
2262 				qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
2263 			{
2264 				/* Push it down */
2265 				subquery_push_qual(subquery, rte, rti, clause);
2266 			}
2267 			else
2268 			{
2269 				/* Keep it in the upper query */
2270 				upperrestrictlist = lappend(upperrestrictlist, rinfo);
2271 			}
2272 		}
2273 		rel->baserestrictinfo = upperrestrictlist;
2274 		/* We don't bother recomputing baserestrict_min_security */
2275 	}
2276 
2277 	pfree(safetyInfo.unsafeColumns);
2278 
2279 	/*
2280 	 * The upper query might not use all the subquery's output columns; if
2281 	 * not, we can simplify.
2282 	 */
2283 	remove_unused_subquery_outputs(subquery, rel);
2284 
2285 	/*
2286 	 * We can safely pass the outer tuple_fraction down to the subquery if the
2287 	 * outer level has no joining, aggregation, or sorting to do. Otherwise
2288 	 * we'd better tell the subquery to plan for full retrieval. (XXX This
2289 	 * could probably be made more intelligent ...)
2290 	 */
2291 	if (parse->hasAggs ||
2292 		parse->groupClause ||
2293 		parse->groupingSets ||
2294 		parse->havingQual ||
2295 		parse->distinctClause ||
2296 		parse->sortClause ||
2297 		has_multiple_baserels(root))
2298 		tuple_fraction = 0.0;	/* default case */
2299 	else
2300 		tuple_fraction = root->tuple_fraction;
2301 
2302 	/* plan_params should not be in use in current query level */
2303 	Assert(root->plan_params == NIL);
2304 
2305 	/* Generate a subroot and Paths for the subquery */
2306 	rel->subroot = subquery_planner(root->glob, subquery,
2307 									root,
2308 									false, tuple_fraction);
2309 
2310 	/* Isolate the params needed by this specific subplan */
2311 	rel->subplan_params = root->plan_params;
2312 	root->plan_params = NIL;
2313 
2314 	/*
2315 	 * It's possible that constraint exclusion proved the subquery empty. If
2316 	 * so, it's desirable to produce an unadorned dummy path so that we will
2317 	 * recognize appropriate optimizations at this query level.
2318 	 */
2319 	sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
2320 
2321 	if (IS_DUMMY_REL(sub_final_rel))
2322 	{
2323 		set_dummy_rel_pathlist(rel);
2324 		return;
2325 	}
2326 
2327 	/*
2328 	 * Mark rel with estimated output rows, width, etc.  Note that we have to
2329 	 * do this before generating outer-query paths, else cost_subqueryscan is
2330 	 * not happy.
2331 	 */
2332 	set_subquery_size_estimates(root, rel);
2333 
2334 	/*
2335 	 * For each Path that subquery_planner produced, make a SubqueryScanPath
2336 	 * in the outer query.
2337 	 */
2338 	foreach(lc, sub_final_rel->pathlist)
2339 	{
2340 		Path	   *subpath = (Path *) lfirst(lc);
2341 		List	   *pathkeys;
2342 
2343 		/* Convert subpath's pathkeys to outer representation */
2344 		pathkeys = convert_subquery_pathkeys(root,
2345 											 rel,
2346 											 subpath->pathkeys,
2347 											 make_tlist_from_pathtarget(subpath->pathtarget));
2348 
2349 		/* Generate outer path using this subpath */
2350 		add_path(rel, (Path *)
2351 				 create_subqueryscan_path(root, rel, subpath,
2352 										  pathkeys, required_outer));
2353 	}
2354 
2355 	/* If outer rel allows parallelism, do same for partial paths. */
2356 	if (rel->consider_parallel && bms_is_empty(required_outer))
2357 	{
2358 		/* If consider_parallel is false, there should be no partial paths. */
2359 		Assert(sub_final_rel->consider_parallel ||
2360 			   sub_final_rel->partial_pathlist == NIL);
2361 
2362 		/* Same for partial paths. */
2363 		foreach(lc, sub_final_rel->partial_pathlist)
2364 		{
2365 			Path	   *subpath = (Path *) lfirst(lc);
2366 			List	   *pathkeys;
2367 
2368 			/* Convert subpath's pathkeys to outer representation */
2369 			pathkeys = convert_subquery_pathkeys(root,
2370 												 rel,
2371 												 subpath->pathkeys,
2372 												 make_tlist_from_pathtarget(subpath->pathtarget));
2373 
2374 			/* Generate outer path using this subpath */
2375 			add_partial_path(rel, (Path *)
2376 							 create_subqueryscan_path(root, rel, subpath,
2377 													  pathkeys,
2378 													  required_outer));
2379 		}
2380 	}
2381 }
2382 
2383 /*
2384  * set_function_pathlist
2385  *		Build the (single) access path for a function RTE
2386  */
2387 static void
set_function_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2388 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2389 {
2390 	Relids		required_outer;
2391 	List	   *pathkeys = NIL;
2392 
2393 	/*
2394 	 * We don't support pushing join clauses into the quals of a function
2395 	 * scan, but it could still have required parameterization due to LATERAL
2396 	 * refs in the function expression.
2397 	 */
2398 	required_outer = rel->lateral_relids;
2399 
2400 	/*
2401 	 * The result is considered unordered unless ORDINALITY was used, in which
2402 	 * case it is ordered by the ordinal column (the last one).  See if we
2403 	 * care, by checking for uses of that Var in equivalence classes.
2404 	 */
2405 	if (rte->funcordinality)
2406 	{
2407 		AttrNumber	ordattno = rel->max_attr;
2408 		Var		   *var = NULL;
2409 		ListCell   *lc;
2410 
2411 		/*
2412 		 * Is there a Var for it in rel's targetlist?  If not, the query did
2413 		 * not reference the ordinality column, or at least not in any way
2414 		 * that would be interesting for sorting.
2415 		 */
2416 		foreach(lc, rel->reltarget->exprs)
2417 		{
2418 			Var		   *node = (Var *) lfirst(lc);
2419 
2420 			/* checking varno/varlevelsup is just paranoia */
2421 			if (IsA(node, Var) &&
2422 				node->varattno == ordattno &&
2423 				node->varno == rel->relid &&
2424 				node->varlevelsup == 0)
2425 			{
2426 				var = node;
2427 				break;
2428 			}
2429 		}
2430 
2431 		/*
2432 		 * Try to build pathkeys for this Var with int8 sorting.  We tell
2433 		 * build_expression_pathkey not to build any new equivalence class; if
2434 		 * the Var isn't already mentioned in some EC, it means that nothing
2435 		 * cares about the ordering.
2436 		 */
2437 		if (var)
2438 			pathkeys = build_expression_pathkey(root,
2439 												(Expr *) var,
2440 												NULL,	/* below outer joins */
2441 												Int8LessOperator,
2442 												rel->relids,
2443 												false);
2444 	}
2445 
2446 	/* Generate appropriate path */
2447 	add_path(rel, create_functionscan_path(root, rel,
2448 										   pathkeys, required_outer));
2449 }
2450 
2451 /*
2452  * set_values_pathlist
2453  *		Build the (single) access path for a VALUES RTE
2454  */
2455 static void
set_values_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2456 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2457 {
2458 	Relids		required_outer;
2459 
2460 	/*
2461 	 * We don't support pushing join clauses into the quals of a values scan,
2462 	 * but it could still have required parameterization due to LATERAL refs
2463 	 * in the values expressions.
2464 	 */
2465 	required_outer = rel->lateral_relids;
2466 
2467 	/* Generate appropriate path */
2468 	add_path(rel, create_valuesscan_path(root, rel, required_outer));
2469 }
2470 
2471 /*
2472  * set_tablefunc_pathlist
2473  *		Build the (single) access path for a table func RTE
2474  */
2475 static void
set_tablefunc_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2476 set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2477 {
2478 	Relids		required_outer;
2479 
2480 	/*
2481 	 * We don't support pushing join clauses into the quals of a tablefunc
2482 	 * scan, but it could still have required parameterization due to LATERAL
2483 	 * refs in the function expression.
2484 	 */
2485 	required_outer = rel->lateral_relids;
2486 
2487 	/* Generate appropriate path */
2488 	add_path(rel, create_tablefuncscan_path(root, rel,
2489 											required_outer));
2490 }
2491 
2492 /*
2493  * set_cte_pathlist
2494  *		Build the (single) access path for a non-self-reference CTE RTE
2495  *
2496  * There's no need for a separate set_cte_size phase, since we don't
2497  * support join-qual-parameterized paths for CTEs.
2498  */
2499 static void
set_cte_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2500 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2501 {
2502 	Plan	   *cteplan;
2503 	PlannerInfo *cteroot;
2504 	Index		levelsup;
2505 	int			ndx;
2506 	ListCell   *lc;
2507 	int			plan_id;
2508 	Relids		required_outer;
2509 
2510 	/*
2511 	 * Find the referenced CTE, and locate the plan previously made for it.
2512 	 */
2513 	levelsup = rte->ctelevelsup;
2514 	cteroot = root;
2515 	while (levelsup-- > 0)
2516 	{
2517 		cteroot = cteroot->parent_root;
2518 		if (!cteroot)			/* shouldn't happen */
2519 			elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2520 	}
2521 
2522 	/*
2523 	 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2524 	 * on planning the CTEs (ie, this is a side-reference from another CTE).
2525 	 * So we mustn't use forboth here.
2526 	 */
2527 	ndx = 0;
2528 	foreach(lc, cteroot->parse->cteList)
2529 	{
2530 		CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
2531 
2532 		if (strcmp(cte->ctename, rte->ctename) == 0)
2533 			break;
2534 		ndx++;
2535 	}
2536 	if (lc == NULL)				/* shouldn't happen */
2537 		elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
2538 	if (ndx >= list_length(cteroot->cte_plan_ids))
2539 		elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
2540 	plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
2541 	Assert(plan_id > 0);
2542 	cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2543 
2544 	/* Mark rel with estimated output rows, width, etc */
2545 	set_cte_size_estimates(root, rel, cteplan->plan_rows);
2546 
2547 	/*
2548 	 * We don't support pushing join clauses into the quals of a CTE scan, but
2549 	 * it could still have required parameterization due to LATERAL refs in
2550 	 * its tlist.
2551 	 */
2552 	required_outer = rel->lateral_relids;
2553 
2554 	/* Generate appropriate path */
2555 	add_path(rel, create_ctescan_path(root, rel, required_outer));
2556 }
2557 
2558 /*
2559  * set_namedtuplestore_pathlist
2560  *		Build the (single) access path for a named tuplestore RTE
2561  *
2562  * There's no need for a separate set_namedtuplestore_size phase, since we
2563  * don't support join-qual-parameterized paths for tuplestores.
2564  */
2565 static void
set_namedtuplestore_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2566 set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
2567 							 RangeTblEntry *rte)
2568 {
2569 	Relids		required_outer;
2570 
2571 	/* Mark rel with estimated output rows, width, etc */
2572 	set_namedtuplestore_size_estimates(root, rel);
2573 
2574 	/*
2575 	 * We don't support pushing join clauses into the quals of a tuplestore
2576 	 * scan, but it could still have required parameterization due to LATERAL
2577 	 * refs in its tlist.
2578 	 */
2579 	required_outer = rel->lateral_relids;
2580 
2581 	/* Generate appropriate path */
2582 	add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
2583 
2584 	/* Select cheapest path (pretty easy in this case...) */
2585 	set_cheapest(rel);
2586 }
2587 
2588 /*
2589  * set_result_pathlist
2590  *		Build the (single) access path for an RTE_RESULT RTE
2591  *
2592  * There's no need for a separate set_result_size phase, since we
2593  * don't support join-qual-parameterized paths for these RTEs.
2594  */
2595 static void
set_result_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2596 set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
2597 					RangeTblEntry *rte)
2598 {
2599 	Relids		required_outer;
2600 
2601 	/* Mark rel with estimated output rows, width, etc */
2602 	set_result_size_estimates(root, rel);
2603 
2604 	/*
2605 	 * We don't support pushing join clauses into the quals of a Result scan,
2606 	 * but it could still have required parameterization due to LATERAL refs
2607 	 * in its tlist.
2608 	 */
2609 	required_outer = rel->lateral_relids;
2610 
2611 	/* Generate appropriate path */
2612 	add_path(rel, create_resultscan_path(root, rel, required_outer));
2613 
2614 	/* Select cheapest path (pretty easy in this case...) */
2615 	set_cheapest(rel);
2616 }
2617 
2618 /*
2619  * set_worktable_pathlist
2620  *		Build the (single) access path for a self-reference CTE RTE
2621  *
2622  * There's no need for a separate set_worktable_size phase, since we don't
2623  * support join-qual-parameterized paths for CTEs.
2624  */
2625 static void
set_worktable_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2626 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2627 {
2628 	Path	   *ctepath;
2629 	PlannerInfo *cteroot;
2630 	Index		levelsup;
2631 	Relids		required_outer;
2632 
2633 	/*
2634 	 * We need to find the non-recursive term's path, which is in the plan
2635 	 * level that's processing the recursive UNION, which is one level *below*
2636 	 * where the CTE comes from.
2637 	 */
2638 	levelsup = rte->ctelevelsup;
2639 	if (levelsup == 0)			/* shouldn't happen */
2640 		elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2641 	levelsup--;
2642 	cteroot = root;
2643 	while (levelsup-- > 0)
2644 	{
2645 		cteroot = cteroot->parent_root;
2646 		if (!cteroot)			/* shouldn't happen */
2647 			elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2648 	}
2649 	ctepath = cteroot->non_recursive_path;
2650 	if (!ctepath)				/* shouldn't happen */
2651 		elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
2652 
2653 	/* Mark rel with estimated output rows, width, etc */
2654 	set_cte_size_estimates(root, rel, ctepath->rows);
2655 
2656 	/*
2657 	 * We don't support pushing join clauses into the quals of a worktable
2658 	 * scan, but it could still have required parameterization due to LATERAL
2659 	 * refs in its tlist.  (I'm not sure this is actually possible given the
2660 	 * restrictions on recursive references, but it's easy enough to support.)
2661 	 */
2662 	required_outer = rel->lateral_relids;
2663 
2664 	/* Generate appropriate path */
2665 	add_path(rel, create_worktablescan_path(root, rel, required_outer));
2666 }
2667 
2668 /*
2669  * generate_gather_paths
2670  *		Generate parallel access paths for a relation by pushing a Gather or
2671  *		Gather Merge on top of a partial path.
2672  *
2673  * This must not be called until after we're done creating all partial paths
2674  * for the specified relation.  (Otherwise, add_partial_path might delete a
2675  * path that some GatherPath or GatherMergePath has a reference to.)
2676  *
2677  * If we're generating paths for a scan or join relation, override_rows will
2678  * be false, and we'll just use the relation's size estimate.  When we're
2679  * being called for a partially-grouped path, though, we need to override
2680  * the rowcount estimate.  (It's not clear that the particular value we're
2681  * using here is actually best, but the underlying rel has no estimate so
2682  * we must do something.)
2683  */
2684 void
generate_gather_paths(PlannerInfo * root,RelOptInfo * rel,bool override_rows)2685 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2686 {
2687 	Path	   *cheapest_partial_path;
2688 	Path	   *simple_gather_path;
2689 	ListCell   *lc;
2690 	double		rows;
2691 	double	   *rowsp = NULL;
2692 
2693 	/* If there are no partial paths, there's nothing to do here. */
2694 	if (rel->partial_pathlist == NIL)
2695 		return;
2696 
2697 	/* Should we override the rel's rowcount estimate? */
2698 	if (override_rows)
2699 		rowsp = &rows;
2700 
2701 	/*
2702 	 * The output of Gather is always unsorted, so there's only one partial
2703 	 * path of interest: the cheapest one.  That will be the one at the front
2704 	 * of partial_pathlist because of the way add_partial_path works.
2705 	 */
2706 	cheapest_partial_path = linitial(rel->partial_pathlist);
2707 	rows =
2708 		cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
2709 	simple_gather_path = (Path *)
2710 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
2711 						   NULL, rowsp);
2712 	add_path(rel, simple_gather_path);
2713 
2714 	/*
2715 	 * For each useful ordering, we can consider an order-preserving Gather
2716 	 * Merge.
2717 	 */
2718 	foreach(lc, rel->partial_pathlist)
2719 	{
2720 		Path	   *subpath = (Path *) lfirst(lc);
2721 		GatherMergePath *path;
2722 
2723 		if (subpath->pathkeys == NIL)
2724 			continue;
2725 
2726 		rows = subpath->rows * subpath->parallel_workers;
2727 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
2728 										subpath->pathkeys, NULL, rowsp);
2729 		add_path(rel, &path->path);
2730 	}
2731 }
2732 
2733 /*
2734  * get_useful_pathkeys_for_relation
2735  *		Determine which orderings of a relation might be useful.
2736  *
2737  * Getting data in sorted order can be useful either because the requested
2738  * order matches the final output ordering for the overall query we're
2739  * planning, or because it enables an efficient merge join.  Here, we try
2740  * to figure out which pathkeys to consider.
2741  *
2742  * This allows us to do incremental sort on top of an index scan under a gather
2743  * merge node, i.e. parallelized.
2744  *
 * If require_parallel_safe is true, we also require the expressions to
 * be parallel safe (which allows pushing the sort below Gather Merge).
2747  *
2748  * XXX At the moment this can only ever return a list with a single element,
2749  * because it looks at query_pathkeys only. So we might return the pathkeys
2750  * directly, but it seems plausible we'll want to consider other orderings
2751  * in the future. For example, we might want to consider pathkeys useful for
2752  * merge joins.
2753  */
2754 static List *
get_useful_pathkeys_for_relation(PlannerInfo * root,RelOptInfo * rel,bool require_parallel_safe)2755 get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
2756 								 bool require_parallel_safe)
2757 {
2758 	List	   *useful_pathkeys_list = NIL;
2759 
2760 	/*
2761 	 * Considering query_pathkeys is always worth it, because it might allow
2762 	 * us to avoid a total sort when we have a partially presorted path
2763 	 * available or to push the total sort into the parallel portion of the
2764 	 * query.
2765 	 */
2766 	if (root->query_pathkeys)
2767 	{
2768 		ListCell   *lc;
2769 		int			npathkeys = 0;	/* useful pathkeys */
2770 
2771 		foreach(lc, root->query_pathkeys)
2772 		{
2773 			PathKey    *pathkey = (PathKey *) lfirst(lc);
2774 			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
2775 
2776 			/*
2777 			 * We can only build a sort for pathkeys which contain an EC
2778 			 * member in the current relation's target, so ignore any suffix
2779 			 * of the list as soon as we find a pathkey without an EC member
2780 			 * in the relation.
2781 			 *
2782 			 * By still returning the prefix of the pathkeys list that does
2783 			 * meet criteria of EC membership in the current relation, we
2784 			 * enable not just an incremental sort on the entirety of
2785 			 * query_pathkeys but also incremental sort below a JOIN.
2786 			 *
2787 			 * If requested, ensure the expression is parallel safe too.
2788 			 */
2789 			if (!find_em_expr_usable_for_sorting_rel(root, pathkey_ec, rel,
2790 													 require_parallel_safe))
2791 				break;
2792 
2793 			npathkeys++;
2794 		}
2795 
2796 		/*
2797 		 * The whole query_pathkeys list matches, so append it directly, to
2798 		 * allow comparing pathkeys easily by comparing list pointer. If we
2799 		 * have to truncate the pathkeys, we gotta do a copy though.
2800 		 */
2801 		if (npathkeys == list_length(root->query_pathkeys))
2802 			useful_pathkeys_list = lappend(useful_pathkeys_list,
2803 										   root->query_pathkeys);
2804 		else if (npathkeys > 0)
2805 			useful_pathkeys_list = lappend(useful_pathkeys_list,
2806 										   list_truncate(list_copy(root->query_pathkeys),
2807 														 npathkeys));
2808 	}
2809 
2810 	return useful_pathkeys_list;
2811 }
2812 
2813 /*
2814  * generate_useful_gather_paths
2815  *		Generate parallel access paths for a relation by pushing a Gather or
2816  *		Gather Merge on top of a partial path.
2817  *
 * Unlike plain generate_gather_paths, this looks not only at the pathkeys of
 * the input paths (aiming to preserve their ordering), but also considers
 * orderings that might be useful for nodes above the gather merge node, and
 * tries to add a sort (regular or incremental) to provide them.
2822  */
2823 void
generate_useful_gather_paths(PlannerInfo * root,RelOptInfo * rel,bool override_rows)2824 generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2825 {
2826 	ListCell   *lc;
2827 	double		rows;
2828 	double	   *rowsp = NULL;
2829 	List	   *useful_pathkeys_list = NIL;
2830 	Path	   *cheapest_partial_path = NULL;
2831 
2832 	/* If there are no partial paths, there's nothing to do here. */
2833 	if (rel->partial_pathlist == NIL)
2834 		return;
2835 
2836 	/* Should we override the rel's rowcount estimate? */
2837 	if (override_rows)
2838 		rowsp = &rows;
2839 
2840 	/* generate the regular gather (merge) paths */
2841 	generate_gather_paths(root, rel, override_rows);
2842 
2843 	/* consider incremental sort for interesting orderings */
2844 	useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
2845 
2846 	/* used for explicit (full) sort paths */
2847 	cheapest_partial_path = linitial(rel->partial_pathlist);
2848 
2849 	/*
2850 	 * Consider sorted paths for each interesting ordering. We generate both
2851 	 * incremental and full sort.
2852 	 */
2853 	foreach(lc, useful_pathkeys_list)
2854 	{
2855 		List	   *useful_pathkeys = lfirst(lc);
2856 		ListCell   *lc2;
2857 		bool		is_sorted;
2858 		int			presorted_keys;
2859 
2860 		foreach(lc2, rel->partial_pathlist)
2861 		{
2862 			Path	   *subpath = (Path *) lfirst(lc2);
2863 			GatherMergePath *path;
2864 
2865 			is_sorted = pathkeys_count_contained_in(useful_pathkeys,
2866 													subpath->pathkeys,
2867 													&presorted_keys);
2868 
2869 			/*
2870 			 * We don't need to consider the case where a subpath is already
2871 			 * fully sorted because generate_gather_paths already creates a
2872 			 * gather merge path for every subpath that has pathkeys present.
2873 			 *
2874 			 * But since the subpath is already sorted, we know we don't need
2875 			 * to consider adding a sort (other either kind) on top of it, so
2876 			 * we can continue here.
2877 			 */
2878 			if (is_sorted)
2879 				continue;
2880 
2881 			/*
2882 			 * Consider regular sort for the cheapest partial path (for each
2883 			 * useful pathkeys). We know the path is not sorted, because we'd
2884 			 * not get here otherwise.
2885 			 *
2886 			 * This is not redundant with the gather paths created in
2887 			 * generate_gather_paths, because that doesn't generate ordered
2888 			 * output. Here we add an explicit sort to match the useful
2889 			 * ordering.
2890 			 */
2891 			if (cheapest_partial_path == subpath)
2892 			{
2893 				Path	   *tmp;
2894 
2895 				tmp = (Path *) create_sort_path(root,
2896 												rel,
2897 												subpath,
2898 												useful_pathkeys,
2899 												-1.0);
2900 
2901 				rows = tmp->rows * tmp->parallel_workers;
2902 
2903 				path = create_gather_merge_path(root, rel,
2904 												tmp,
2905 												rel->reltarget,
2906 												tmp->pathkeys,
2907 												NULL,
2908 												rowsp);
2909 
2910 				add_path(rel, &path->path);
2911 
2912 				/* Fall through */
2913 			}
2914 
2915 			/*
2916 			 * Consider incremental sort, but only when the subpath is already
2917 			 * partially sorted on a pathkey prefix.
2918 			 */
2919 			if (enable_incremental_sort && presorted_keys > 0)
2920 			{
2921 				Path	   *tmp;
2922 
2923 				/*
2924 				 * We should have already excluded pathkeys of length 1
2925 				 * because then presorted_keys > 0 would imply is_sorted was
2926 				 * true.
2927 				 */
2928 				Assert(list_length(useful_pathkeys) != 1);
2929 
2930 				tmp = (Path *) create_incremental_sort_path(root,
2931 															rel,
2932 															subpath,
2933 															useful_pathkeys,
2934 															presorted_keys,
2935 															-1);
2936 
2937 				path = create_gather_merge_path(root, rel,
2938 												tmp,
2939 												rel->reltarget,
2940 												tmp->pathkeys,
2941 												NULL,
2942 												rowsp);
2943 
2944 				add_path(rel, &path->path);
2945 			}
2946 		}
2947 	}
2948 }
2949 
2950 /*
2951  * make_rel_from_joinlist
2952  *	  Build access paths using a "joinlist" to guide the join path search.
2953  *
2954  * See comments for deconstruct_jointree() for definition of the joinlist
2955  * data structure.
2956  */
2957 static RelOptInfo *
make_rel_from_joinlist(PlannerInfo * root,List * joinlist)2958 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
2959 {
2960 	int			levels_needed;
2961 	List	   *initial_rels;
2962 	ListCell   *jl;
2963 
2964 	/*
2965 	 * Count the number of child joinlist nodes.  This is the depth of the
2966 	 * dynamic-programming algorithm we must employ to consider all ways of
2967 	 * joining the child nodes.
2968 	 */
2969 	levels_needed = list_length(joinlist);
2970 
2971 	if (levels_needed <= 0)
2972 		return NULL;			/* nothing to do? */
2973 
2974 	/*
2975 	 * Construct a list of rels corresponding to the child joinlist nodes.
2976 	 * This may contain both base rels and rels constructed according to
2977 	 * sub-joinlists.
2978 	 */
2979 	initial_rels = NIL;
2980 	foreach(jl, joinlist)
2981 	{
2982 		Node	   *jlnode = (Node *) lfirst(jl);
2983 		RelOptInfo *thisrel;
2984 
2985 		if (IsA(jlnode, RangeTblRef))
2986 		{
2987 			int			varno = ((RangeTblRef *) jlnode)->rtindex;
2988 
2989 			thisrel = find_base_rel(root, varno);
2990 		}
2991 		else if (IsA(jlnode, List))
2992 		{
2993 			/* Recurse to handle subproblem */
2994 			thisrel = make_rel_from_joinlist(root, (List *) jlnode);
2995 		}
2996 		else
2997 		{
2998 			elog(ERROR, "unrecognized joinlist node type: %d",
2999 				 (int) nodeTag(jlnode));
3000 			thisrel = NULL;		/* keep compiler quiet */
3001 		}
3002 
3003 		initial_rels = lappend(initial_rels, thisrel);
3004 	}
3005 
3006 	if (levels_needed == 1)
3007 	{
3008 		/*
3009 		 * Single joinlist node, so we're done.
3010 		 */
3011 		return (RelOptInfo *) linitial(initial_rels);
3012 	}
3013 	else
3014 	{
3015 		/*
3016 		 * Consider the different orders in which we could join the rels,
3017 		 * using a plugin, GEQO, or the regular join search code.
3018 		 *
3019 		 * We put the initial_rels list into a PlannerInfo field because
3020 		 * has_legal_joinclause() needs to look at it (ugly :-().
3021 		 */
3022 		root->initial_rels = initial_rels;
3023 
3024 		if (join_search_hook)
3025 			return (*join_search_hook) (root, levels_needed, initial_rels);
3026 		else if (enable_geqo && levels_needed >= geqo_threshold)
3027 			return geqo(root, levels_needed, initial_rels);
3028 		else
3029 			return standard_join_search(root, levels_needed, initial_rels);
3030 	}
3031 }
3032 
3033 /*
3034  * standard_join_search
3035  *	  Find possible joinpaths for a query by successively finding ways
3036  *	  to join component relations into join relations.
3037  *
3038  * 'levels_needed' is the number of iterations needed, ie, the number of
3039  *		independent jointree items in the query.  This is > 1.
3040  *
3041  * 'initial_rels' is a list of RelOptInfo nodes for each independent
3042  *		jointree item.  These are the components to be joined together.
3043  *		Note that levels_needed == list_length(initial_rels).
3044  *
3045  * Returns the final level of join relations, i.e., the relation that is
3046  * the result of joining all the original relations together.
3047  * At least one implementation path must be provided for this relation and
3048  * all required sub-relations.
3049  *
3050  * To support loadable plugins that modify planner behavior by changing the
3051  * join searching algorithm, we provide a hook variable that lets a plugin
3052  * replace or supplement this function.  Any such hook must return the same
3053  * final join relation as the standard code would, but it might have a
3054  * different set of implementation paths attached, and only the sub-joinrels
3055  * needed for these paths need have been instantiated.
3056  *
3057  * Note to plugin authors: the functions invoked during standard_join_search()
3058  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
3059  * than one join-order search, you'll probably need to save and restore the
3060  * original states of those data structures.  See geqo_eval() for an example.
3061  */
3062 RelOptInfo *
standard_join_search(PlannerInfo * root,int levels_needed,List * initial_rels)3063 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
3064 {
3065 	int			lev;
3066 	RelOptInfo *rel;
3067 
3068 	/*
3069 	 * This function cannot be invoked recursively within any one planning
3070 	 * problem, so join_rel_level[] can't be in use already.
3071 	 */
3072 	Assert(root->join_rel_level == NULL);
3073 
3074 	/*
3075 	 * We employ a simple "dynamic programming" algorithm: we first find all
3076 	 * ways to build joins of two jointree items, then all ways to build joins
3077 	 * of three items (from two-item joins and single items), then four-item
3078 	 * joins, and so on until we have considered all ways to join all the
3079 	 * items into one rel.
3080 	 *
3081 	 * root->join_rel_level[j] is a list of all the j-item rels.  Initially we
3082 	 * set root->join_rel_level[1] to represent all the single-jointree-item
3083 	 * relations.
3084 	 */
3085 	root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
3086 
3087 	root->join_rel_level[1] = initial_rels;
3088 
3089 	for (lev = 2; lev <= levels_needed; lev++)
3090 	{
3091 		ListCell   *lc;
3092 
3093 		/*
3094 		 * Determine all possible pairs of relations to be joined at this
3095 		 * level, and build paths for making each one from every available
3096 		 * pair of lower-level relations.
3097 		 */
3098 		join_search_one_level(root, lev);
3099 
3100 		/*
3101 		 * Run generate_partitionwise_join_paths() and generate_gather_paths()
3102 		 * for each just-processed joinrel.  We could not do this earlier
3103 		 * because both regular and partial paths can get added to a
3104 		 * particular joinrel at multiple times within join_search_one_level.
3105 		 *
3106 		 * After that, we're done creating paths for the joinrel, so run
3107 		 * set_cheapest().
3108 		 */
3109 		foreach(lc, root->join_rel_level[lev])
3110 		{
3111 			rel = (RelOptInfo *) lfirst(lc);
3112 
3113 			/* Create paths for partitionwise joins. */
3114 			generate_partitionwise_join_paths(root, rel);
3115 
3116 			/*
3117 			 * Except for the topmost scan/join rel, consider gathering
3118 			 * partial paths.  We'll do the same for the topmost scan/join rel
3119 			 * once we know the final targetlist (see grouping_planner).
3120 			 */
3121 			if (lev < levels_needed)
3122 				generate_useful_gather_paths(root, rel, false);
3123 
3124 			/* Find and save the cheapest paths for this rel */
3125 			set_cheapest(rel);
3126 
3127 #ifdef OPTIMIZER_DEBUG
3128 			debug_print_rel(root, rel);
3129 #endif
3130 		}
3131 	}
3132 
3133 	/*
3134 	 * We should have a single rel at the final level.
3135 	 */
3136 	if (root->join_rel_level[levels_needed] == NIL)
3137 		elog(ERROR, "failed to build any %d-way joins", levels_needed);
3138 	Assert(list_length(root->join_rel_level[levels_needed]) == 1);
3139 
3140 	rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
3141 
3142 	root->join_rel_level = NULL;
3143 
3144 	return rel;
3145 }
3146 
3147 /*****************************************************************************
3148  *			PUSHING QUALS DOWN INTO SUBQUERIES
3149  *****************************************************************************/
3150 
3151 /*
3152  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
3153  *
3154  * subquery is the particular component query being checked.  topquery
3155  * is the top component of a set-operations tree (the same Query if no
3156  * set-op is involved).
3157  *
3158  * Conditions checked here:
3159  *
3160  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
3161  * since that could change the set of rows returned.
3162  *
3163  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
3164  * quals into it, because that could change the results.
3165  *
3166  * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
3167  * This is because upper-level quals should semantically be evaluated only
3168  * once per distinct row, not once per original row, and if the qual is
3169  * volatile then extra evaluations could change the results.  (This issue
3170  * does not apply to other forms of aggregation such as GROUP BY, because
3171  * when those are present we push into HAVING not WHERE, so that the quals
3172  * are still applied after aggregation.)
3173  *
3174  * 4. If the subquery contains window functions, we cannot push volatile quals
3175  * into it.  The issue here is a bit different from DISTINCT: a volatile qual
3176  * might succeed for some rows of a window partition and fail for others,
3177  * thereby changing the partition contents and thus the window functions'
3178  * results for rows that remain.
3179  *
3180  * 5. If the subquery contains any set-returning functions in its targetlist,
3181  * we cannot push volatile quals into it.  That would push them below the SRFs
3182  * and thereby change the number of times they are evaluated.  Also, a
3183  * volatile qual could succeed for some SRF output rows and fail for others,
3184  * a behavior that cannot occur if it's evaluated before SRF expansion.
3185  *
3186  * 6. If the subquery has nonempty grouping sets, we cannot push down any
3187  * quals.  The concern here is that a qual referencing a "constant" grouping
3188  * column could get constant-folded, which would be improper because the value
3189  * is potentially nullable by grouping-set expansion.  This restriction could
3190  * be removed if we had a parsetree representation that shows that such
3191  * grouping columns are not really constant.  (There are other ideas that
3192  * could be used to relax this restriction, but that's the approach most
3193  * likely to get taken in the future.  Note that there's not much to be gained
3194  * so long as subquery_planner can't move HAVING clauses to WHERE within such
3195  * a subquery.)
3196  *
3197  * In addition, we make several checks on the subquery's output columns to see
3198  * if it is safe to reference them in pushed-down quals.  If output column k
3199  * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
3200  * to true, but we don't reject the subquery overall since column k might not
3201  * be referenced by some/all quals.  The unsafeColumns[] array will be
3202  * consulted later by qual_is_pushdown_safe().  It's better to do it this way
3203  * than to make the checks directly in qual_is_pushdown_safe(), because when
3204  * the subquery involves set operations we have to check the output
3205  * expressions in each arm of the set op.
3206  *
3207  * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
3208  * we're effectively assuming that the quals cannot distinguish values that
3209  * the DISTINCT's equality operator sees as equal, yet there are many
3210  * counterexamples to that assumption.  However use of such a qual with a
3211  * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
3212  * "equal" value will be chosen as the output value by the DISTINCT operation.
3213  * So we don't worry too much about that.  Another objection is that if the
3214  * qual is expensive to evaluate, running it for each original row might cost
3215  * more than we save by eliminating rows before the DISTINCT step.  But it
3216  * would be very hard to estimate that at this stage, and in practice pushdown
3217  * seldom seems to make things worse, so we ignore that problem too.
3218  *
3219  * Note: likewise, pushing quals into a subquery with window functions is a
3220  * bit dubious: the quals might remove some rows of a window partition while
3221  * leaving others, causing changes in the window functions' results for the
3222  * surviving rows.  We insist that such a qual reference only partitioning
3223  * columns, but again that only protects us if the qual does not distinguish
3224  * values that the partitioning equality operator sees as equal.  The risks
3225  * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3226  * occurs and thus there is no theoretical problem with such a qual.  But
3227  * we'll do this anyway because the potential performance benefits are very
3228  * large, and we've seen no field complaints about the longstanding comparable
3229  * behavior with DISTINCT.
3230  */
3231 static bool
subquery_is_pushdown_safe(Query * subquery,Query * topquery,pushdown_safety_info * safetyInfo)3232 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
3233 						  pushdown_safety_info *safetyInfo)
3234 {
3235 	SetOperationStmt *topop;
3236 
3237 	/* Check point 1 */
3238 	if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
3239 		return false;
3240 
3241 	/* Check point 6 */
3242 	if (subquery->groupClause && subquery->groupingSets)
3243 		return false;
3244 
3245 	/* Check points 3, 4, and 5 */
3246 	if (subquery->distinctClause ||
3247 		subquery->hasWindowFuncs ||
3248 		subquery->hasTargetSRFs)
3249 		safetyInfo->unsafeVolatile = true;
3250 
3251 	/*
3252 	 * If we're at a leaf query, check for unsafe expressions in its target
3253 	 * list, and mark any unsafe ones in unsafeColumns[].  (Non-leaf nodes in
3254 	 * setop trees have only simple Vars in their tlists, so no need to check
3255 	 * them.)
3256 	 */
3257 	if (subquery->setOperations == NULL)
3258 		check_output_expressions(subquery, safetyInfo);
3259 
3260 	/* Are we at top level, or looking at a setop component? */
3261 	if (subquery == topquery)
3262 	{
3263 		/* Top level, so check any component queries */
3264 		if (subquery->setOperations != NULL)
3265 			if (!recurse_pushdown_safe(subquery->setOperations, topquery,
3266 									   safetyInfo))
3267 				return false;
3268 	}
3269 	else
3270 	{
3271 		/* Setop component must not have more components (too weird) */
3272 		if (subquery->setOperations != NULL)
3273 			return false;
3274 		/* Check whether setop component output types match top level */
3275 		topop = castNode(SetOperationStmt, topquery->setOperations);
3276 		Assert(topop);
3277 		compare_tlist_datatypes(subquery->targetList,
3278 								topop->colTypes,
3279 								safetyInfo);
3280 	}
3281 	return true;
3282 }
3283 
3284 /*
3285  * Helper routine to recurse through setOperations tree
3286  */
3287 static bool
recurse_pushdown_safe(Node * setOp,Query * topquery,pushdown_safety_info * safetyInfo)3288 recurse_pushdown_safe(Node *setOp, Query *topquery,
3289 					  pushdown_safety_info *safetyInfo)
3290 {
3291 	if (IsA(setOp, RangeTblRef))
3292 	{
3293 		RangeTblRef *rtr = (RangeTblRef *) setOp;
3294 		RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
3295 		Query	   *subquery = rte->subquery;
3296 
3297 		Assert(subquery != NULL);
3298 		return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
3299 	}
3300 	else if (IsA(setOp, SetOperationStmt))
3301 	{
3302 		SetOperationStmt *op = (SetOperationStmt *) setOp;
3303 
3304 		/* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3305 		if (op->op == SETOP_EXCEPT)
3306 			return false;
3307 		/* Else recurse */
3308 		if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
3309 			return false;
3310 		if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
3311 			return false;
3312 	}
3313 	else
3314 	{
3315 		elog(ERROR, "unrecognized node type: %d",
3316 			 (int) nodeTag(setOp));
3317 	}
3318 	return true;
3319 }
3320 
3321 /*
3322  * check_output_expressions - check subquery's output expressions for safety
3323  *
3324  * There are several cases in which it's unsafe to push down an upper-level
3325  * qual if it references a particular output column of a subquery.  We check
3326  * each output column of the subquery and set unsafeColumns[k] to true if
3327  * that column is unsafe for a pushed-down qual to reference.  The conditions
3328  * checked here are:
3329  *
3330  * 1. We must not push down any quals that refer to subselect outputs that
3331  * return sets, else we'd introduce functions-returning-sets into the
3332  * subquery's WHERE/HAVING quals.
3333  *
3334  * 2. We must not push down any quals that refer to subselect outputs that
3335  * contain volatile functions, for fear of introducing strange results due
3336  * to multiple evaluation of a volatile function.
3337  *
3338  * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3339  * refer to non-DISTINCT output columns, because that could change the set
3340  * of rows returned.  (This condition is vacuous for DISTINCT, because then
3341  * there are no non-DISTINCT output columns, so we needn't check.  Note that
3342  * subquery_is_pushdown_safe already reported that we can't use volatile
3343  * quals if there's DISTINCT or DISTINCT ON.)
3344  *
3345  * 4. If the subquery has any window functions, we must not push down quals
3346  * that reference any output columns that are not listed in all the subquery's
3347  * window PARTITION BY clauses.  We can push down quals that use only
3348  * partitioning columns because they should succeed or fail identically for
3349  * every row of any one window partition, and totally excluding some
3350  * partitions will not change a window function's results for remaining
3351  * partitions.  (Again, this also requires nonvolatile quals, but
3352  * subquery_is_pushdown_safe handles that.)
3353  */
3354 static void
check_output_expressions(Query * subquery,pushdown_safety_info * safetyInfo)3355 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
3356 {
3357 	ListCell   *lc;
3358 
3359 	foreach(lc, subquery->targetList)
3360 	{
3361 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
3362 
3363 		if (tle->resjunk)
3364 			continue;			/* ignore resjunk columns */
3365 
3366 		/* We need not check further if output col is already known unsafe */
3367 		if (safetyInfo->unsafeColumns[tle->resno])
3368 			continue;
3369 
3370 		/* Functions returning sets are unsafe (point 1) */
3371 		if (subquery->hasTargetSRFs &&
3372 			expression_returns_set((Node *) tle->expr))
3373 		{
3374 			safetyInfo->unsafeColumns[tle->resno] = true;
3375 			continue;
3376 		}
3377 
3378 		/* Volatile functions are unsafe (point 2) */
3379 		if (contain_volatile_functions((Node *) tle->expr))
3380 		{
3381 			safetyInfo->unsafeColumns[tle->resno] = true;
3382 			continue;
3383 		}
3384 
3385 		/* If subquery uses DISTINCT ON, check point 3 */
3386 		if (subquery->hasDistinctOn &&
3387 			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
3388 		{
3389 			/* non-DISTINCT column, so mark it unsafe */
3390 			safetyInfo->unsafeColumns[tle->resno] = true;
3391 			continue;
3392 		}
3393 
3394 		/* If subquery uses window functions, check point 4 */
3395 		if (subquery->hasWindowFuncs &&
3396 			!targetIsInAllPartitionLists(tle, subquery))
3397 		{
3398 			/* not present in all PARTITION BY clauses, so mark it unsafe */
3399 			safetyInfo->unsafeColumns[tle->resno] = true;
3400 			continue;
3401 		}
3402 	}
3403 }
3404 
3405 /*
3406  * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3407  * push quals into each component query, but the quals can only reference
3408  * subquery columns that suffer no type coercions in the set operation.
3409  * Otherwise there are possible semantic gotchas.  So, we check the
3410  * component queries to see if any of them have output types different from
3411  * the top-level setop outputs.  unsafeColumns[k] is set true if column k
3412  * has different type in any component.
3413  *
3414  * We don't have to care about typmods here: the only allowed difference
3415  * between set-op input and output typmods is input is a specific typmod
3416  * and output is -1, and that does not require a coercion.
3417  *
3418  * tlist is a subquery tlist.
3419  * colTypes is an OID list of the top-level setop's output column types.
3420  * safetyInfo->unsafeColumns[] is the result array.
3421  */
3422 static void
compare_tlist_datatypes(List * tlist,List * colTypes,pushdown_safety_info * safetyInfo)3423 compare_tlist_datatypes(List *tlist, List *colTypes,
3424 						pushdown_safety_info *safetyInfo)
3425 {
3426 	ListCell   *l;
3427 	ListCell   *colType = list_head(colTypes);
3428 
3429 	foreach(l, tlist)
3430 	{
3431 		TargetEntry *tle = (TargetEntry *) lfirst(l);
3432 
3433 		if (tle->resjunk)
3434 			continue;			/* ignore resjunk columns */
3435 		if (colType == NULL)
3436 			elog(ERROR, "wrong number of tlist entries");
3437 		if (exprType((Node *) tle->expr) != lfirst_oid(colType))
3438 			safetyInfo->unsafeColumns[tle->resno] = true;
3439 		colType = lnext(colTypes, colType);
3440 	}
3441 	if (colType != NULL)
3442 		elog(ERROR, "wrong number of tlist entries");
3443 }
3444 
3445 /*
3446  * targetIsInAllPartitionLists
3447  *		True if the TargetEntry is listed in the PARTITION BY clause
3448  *		of every window defined in the query.
3449  *
3450  * It would be safe to ignore windows not actually used by any window
3451  * function, but it's not easy to get that info at this stage; and it's
3452  * unlikely to be useful to spend any extra cycles getting it, since
3453  * unreferenced window definitions are probably infrequent in practice.
3454  */
3455 static bool
targetIsInAllPartitionLists(TargetEntry * tle,Query * query)3456 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
3457 {
3458 	ListCell   *lc;
3459 
3460 	foreach(lc, query->windowClause)
3461 	{
3462 		WindowClause *wc = (WindowClause *) lfirst(lc);
3463 
3464 		if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
3465 			return false;
3466 	}
3467 	return true;
3468 }
3469 
3470 /*
3471  * qual_is_pushdown_safe - is a particular qual safe to push down?
3472  *
3473  * qual is a restriction clause applying to the given subquery (whose RTE
3474  * has index rti in the parent query).
3475  *
3476  * Conditions checked here:
3477  *
3478  * 1. The qual must not contain any SubPlans (mainly because I'm not sure
3479  * it will work correctly: SubLinks will already have been transformed into
3480  * SubPlans in the qual, but not in the subquery).  Note that SubLinks that
3481  * transform to initplans are safe, and will be accepted here because what
3482  * we'll see in the qual is just a Param referencing the initplan output.
3483  *
3484  * 2. If unsafeVolatile is set, the qual must not contain any volatile
3485  * functions.
3486  *
3487  * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
3488  * that are passed Var nodes, and therefore might reveal values from the
3489  * subquery as side effects.
3490  *
3491  * 4. The qual must not refer to the whole-row output of the subquery
3492  * (since there is no easy way to name that within the subquery itself).
3493  *
3494  * 5. The qual must not refer to any subquery output columns that were
3495  * found to be unsafe to reference by subquery_is_pushdown_safe().
3496  */
3497 static bool
qual_is_pushdown_safe(Query * subquery,Index rti,Node * qual,pushdown_safety_info * safetyInfo)3498 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
3499 					  pushdown_safety_info *safetyInfo)
3500 {
3501 	bool		safe = true;
3502 	List	   *vars;
3503 	ListCell   *vl;
3504 
3505 	/* Refuse subselects (point 1) */
3506 	if (contain_subplans(qual))
3507 		return false;
3508 
3509 	/* Refuse volatile quals if we found they'd be unsafe (point 2) */
3510 	if (safetyInfo->unsafeVolatile &&
3511 		contain_volatile_functions(qual))
3512 		return false;
3513 
3514 	/* Refuse leaky quals if told to (point 3) */
3515 	if (safetyInfo->unsafeLeaky &&
3516 		contain_leaked_vars(qual))
3517 		return false;
3518 
3519 	/*
3520 	 * It would be unsafe to push down window function calls, but at least for
3521 	 * the moment we could never see any in a qual anyhow.  (The same applies
3522 	 * to aggregates, which we check for in pull_var_clause below.)
3523 	 */
3524 	Assert(!contain_window_function(qual));
3525 
3526 	/*
3527 	 * Examine all Vars used in clause.  Since it's a restriction clause, all
3528 	 * such Vars must refer to subselect output columns ... unless this is
3529 	 * part of a LATERAL subquery, in which case there could be lateral
3530 	 * references.
3531 	 */
3532 	vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
3533 	foreach(vl, vars)
3534 	{
3535 		Var		   *var = (Var *) lfirst(vl);
3536 
3537 		/*
3538 		 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3539 		 * It's not clear whether a PHV could safely be pushed down, and even
3540 		 * less clear whether such a situation could arise in any cases of
3541 		 * practical interest anyway.  So for the moment, just refuse to push
3542 		 * down.
3543 		 */
3544 		if (!IsA(var, Var))
3545 		{
3546 			safe = false;
3547 			break;
3548 		}
3549 
3550 		/*
3551 		 * Punt if we find any lateral references.  It would be safe to push
3552 		 * these down, but we'd have to convert them into outer references,
3553 		 * which subquery_push_qual lacks the infrastructure to do.  The case
3554 		 * arises so seldom that it doesn't seem worth working hard on.
3555 		 */
3556 		if (var->varno != rti)
3557 		{
3558 			safe = false;
3559 			break;
3560 		}
3561 
3562 		/* Subqueries have no system columns */
3563 		Assert(var->varattno >= 0);
3564 
3565 		/* Check point 4 */
3566 		if (var->varattno == 0)
3567 		{
3568 			safe = false;
3569 			break;
3570 		}
3571 
3572 		/* Check point 5 */
3573 		if (safetyInfo->unsafeColumns[var->varattno])
3574 		{
3575 			safe = false;
3576 			break;
3577 		}
3578 	}
3579 
3580 	list_free(vars);
3581 
3582 	return safe;
3583 }
3584 
3585 /*
3586  * subquery_push_qual - push down a qual that we have determined is safe
3587  */
3588 static void
subquery_push_qual(Query * subquery,RangeTblEntry * rte,Index rti,Node * qual)3589 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
3590 {
3591 	if (subquery->setOperations != NULL)
3592 	{
3593 		/* Recurse to push it separately to each component query */
3594 		recurse_push_qual(subquery->setOperations, subquery,
3595 						  rte, rti, qual);
3596 	}
3597 	else
3598 	{
3599 		/*
3600 		 * We need to replace Vars in the qual (which must refer to outputs of
3601 		 * the subquery) with copies of the subquery's targetlist expressions.
3602 		 * Note that at this point, any uplevel Vars in the qual should have
3603 		 * been replaced with Params, so they need no work.
3604 		 *
3605 		 * This step also ensures that when we are pushing into a setop tree,
3606 		 * each component query gets its own copy of the qual.
3607 		 */
3608 		qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
3609 										 subquery->targetList,
3610 										 REPLACEVARS_REPORT_ERROR, 0,
3611 										 &subquery->hasSubLinks);
3612 
3613 		/*
3614 		 * Now attach the qual to the proper place: normally WHERE, but if the
3615 		 * subquery uses grouping or aggregation, put it in HAVING (since the
3616 		 * qual really refers to the group-result rows).
3617 		 */
3618 		if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
3619 			subquery->havingQual = make_and_qual(subquery->havingQual, qual);
3620 		else
3621 			subquery->jointree->quals =
3622 				make_and_qual(subquery->jointree->quals, qual);
3623 
3624 		/*
3625 		 * We need not change the subquery's hasAggs or hasSubLinks flags,
3626 		 * since we can't be pushing down any aggregates that weren't there
3627 		 * before, and we don't push down subselects at all.
3628 		 */
3629 	}
3630 }
3631 
3632 /*
3633  * Helper routine to recurse through setOperations tree
3634  */
3635 static void
recurse_push_qual(Node * setOp,Query * topquery,RangeTblEntry * rte,Index rti,Node * qual)3636 recurse_push_qual(Node *setOp, Query *topquery,
3637 				  RangeTblEntry *rte, Index rti, Node *qual)
3638 {
3639 	if (IsA(setOp, RangeTblRef))
3640 	{
3641 		RangeTblRef *rtr = (RangeTblRef *) setOp;
3642 		RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
3643 		Query	   *subquery = subrte->subquery;
3644 
3645 		Assert(subquery != NULL);
3646 		subquery_push_qual(subquery, rte, rti, qual);
3647 	}
3648 	else if (IsA(setOp, SetOperationStmt))
3649 	{
3650 		SetOperationStmt *op = (SetOperationStmt *) setOp;
3651 
3652 		recurse_push_qual(op->larg, topquery, rte, rti, qual);
3653 		recurse_push_qual(op->rarg, topquery, rte, rti, qual);
3654 	}
3655 	else
3656 	{
3657 		elog(ERROR, "unrecognized node type: %d",
3658 			 (int) nodeTag(setOp));
3659 	}
3660 }
3661 
3662 /*****************************************************************************
3663  *			SIMPLIFYING SUBQUERY TARGETLISTS
3664  *****************************************************************************/
3665 
3666 /*
3667  * remove_unused_subquery_outputs
3668  *		Remove subquery targetlist items we don't need
3669  *
3670  * It's possible, even likely, that the upper query does not read all the
3671  * output columns of the subquery.  We can remove any such outputs that are
3672  * not needed by the subquery itself (e.g., as sort/group columns) and do not
3673  * affect semantics otherwise (e.g., volatile functions can't be removed).
3674  * This is useful not only because we might be able to remove expensive-to-
3675  * compute expressions, but because deletion of output columns might allow
3676  * optimizations such as join removal to occur within the subquery.
3677  *
3678  * To avoid affecting column numbering in the targetlist, we don't physically
3679  * remove unused tlist entries, but rather replace their expressions with NULL
3680  * constants.  This is implemented by modifying subquery->targetList.
3681  */
3682 static void
remove_unused_subquery_outputs(Query * subquery,RelOptInfo * rel)3683 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
3684 {
3685 	Bitmapset  *attrs_used = NULL;
3686 	ListCell   *lc;
3687 
3688 	/*
3689 	 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
3690 	 * could update all the child SELECTs' tlists, but it seems not worth the
3691 	 * trouble presently.
3692 	 */
3693 	if (subquery->setOperations)
3694 		return;
3695 
3696 	/*
3697 	 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
3698 	 * time: all its output columns must be used in the distinctClause.
3699 	 */
3700 	if (subquery->distinctClause && !subquery->hasDistinctOn)
3701 		return;
3702 
3703 	/*
3704 	 * Collect a bitmap of all the output column numbers used by the upper
3705 	 * query.
3706 	 *
3707 	 * Add all the attributes needed for joins or final output.  Note: we must
3708 	 * look at rel's targetlist, not the attr_needed data, because attr_needed
3709 	 * isn't computed for inheritance child rels, cf set_append_rel_size().
3710 	 * (XXX might be worth changing that sometime.)
3711 	 */
3712 	pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
3713 
3714 	/* Add all the attributes used by un-pushed-down restriction clauses. */
3715 	foreach(lc, rel->baserestrictinfo)
3716 	{
3717 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
3718 
3719 		pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
3720 	}
3721 
3722 	/*
3723 	 * If there's a whole-row reference to the subquery, we can't remove
3724 	 * anything.
3725 	 */
3726 	if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
3727 		return;
3728 
3729 	/*
3730 	 * Run through the tlist and zap entries we don't need.  It's okay to
3731 	 * modify the tlist items in-place because set_subquery_pathlist made a
3732 	 * copy of the subquery.
3733 	 */
3734 	foreach(lc, subquery->targetList)
3735 	{
3736 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
3737 		Node	   *texpr = (Node *) tle->expr;
3738 
3739 		/*
3740 		 * If it has a sortgroupref number, it's used in some sort/group
3741 		 * clause so we'd better not remove it.  Also, don't remove any
3742 		 * resjunk columns, since their reason for being has nothing to do
3743 		 * with anybody reading the subquery's output.  (It's likely that
3744 		 * resjunk columns in a sub-SELECT would always have ressortgroupref
3745 		 * set, but even if they don't, it seems imprudent to remove them.)
3746 		 */
3747 		if (tle->ressortgroupref || tle->resjunk)
3748 			continue;
3749 
3750 		/*
3751 		 * If it's used by the upper query, we can't remove it.
3752 		 */
3753 		if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
3754 						  attrs_used))
3755 			continue;
3756 
3757 		/*
3758 		 * If it contains a set-returning function, we can't remove it since
3759 		 * that could change the number of rows returned by the subquery.
3760 		 */
3761 		if (subquery->hasTargetSRFs &&
3762 			expression_returns_set(texpr))
3763 			continue;
3764 
3765 		/*
3766 		 * If it contains volatile functions, we daren't remove it for fear
3767 		 * that the user is expecting their side-effects to happen.
3768 		 */
3769 		if (contain_volatile_functions(texpr))
3770 			continue;
3771 
3772 		/*
3773 		 * OK, we don't need it.  Replace the expression with a NULL constant.
3774 		 * Preserve the exposed type of the expression, in case something
3775 		 * looks at the rowtype of the subquery's result.
3776 		 */
3777 		tle->expr = (Expr *) makeNullConst(exprType(texpr),
3778 										   exprTypmod(texpr),
3779 										   exprCollation(texpr));
3780 	}
3781 }
3782 
3783 /*
3784  * create_partial_bitmap_paths
3785  *	  Build partial bitmap heap path for the relation
3786  */
3787 void
create_partial_bitmap_paths(PlannerInfo * root,RelOptInfo * rel,Path * bitmapqual)3788 create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
3789 							Path *bitmapqual)
3790 {
3791 	int			parallel_workers;
3792 	double		pages_fetched;
3793 
3794 	/* Compute heap pages for bitmap heap scan */
3795 	pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
3796 										 NULL, NULL);
3797 
3798 	parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
3799 											   max_parallel_workers_per_gather);
3800 
3801 	if (parallel_workers <= 0)
3802 		return;
3803 
3804 	add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
3805 														   bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
3806 }
3807 
3808 /*
3809  * Compute the number of parallel workers that should be used to scan a
3810  * relation.  We compute the parallel workers based on the size of the heap to
3811  * be scanned and the size of the index to be scanned, then choose a minimum
3812  * of those.
3813  *
3814  * "heap_pages" is the number of pages from the table that we expect to scan, or
3815  * -1 if we don't expect to scan any.
3816  *
3817  * "index_pages" is the number of pages from the index that we expect to scan, or
3818  * -1 if we don't expect to scan any.
3819  *
3820  * "max_workers" is caller's limit on the number of workers.  This typically
3821  * comes from a GUC.
3822  */
3823 int
compute_parallel_worker(RelOptInfo * rel,double heap_pages,double index_pages,int max_workers)3824 compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
3825 						int max_workers)
3826 {
3827 	int			parallel_workers = 0;
3828 
3829 	/*
3830 	 * If the user has set the parallel_workers reloption, use that; otherwise
3831 	 * select a default number of workers.
3832 	 */
3833 	if (rel->rel_parallel_workers != -1)
3834 		parallel_workers = rel->rel_parallel_workers;
3835 	else
3836 	{
3837 		/*
3838 		 * If the number of pages being scanned is insufficient to justify a
3839 		 * parallel scan, just return zero ... unless it's an inheritance
3840 		 * child. In that case, we want to generate a parallel path here
3841 		 * anyway.  It might not be worthwhile just for this relation, but
3842 		 * when combined with all of its inheritance siblings it may well pay
3843 		 * off.
3844 		 */
3845 		if (rel->reloptkind == RELOPT_BASEREL &&
3846 			((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
3847 			 (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
3848 			return 0;
3849 
3850 		if (heap_pages >= 0)
3851 		{
3852 			int			heap_parallel_threshold;
3853 			int			heap_parallel_workers = 1;
3854 
3855 			/*
3856 			 * Select the number of workers based on the log of the size of
3857 			 * the relation.  This probably needs to be a good deal more
3858 			 * sophisticated, but we need something here for now.  Note that
3859 			 * the upper limit of the min_parallel_table_scan_size GUC is
3860 			 * chosen to prevent overflow here.
3861 			 */
3862 			heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
3863 			while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
3864 			{
3865 				heap_parallel_workers++;
3866 				heap_parallel_threshold *= 3;
3867 				if (heap_parallel_threshold > INT_MAX / 3)
3868 					break;		/* avoid overflow */
3869 			}
3870 
3871 			parallel_workers = heap_parallel_workers;
3872 		}
3873 
3874 		if (index_pages >= 0)
3875 		{
3876 			int			index_parallel_workers = 1;
3877 			int			index_parallel_threshold;
3878 
3879 			/* same calculation as for heap_pages above */
3880 			index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
3881 			while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
3882 			{
3883 				index_parallel_workers++;
3884 				index_parallel_threshold *= 3;
3885 				if (index_parallel_threshold > INT_MAX / 3)
3886 					break;		/* avoid overflow */
3887 			}
3888 
3889 			if (parallel_workers > 0)
3890 				parallel_workers = Min(parallel_workers, index_parallel_workers);
3891 			else
3892 				parallel_workers = index_parallel_workers;
3893 		}
3894 	}
3895 
3896 	/* In no case use more than caller supplied maximum number of workers */
3897 	parallel_workers = Min(parallel_workers, max_workers);
3898 
3899 	return parallel_workers;
3900 }
3901 
3902 /*
3903  * generate_partitionwise_join_paths
3904  * 		Create paths representing partitionwise join for given partitioned
3905  * 		join relation.
3906  *
3907  * This must not be called until after we are done adding paths for all
3908  * child-joins. Otherwise, add_path might delete a path to which some path
3909  * generated here has a reference.
3910  */
3911 void
generate_partitionwise_join_paths(PlannerInfo * root,RelOptInfo * rel)3912 generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
3913 {
3914 	List	   *live_children = NIL;
3915 	int			cnt_parts;
3916 	int			num_parts;
3917 	RelOptInfo **part_rels;
3918 
3919 	/* Handle only join relations here. */
3920 	if (!IS_JOIN_REL(rel))
3921 		return;
3922 
3923 	/* We've nothing to do if the relation is not partitioned. */
3924 	if (!IS_PARTITIONED_REL(rel))
3925 		return;
3926 
3927 	/* The relation should have consider_partitionwise_join set. */
3928 	Assert(rel->consider_partitionwise_join);
3929 
3930 	/* Guard against stack overflow due to overly deep partition hierarchy. */
3931 	check_stack_depth();
3932 
3933 	num_parts = rel->nparts;
3934 	part_rels = rel->part_rels;
3935 
3936 	/* Collect non-dummy child-joins. */
3937 	for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
3938 	{
3939 		RelOptInfo *child_rel = part_rels[cnt_parts];
3940 
3941 		/* If it's been pruned entirely, it's certainly dummy. */
3942 		if (child_rel == NULL)
3943 			continue;
3944 
3945 		/* Add partitionwise join paths for partitioned child-joins. */
3946 		generate_partitionwise_join_paths(root, child_rel);
3947 
3948 		set_cheapest(child_rel);
3949 
3950 		/* Dummy children will not be scanned, so ignore those. */
3951 		if (IS_DUMMY_REL(child_rel))
3952 			continue;
3953 
3954 #ifdef OPTIMIZER_DEBUG
3955 		debug_print_rel(root, child_rel);
3956 #endif
3957 
3958 		live_children = lappend(live_children, child_rel);
3959 	}
3960 
3961 	/* If all child-joins are dummy, parent join is also dummy. */
3962 	if (!live_children)
3963 	{
3964 		mark_dummy_rel(rel);
3965 		return;
3966 	}
3967 
3968 	/* Build additional paths for this rel from child-join paths. */
3969 	add_paths_to_append_rel(root, rel, live_children);
3970 	list_free(live_children);
3971 }
3972 
3973 
3974 /*****************************************************************************
3975  *			DEBUG SUPPORT
3976  *****************************************************************************/
3977 
3978 #ifdef OPTIMIZER_DEBUG
3979 
3980 static void
print_relids(PlannerInfo * root,Relids relids)3981 print_relids(PlannerInfo *root, Relids relids)
3982 {
3983 	int			x;
3984 	bool		first = true;
3985 
3986 	x = -1;
3987 	while ((x = bms_next_member(relids, x)) >= 0)
3988 	{
3989 		if (!first)
3990 			printf(" ");
3991 		if (x < root->simple_rel_array_size &&
3992 			root->simple_rte_array[x])
3993 			printf("%s", root->simple_rte_array[x]->eref->aliasname);
3994 		else
3995 			printf("%d", x);
3996 		first = false;
3997 	}
3998 }
3999 
4000 static void
print_restrictclauses(PlannerInfo * root,List * clauses)4001 print_restrictclauses(PlannerInfo *root, List *clauses)
4002 {
4003 	ListCell   *l;
4004 
4005 	foreach(l, clauses)
4006 	{
4007 		RestrictInfo *c = lfirst(l);
4008 
4009 		print_expr((Node *) c->clause, root->parse->rtable);
4010 		if (lnext(clauses, l))
4011 			printf(", ");
4012 	}
4013 }
4014 
4015 static void
print_path(PlannerInfo * root,Path * path,int indent)4016 print_path(PlannerInfo *root, Path *path, int indent)
4017 {
4018 	const char *ptype;
4019 	bool		join = false;
4020 	Path	   *subpath = NULL;
4021 	int			i;
4022 
4023 	switch (nodeTag(path))
4024 	{
4025 		case T_Path:
4026 			switch (path->pathtype)
4027 			{
4028 				case T_SeqScan:
4029 					ptype = "SeqScan";
4030 					break;
4031 				case T_SampleScan:
4032 					ptype = "SampleScan";
4033 					break;
4034 				case T_FunctionScan:
4035 					ptype = "FunctionScan";
4036 					break;
4037 				case T_TableFuncScan:
4038 					ptype = "TableFuncScan";
4039 					break;
4040 				case T_ValuesScan:
4041 					ptype = "ValuesScan";
4042 					break;
4043 				case T_CteScan:
4044 					ptype = "CteScan";
4045 					break;
4046 				case T_NamedTuplestoreScan:
4047 					ptype = "NamedTuplestoreScan";
4048 					break;
4049 				case T_Result:
4050 					ptype = "Result";
4051 					break;
4052 				case T_WorkTableScan:
4053 					ptype = "WorkTableScan";
4054 					break;
4055 				default:
4056 					ptype = "???Path";
4057 					break;
4058 			}
4059 			break;
4060 		case T_IndexPath:
4061 			ptype = "IdxScan";
4062 			break;
4063 		case T_BitmapHeapPath:
4064 			ptype = "BitmapHeapScan";
4065 			break;
4066 		case T_BitmapAndPath:
4067 			ptype = "BitmapAndPath";
4068 			break;
4069 		case T_BitmapOrPath:
4070 			ptype = "BitmapOrPath";
4071 			break;
4072 		case T_TidPath:
4073 			ptype = "TidScan";
4074 			break;
4075 		case T_SubqueryScanPath:
4076 			ptype = "SubqueryScan";
4077 			break;
4078 		case T_ForeignPath:
4079 			ptype = "ForeignScan";
4080 			break;
4081 		case T_CustomPath:
4082 			ptype = "CustomScan";
4083 			break;
4084 		case T_NestPath:
4085 			ptype = "NestLoop";
4086 			join = true;
4087 			break;
4088 		case T_MergePath:
4089 			ptype = "MergeJoin";
4090 			join = true;
4091 			break;
4092 		case T_HashPath:
4093 			ptype = "HashJoin";
4094 			join = true;
4095 			break;
4096 		case T_AppendPath:
4097 			ptype = "Append";
4098 			break;
4099 		case T_MergeAppendPath:
4100 			ptype = "MergeAppend";
4101 			break;
4102 		case T_GroupResultPath:
4103 			ptype = "GroupResult";
4104 			break;
4105 		case T_MaterialPath:
4106 			ptype = "Material";
4107 			subpath = ((MaterialPath *) path)->subpath;
4108 			break;
4109 		case T_UniquePath:
4110 			ptype = "Unique";
4111 			subpath = ((UniquePath *) path)->subpath;
4112 			break;
4113 		case T_GatherPath:
4114 			ptype = "Gather";
4115 			subpath = ((GatherPath *) path)->subpath;
4116 			break;
4117 		case T_GatherMergePath:
4118 			ptype = "GatherMerge";
4119 			subpath = ((GatherMergePath *) path)->subpath;
4120 			break;
4121 		case T_ProjectionPath:
4122 			ptype = "Projection";
4123 			subpath = ((ProjectionPath *) path)->subpath;
4124 			break;
4125 		case T_ProjectSetPath:
4126 			ptype = "ProjectSet";
4127 			subpath = ((ProjectSetPath *) path)->subpath;
4128 			break;
4129 		case T_SortPath:
4130 			ptype = "Sort";
4131 			subpath = ((SortPath *) path)->subpath;
4132 			break;
4133 		case T_IncrementalSortPath:
4134 			ptype = "IncrementalSort";
4135 			subpath = ((SortPath *) path)->subpath;
4136 			break;
4137 		case T_GroupPath:
4138 			ptype = "Group";
4139 			subpath = ((GroupPath *) path)->subpath;
4140 			break;
4141 		case T_UpperUniquePath:
4142 			ptype = "UpperUnique";
4143 			subpath = ((UpperUniquePath *) path)->subpath;
4144 			break;
4145 		case T_AggPath:
4146 			ptype = "Agg";
4147 			subpath = ((AggPath *) path)->subpath;
4148 			break;
4149 		case T_GroupingSetsPath:
4150 			ptype = "GroupingSets";
4151 			subpath = ((GroupingSetsPath *) path)->subpath;
4152 			break;
4153 		case T_MinMaxAggPath:
4154 			ptype = "MinMaxAgg";
4155 			break;
4156 		case T_WindowAggPath:
4157 			ptype = "WindowAgg";
4158 			subpath = ((WindowAggPath *) path)->subpath;
4159 			break;
4160 		case T_SetOpPath:
4161 			ptype = "SetOp";
4162 			subpath = ((SetOpPath *) path)->subpath;
4163 			break;
4164 		case T_RecursiveUnionPath:
4165 			ptype = "RecursiveUnion";
4166 			break;
4167 		case T_LockRowsPath:
4168 			ptype = "LockRows";
4169 			subpath = ((LockRowsPath *) path)->subpath;
4170 			break;
4171 		case T_ModifyTablePath:
4172 			ptype = "ModifyTable";
4173 			break;
4174 		case T_LimitPath:
4175 			ptype = "Limit";
4176 			subpath = ((LimitPath *) path)->subpath;
4177 			break;
4178 		default:
4179 			ptype = "???Path";
4180 			break;
4181 	}
4182 
4183 	for (i = 0; i < indent; i++)
4184 		printf("\t");
4185 	printf("%s", ptype);
4186 
4187 	if (path->parent)
4188 	{
4189 		printf("(");
4190 		print_relids(root, path->parent->relids);
4191 		printf(")");
4192 	}
4193 	if (path->param_info)
4194 	{
4195 		printf(" required_outer (");
4196 		print_relids(root, path->param_info->ppi_req_outer);
4197 		printf(")");
4198 	}
4199 	printf(" rows=%.0f cost=%.2f..%.2f\n",
4200 		   path->rows, path->startup_cost, path->total_cost);
4201 
4202 	if (path->pathkeys)
4203 	{
4204 		for (i = 0; i < indent; i++)
4205 			printf("\t");
4206 		printf("  pathkeys: ");
4207 		print_pathkeys(path->pathkeys, root->parse->rtable);
4208 	}
4209 
4210 	if (join)
4211 	{
4212 		JoinPath   *jp = (JoinPath *) path;
4213 
4214 		for (i = 0; i < indent; i++)
4215 			printf("\t");
4216 		printf("  clauses: ");
4217 		print_restrictclauses(root, jp->joinrestrictinfo);
4218 		printf("\n");
4219 
4220 		if (IsA(path, MergePath))
4221 		{
4222 			MergePath  *mp = (MergePath *) path;
4223 
4224 			for (i = 0; i < indent; i++)
4225 				printf("\t");
4226 			printf("  sortouter=%d sortinner=%d materializeinner=%d\n",
4227 				   ((mp->outersortkeys) ? 1 : 0),
4228 				   ((mp->innersortkeys) ? 1 : 0),
4229 				   ((mp->materialize_inner) ? 1 : 0));
4230 		}
4231 
4232 		print_path(root, jp->outerjoinpath, indent + 1);
4233 		print_path(root, jp->innerjoinpath, indent + 1);
4234 	}
4235 
4236 	if (subpath)
4237 		print_path(root, subpath, indent + 1);
4238 }
4239 
4240 void
debug_print_rel(PlannerInfo * root,RelOptInfo * rel)4241 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
4242 {
4243 	ListCell   *l;
4244 
4245 	printf("RELOPTINFO (");
4246 	print_relids(root, rel->relids);
4247 	printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
4248 
4249 	if (rel->baserestrictinfo)
4250 	{
4251 		printf("\tbaserestrictinfo: ");
4252 		print_restrictclauses(root, rel->baserestrictinfo);
4253 		printf("\n");
4254 	}
4255 
4256 	if (rel->joininfo)
4257 	{
4258 		printf("\tjoininfo: ");
4259 		print_restrictclauses(root, rel->joininfo);
4260 		printf("\n");
4261 	}
4262 
4263 	printf("\tpath list:\n");
4264 	foreach(l, rel->pathlist)
4265 		print_path(root, lfirst(l), 1);
4266 	if (rel->cheapest_parameterized_paths)
4267 	{
4268 		printf("\n\tcheapest parameterized paths:\n");
4269 		foreach(l, rel->cheapest_parameterized_paths)
4270 			print_path(root, lfirst(l), 1);
4271 	}
4272 	if (rel->cheapest_startup_path)
4273 	{
4274 		printf("\n\tcheapest startup path:\n");
4275 		print_path(root, rel->cheapest_startup_path, 1);
4276 	}
4277 	if (rel->cheapest_total_path)
4278 	{
4279 		printf("\n\tcheapest total path:\n");
4280 		print_path(root, rel->cheapest_total_path, 1);
4281 	}
4282 	printf("\n");
4283 	fflush(stdout);
4284 }
4285 
4286 #endif							/* OPTIMIZER_DEBUG */
4287