1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *	  Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/allpaths.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/sysattr.h"
22 #include "access/tsmapi.h"
23 #include "catalog/pg_class.h"
24 #include "catalog/pg_operator.h"
25 #include "catalog/pg_proc.h"
26 #include "foreign/fdwapi.h"
27 #include "miscadmin.h"
28 #include "nodes/makefuncs.h"
29 #include "nodes/nodeFuncs.h"
30 #ifdef OPTIMIZER_DEBUG
31 #include "nodes/print.h"
32 #endif
33 #include "optimizer/appendinfo.h"
34 #include "optimizer/clauses.h"
35 #include "optimizer/cost.h"
36 #include "optimizer/geqo.h"
37 #include "optimizer/inherit.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/plancat.h"
42 #include "optimizer/planner.h"
43 #include "optimizer/restrictinfo.h"
44 #include "optimizer/tlist.h"
45 #include "parser/parse_clause.h"
46 #include "parser/parsetree.h"
47 #include "partitioning/partbounds.h"
48 #include "partitioning/partprune.h"
49 #include "rewrite/rewriteManip.h"
50 #include "utils/lsyscache.h"
51 
52 
/*
 * pushdown_safety_info
 *	  Holds the results produced by subquery_is_pushdown_safe.
 */
typedef struct pushdown_safety_info
{
	/* marks which of the subquery's output columns are unsafe to use */
	bool	   *unsafeColumns;

	/* if set, volatile quals must not be pushed down */
	bool		unsafeVolatile;

	/* if set, leaky quals must not be pushed down */
	bool		unsafeLeaky;
} pushdown_safety_info;
60 
61 /* These parameters are set by GUC */
62 bool		enable_geqo = false;	/* just in case GUC doesn't set it */
63 int			geqo_threshold;
64 int			min_parallel_table_scan_size;
65 int			min_parallel_index_scan_size;
66 
67 /* Hook for plugins to get control in set_rel_pathlist() */
68 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
69 
70 /* Hook for plugins to replace standard_join_search() */
71 join_search_hook_type join_search_hook = NULL;
72 
73 
74 static void set_base_rel_consider_startup(PlannerInfo *root);
75 static void set_base_rel_sizes(PlannerInfo *root);
76 static void set_base_rel_pathlists(PlannerInfo *root);
77 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
78 						 Index rti, RangeTblEntry *rte);
79 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
80 							 Index rti, RangeTblEntry *rte);
81 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
82 							   RangeTblEntry *rte);
83 static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
84 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
85 									  RangeTblEntry *rte);
86 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
87 								   RangeTblEntry *rte);
88 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
89 									 RangeTblEntry *rte);
90 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
91 										 RangeTblEntry *rte);
92 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
93 							 RangeTblEntry *rte);
94 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
95 								 RangeTblEntry *rte);
96 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
97 								Index rti, RangeTblEntry *rte);
98 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
99 									Index rti, RangeTblEntry *rte);
100 static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
101 										 List *live_childrels,
102 										 List *all_child_pathkeys);
103 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
104 												   RelOptInfo *rel,
105 												   Relids required_outer);
106 static void accumulate_append_subpath(Path *path,
107 									  List **subpaths,
108 									  List **special_subpaths);
109 static Path *get_singleton_append_subpath(Path *path);
110 static void set_dummy_rel_pathlist(RelOptInfo *rel);
111 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
112 								  Index rti, RangeTblEntry *rte);
113 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
114 								  RangeTblEntry *rte);
115 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
116 								RangeTblEntry *rte);
117 static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
118 								   RangeTblEntry *rte);
119 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
120 							 RangeTblEntry *rte);
121 static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
122 										 RangeTblEntry *rte);
123 static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
124 								RangeTblEntry *rte);
125 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
126 								   RangeTblEntry *rte);
127 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
128 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
129 									  pushdown_safety_info *safetyInfo);
130 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
131 								  pushdown_safety_info *safetyInfo);
132 static void check_output_expressions(Query *subquery,
133 									 pushdown_safety_info *safetyInfo);
134 static void compare_tlist_datatypes(List *tlist, List *colTypes,
135 									pushdown_safety_info *safetyInfo);
136 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
137 static bool qual_is_pushdown_safe(Query *subquery, Index rti,
138 								  RestrictInfo *rinfo,
139 								  pushdown_safety_info *safetyInfo);
140 static void subquery_push_qual(Query *subquery,
141 							   RangeTblEntry *rte, Index rti, Node *qual);
142 static void recurse_push_qual(Node *setOp, Query *topquery,
143 							  RangeTblEntry *rte, Index rti, Node *qual);
144 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
145 
146 
147 /*
148  * make_one_rel
149  *	  Finds all possible access paths for executing a query, returning a
150  *	  single rel that represents the join of all base rels in the query.
151  */
152 RelOptInfo *
make_one_rel(PlannerInfo * root,List * joinlist)153 make_one_rel(PlannerInfo *root, List *joinlist)
154 {
155 	RelOptInfo *rel;
156 	Index		rti;
157 	double		total_pages;
158 
159 	/*
160 	 * Construct the all_baserels Relids set.
161 	 */
162 	root->all_baserels = NULL;
163 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
164 	{
165 		RelOptInfo *brel = root->simple_rel_array[rti];
166 
167 		/* there may be empty slots corresponding to non-baserel RTEs */
168 		if (brel == NULL)
169 			continue;
170 
171 		Assert(brel->relid == rti); /* sanity check on array */
172 
173 		/* ignore RTEs that are "other rels" */
174 		if (brel->reloptkind != RELOPT_BASEREL)
175 			continue;
176 
177 		root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
178 	}
179 
180 	/* Mark base rels as to whether we care about fast-start plans */
181 	set_base_rel_consider_startup(root);
182 
183 	/*
184 	 * Compute size estimates and consider_parallel flags for each base rel.
185 	 */
186 	set_base_rel_sizes(root);
187 
188 	/*
189 	 * We should now have size estimates for every actual table involved in
190 	 * the query, and we also know which if any have been deleted from the
191 	 * query by join removal, pruned by partition pruning, or eliminated by
192 	 * constraint exclusion.  So we can now compute total_table_pages.
193 	 *
194 	 * Note that appendrels are not double-counted here, even though we don't
195 	 * bother to distinguish RelOptInfos for appendrel parents, because the
196 	 * parents will have pages = 0.
197 	 *
198 	 * XXX if a table is self-joined, we will count it once per appearance,
199 	 * which perhaps is the wrong thing ... but that's not completely clear,
200 	 * and detecting self-joins here is difficult, so ignore it for now.
201 	 */
202 	total_pages = 0;
203 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
204 	{
205 		RelOptInfo *brel = root->simple_rel_array[rti];
206 
207 		if (brel == NULL)
208 			continue;
209 
210 		Assert(brel->relid == rti); /* sanity check on array */
211 
212 		if (IS_DUMMY_REL(brel))
213 			continue;
214 
215 		if (IS_SIMPLE_REL(brel))
216 			total_pages += (double) brel->pages;
217 	}
218 	root->total_table_pages = total_pages;
219 
220 	/*
221 	 * Generate access paths for each base rel.
222 	 */
223 	set_base_rel_pathlists(root);
224 
225 	/*
226 	 * Generate access paths for the entire join tree.
227 	 */
228 	rel = make_rel_from_joinlist(root, joinlist);
229 
230 	/*
231 	 * The result should join all and only the query's base rels.
232 	 */
233 	Assert(bms_equal(rel->relids, root->all_baserels));
234 
235 	return rel;
236 }
237 
238 /*
239  * set_base_rel_consider_startup
240  *	  Set the consider_[param_]startup flags for each base-relation entry.
241  *
242  * For the moment, we only deal with consider_param_startup here; because the
243  * logic for consider_startup is pretty trivial and is the same for every base
244  * relation, we just let build_simple_rel() initialize that flag correctly to
245  * start with.  If that logic ever gets more complicated it would probably
246  * be better to move it here.
247  */
248 static void
set_base_rel_consider_startup(PlannerInfo * root)249 set_base_rel_consider_startup(PlannerInfo *root)
250 {
251 	/*
252 	 * Since parameterized paths can only be used on the inside of a nestloop
253 	 * join plan, there is usually little value in considering fast-start
254 	 * plans for them.  However, for relations that are on the RHS of a SEMI
255 	 * or ANTI join, a fast-start plan can be useful because we're only going
256 	 * to care about fetching one tuple anyway.
257 	 *
258 	 * To minimize growth of planning time, we currently restrict this to
259 	 * cases where the RHS is a single base relation, not a join; there is no
260 	 * provision for consider_param_startup to get set at all on joinrels.
261 	 * Also we don't worry about appendrels.  costsize.c's costing rules for
262 	 * nestloop semi/antijoins don't consider such cases either.
263 	 */
264 	ListCell   *lc;
265 
266 	foreach(lc, root->join_info_list)
267 	{
268 		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
269 		int			varno;
270 
271 		if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
272 			bms_get_singleton_member(sjinfo->syn_righthand, &varno))
273 		{
274 			RelOptInfo *rel = find_base_rel(root, varno);
275 
276 			rel->consider_param_startup = true;
277 		}
278 	}
279 }
280 
281 /*
282  * set_base_rel_sizes
283  *	  Set the size estimates (rows and widths) for each base-relation entry.
284  *	  Also determine whether to consider parallel paths for base relations.
285  *
286  * We do this in a separate pass over the base rels so that rowcount
287  * estimates are available for parameterized path generation, and also so
288  * that each rel's consider_parallel flag is set correctly before we begin to
289  * generate paths.
290  */
291 static void
set_base_rel_sizes(PlannerInfo * root)292 set_base_rel_sizes(PlannerInfo *root)
293 {
294 	Index		rti;
295 
296 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
297 	{
298 		RelOptInfo *rel = root->simple_rel_array[rti];
299 		RangeTblEntry *rte;
300 
301 		/* there may be empty slots corresponding to non-baserel RTEs */
302 		if (rel == NULL)
303 			continue;
304 
305 		Assert(rel->relid == rti);	/* sanity check on array */
306 
307 		/* ignore RTEs that are "other rels" */
308 		if (rel->reloptkind != RELOPT_BASEREL)
309 			continue;
310 
311 		rte = root->simple_rte_array[rti];
312 
313 		/*
314 		 * If parallelism is allowable for this query in general, see whether
315 		 * it's allowable for this rel in particular.  We have to do this
316 		 * before set_rel_size(), because (a) if this rel is an inheritance
317 		 * parent, set_append_rel_size() will use and perhaps change the rel's
318 		 * consider_parallel flag, and (b) for some RTE types, set_rel_size()
319 		 * goes ahead and makes paths immediately.
320 		 */
321 		if (root->glob->parallelModeOK)
322 			set_rel_consider_parallel(root, rel, rte);
323 
324 		set_rel_size(root, rel, rti, rte);
325 	}
326 }
327 
328 /*
329  * set_base_rel_pathlists
330  *	  Finds all paths available for scanning each base-relation entry.
331  *	  Sequential scan and any available indices are considered.
332  *	  Each useful path is attached to its relation's 'pathlist' field.
333  */
334 static void
set_base_rel_pathlists(PlannerInfo * root)335 set_base_rel_pathlists(PlannerInfo *root)
336 {
337 	Index		rti;
338 
339 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
340 	{
341 		RelOptInfo *rel = root->simple_rel_array[rti];
342 
343 		/* there may be empty slots corresponding to non-baserel RTEs */
344 		if (rel == NULL)
345 			continue;
346 
347 		Assert(rel->relid == rti);	/* sanity check on array */
348 
349 		/* ignore RTEs that are "other rels" */
350 		if (rel->reloptkind != RELOPT_BASEREL)
351 			continue;
352 
353 		set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
354 	}
355 }
356 
357 /*
358  * set_rel_size
359  *	  Set size estimates for a base relation
360  */
361 static void
set_rel_size(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)362 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
363 			 Index rti, RangeTblEntry *rte)
364 {
365 	if (rel->reloptkind == RELOPT_BASEREL &&
366 		relation_excluded_by_constraints(root, rel, rte))
367 	{
368 		/*
369 		 * We proved we don't need to scan the rel via constraint exclusion,
370 		 * so set up a single dummy path for it.  Here we only check this for
371 		 * regular baserels; if it's an otherrel, CE was already checked in
372 		 * set_append_rel_size().
373 		 *
374 		 * In this case, we go ahead and set up the relation's path right away
375 		 * instead of leaving it for set_rel_pathlist to do.  This is because
376 		 * we don't have a convention for marking a rel as dummy except by
377 		 * assigning a dummy path to it.
378 		 */
379 		set_dummy_rel_pathlist(rel);
380 	}
381 	else if (rte->inh)
382 	{
383 		/* It's an "append relation", process accordingly */
384 		set_append_rel_size(root, rel, rti, rte);
385 	}
386 	else
387 	{
388 		switch (rel->rtekind)
389 		{
390 			case RTE_RELATION:
391 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
392 				{
393 					/* Foreign table */
394 					set_foreign_size(root, rel, rte);
395 				}
396 				else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
397 				{
398 					/*
399 					 * We could get here if asked to scan a partitioned table
400 					 * with ONLY.  In that case we shouldn't scan any of the
401 					 * partitions, so mark it as a dummy rel.
402 					 */
403 					set_dummy_rel_pathlist(rel);
404 				}
405 				else if (rte->tablesample != NULL)
406 				{
407 					/* Sampled relation */
408 					set_tablesample_rel_size(root, rel, rte);
409 				}
410 				else
411 				{
412 					/* Plain relation */
413 					set_plain_rel_size(root, rel, rte);
414 				}
415 				break;
416 			case RTE_SUBQUERY:
417 
418 				/*
419 				 * Subqueries don't support making a choice between
420 				 * parameterized and unparameterized paths, so just go ahead
421 				 * and build their paths immediately.
422 				 */
423 				set_subquery_pathlist(root, rel, rti, rte);
424 				break;
425 			case RTE_FUNCTION:
426 				set_function_size_estimates(root, rel);
427 				break;
428 			case RTE_TABLEFUNC:
429 				set_tablefunc_size_estimates(root, rel);
430 				break;
431 			case RTE_VALUES:
432 				set_values_size_estimates(root, rel);
433 				break;
434 			case RTE_CTE:
435 
436 				/*
437 				 * CTEs don't support making a choice between parameterized
438 				 * and unparameterized paths, so just go ahead and build their
439 				 * paths immediately.
440 				 */
441 				if (rte->self_reference)
442 					set_worktable_pathlist(root, rel, rte);
443 				else
444 					set_cte_pathlist(root, rel, rte);
445 				break;
446 			case RTE_NAMEDTUPLESTORE:
447 				/* Might as well just build the path immediately */
448 				set_namedtuplestore_pathlist(root, rel, rte);
449 				break;
450 			case RTE_RESULT:
451 				/* Might as well just build the path immediately */
452 				set_result_pathlist(root, rel, rte);
453 				break;
454 			default:
455 				elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
456 				break;
457 		}
458 	}
459 
460 	/*
461 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
462 	 */
463 	Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
464 }
465 
466 /*
467  * set_rel_pathlist
468  *	  Build access paths for a base relation
469  */
470 static void
set_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)471 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
472 				 Index rti, RangeTblEntry *rte)
473 {
474 	if (IS_DUMMY_REL(rel))
475 	{
476 		/* We already proved the relation empty, so nothing more to do */
477 	}
478 	else if (rte->inh)
479 	{
480 		/* It's an "append relation", process accordingly */
481 		set_append_rel_pathlist(root, rel, rti, rte);
482 	}
483 	else
484 	{
485 		switch (rel->rtekind)
486 		{
487 			case RTE_RELATION:
488 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
489 				{
490 					/* Foreign table */
491 					set_foreign_pathlist(root, rel, rte);
492 				}
493 				else if (rte->tablesample != NULL)
494 				{
495 					/* Sampled relation */
496 					set_tablesample_rel_pathlist(root, rel, rte);
497 				}
498 				else
499 				{
500 					/* Plain relation */
501 					set_plain_rel_pathlist(root, rel, rte);
502 				}
503 				break;
504 			case RTE_SUBQUERY:
505 				/* Subquery --- fully handled during set_rel_size */
506 				break;
507 			case RTE_FUNCTION:
508 				/* RangeFunction */
509 				set_function_pathlist(root, rel, rte);
510 				break;
511 			case RTE_TABLEFUNC:
512 				/* Table Function */
513 				set_tablefunc_pathlist(root, rel, rte);
514 				break;
515 			case RTE_VALUES:
516 				/* Values list */
517 				set_values_pathlist(root, rel, rte);
518 				break;
519 			case RTE_CTE:
520 				/* CTE reference --- fully handled during set_rel_size */
521 				break;
522 			case RTE_NAMEDTUPLESTORE:
523 				/* tuplestore reference --- fully handled during set_rel_size */
524 				break;
525 			case RTE_RESULT:
526 				/* simple Result --- fully handled during set_rel_size */
527 				break;
528 			default:
529 				elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
530 				break;
531 		}
532 	}
533 
534 	/*
535 	 * Allow a plugin to editorialize on the set of Paths for this base
536 	 * relation.  It could add new paths (such as CustomPaths) by calling
537 	 * add_path(), or add_partial_path() if parallel aware.  It could also
538 	 * delete or modify paths added by the core code.
539 	 */
540 	if (set_rel_pathlist_hook)
541 		(*set_rel_pathlist_hook) (root, rel, rti, rte);
542 
543 	/*
544 	 * If this is a baserel, we should normally consider gathering any partial
545 	 * paths we may have created for it.  We have to do this after calling the
546 	 * set_rel_pathlist_hook, else it cannot add partial paths to be included
547 	 * here.
548 	 *
549 	 * However, if this is an inheritance child, skip it.  Otherwise, we could
550 	 * end up with a very large number of gather nodes, each trying to grab
551 	 * its own pool of workers.  Instead, we'll consider gathering partial
552 	 * paths for the parent appendrel.
553 	 *
554 	 * Also, if this is the topmost scan/join rel (that is, the only baserel),
555 	 * we postpone gathering until the final scan/join targetlist is available
556 	 * (see grouping_planner).
557 	 */
558 	if (rel->reloptkind == RELOPT_BASEREL &&
559 		bms_membership(root->all_baserels) != BMS_SINGLETON)
560 		generate_useful_gather_paths(root, rel, false);
561 
562 	/* Now find the cheapest of the paths for this rel */
563 	set_cheapest(rel);
564 
565 #ifdef OPTIMIZER_DEBUG
566 	debug_print_rel(root, rel);
567 #endif
568 }
569 
570 /*
571  * set_plain_rel_size
572  *	  Set size estimates for a plain relation (no subquery, no inheritance)
573  */
574 static void
set_plain_rel_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)575 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
576 {
577 	/*
578 	 * Test any partial indexes of rel for applicability.  We must do this
579 	 * first since partial unique indexes can affect size estimates.
580 	 */
581 	check_index_predicates(root, rel);
582 
583 	/* Mark rel with estimated output rows, width, etc */
584 	set_baserel_size_estimates(root, rel);
585 }
586 
587 /*
588  * If this relation could possibly be scanned from within a worker, then set
589  * its consider_parallel flag.
590  */
591 static void
set_rel_consider_parallel(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)592 set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
593 						  RangeTblEntry *rte)
594 {
595 	/*
596 	 * The flag has previously been initialized to false, so we can just
597 	 * return if it becomes clear that we can't safely set it.
598 	 */
599 	Assert(!rel->consider_parallel);
600 
601 	/* Don't call this if parallelism is disallowed for the entire query. */
602 	Assert(root->glob->parallelModeOK);
603 
604 	/* This should only be called for baserels and appendrel children. */
605 	Assert(IS_SIMPLE_REL(rel));
606 
607 	/* Assorted checks based on rtekind. */
608 	switch (rte->rtekind)
609 	{
610 		case RTE_RELATION:
611 
612 			/*
613 			 * Currently, parallel workers can't access the leader's temporary
614 			 * tables.  We could possibly relax this if we wrote all of its
615 			 * local buffers at the start of the query and made no changes
616 			 * thereafter (maybe we could allow hint bit changes), and if we
617 			 * taught the workers to read them.  Writing a large number of
618 			 * temporary buffers could be expensive, though, and we don't have
619 			 * the rest of the necessary infrastructure right now anyway.  So
620 			 * for now, bail out if we see a temporary table.
621 			 */
622 			if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
623 				return;
624 
625 			/*
626 			 * Table sampling can be pushed down to workers if the sample
627 			 * function and its arguments are safe.
628 			 */
629 			if (rte->tablesample != NULL)
630 			{
631 				char		proparallel = func_parallel(rte->tablesample->tsmhandler);
632 
633 				if (proparallel != PROPARALLEL_SAFE)
634 					return;
635 				if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
636 					return;
637 			}
638 
639 			/*
640 			 * Ask FDWs whether they can support performing a ForeignScan
641 			 * within a worker.  Most often, the answer will be no.  For
642 			 * example, if the nature of the FDW is such that it opens a TCP
643 			 * connection with a remote server, each parallel worker would end
644 			 * up with a separate connection, and these connections might not
645 			 * be appropriately coordinated between workers and the leader.
646 			 */
647 			if (rte->relkind == RELKIND_FOREIGN_TABLE)
648 			{
649 				Assert(rel->fdwroutine);
650 				if (!rel->fdwroutine->IsForeignScanParallelSafe)
651 					return;
652 				if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
653 					return;
654 			}
655 
656 			/*
657 			 * There are additional considerations for appendrels, which we'll
658 			 * deal with in set_append_rel_size and set_append_rel_pathlist.
659 			 * For now, just set consider_parallel based on the rel's own
660 			 * quals and targetlist.
661 			 */
662 			break;
663 
664 		case RTE_SUBQUERY:
665 
666 			/*
667 			 * There's no intrinsic problem with scanning a subquery-in-FROM
668 			 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
669 			 * If the subquery doesn't happen to have any parallel-safe paths,
670 			 * then flagging it as consider_parallel won't change anything,
671 			 * but that's true for plain tables, too.  We must set
672 			 * consider_parallel based on the rel's own quals and targetlist,
673 			 * so that if a subquery path is parallel-safe but the quals and
674 			 * projection we're sticking onto it are not, we correctly mark
675 			 * the SubqueryScanPath as not parallel-safe.  (Note that
676 			 * set_subquery_pathlist() might push some of these quals down
677 			 * into the subquery itself, but that doesn't change anything.)
678 			 *
679 			 * We can't push sub-select containing LIMIT/OFFSET to workers as
680 			 * there is no guarantee that the row order will be fully
681 			 * deterministic, and applying LIMIT/OFFSET will lead to
682 			 * inconsistent results at the top-level.  (In some cases, where
683 			 * the result is ordered, we could relax this restriction.  But it
684 			 * doesn't currently seem worth expending extra effort to do so.)
685 			 */
686 			{
687 				Query	   *subquery = castNode(Query, rte->subquery);
688 
689 				if (limit_needed(subquery))
690 					return;
691 			}
692 			break;
693 
694 		case RTE_JOIN:
695 			/* Shouldn't happen; we're only considering baserels here. */
696 			Assert(false);
697 			return;
698 
699 		case RTE_FUNCTION:
700 			/* Check for parallel-restricted functions. */
701 			if (!is_parallel_safe(root, (Node *) rte->functions))
702 				return;
703 			break;
704 
705 		case RTE_TABLEFUNC:
706 			/* not parallel safe */
707 			return;
708 
709 		case RTE_VALUES:
710 			/* Check for parallel-restricted functions. */
711 			if (!is_parallel_safe(root, (Node *) rte->values_lists))
712 				return;
713 			break;
714 
715 		case RTE_CTE:
716 
717 			/*
718 			 * CTE tuplestores aren't shared among parallel workers, so we
719 			 * force all CTE scans to happen in the leader.  Also, populating
720 			 * the CTE would require executing a subplan that's not available
721 			 * in the worker, might be parallel-restricted, and must get
722 			 * executed only once.
723 			 */
724 			return;
725 
726 		case RTE_NAMEDTUPLESTORE:
727 
728 			/*
729 			 * tuplestore cannot be shared, at least without more
730 			 * infrastructure to support that.
731 			 */
732 			return;
733 
734 		case RTE_RESULT:
735 			/* RESULT RTEs, in themselves, are no problem. */
736 			break;
737 	}
738 
739 	/*
740 	 * If there's anything in baserestrictinfo that's parallel-restricted, we
741 	 * give up on parallelizing access to this relation.  We could consider
742 	 * instead postponing application of the restricted quals until we're
743 	 * above all the parallelism in the plan tree, but it's not clear that
744 	 * that would be a win in very many cases, and it might be tricky to make
745 	 * outer join clauses work correctly.  It would likely break equivalence
746 	 * classes, too.
747 	 */
748 	if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
749 		return;
750 
751 	/*
752 	 * Likewise, if the relation's outputs are not parallel-safe, give up.
753 	 * (Usually, they're just Vars, but sometimes they're not.)
754 	 */
755 	if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
756 		return;
757 
758 	/* We have a winner. */
759 	rel->consider_parallel = true;
760 }
761 
762 /*
763  * set_plain_rel_pathlist
764  *	  Build access paths for a plain relation (no subquery, no inheritance)
765  */
766 static void
set_plain_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)767 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
768 {
769 	Relids		required_outer;
770 
771 	/*
772 	 * We don't support pushing join clauses into the quals of a seqscan, but
773 	 * it could still have required parameterization due to LATERAL refs in
774 	 * its tlist.
775 	 */
776 	required_outer = rel->lateral_relids;
777 
778 	/* Consider sequential scan */
779 	add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
780 
781 	/* If appropriate, consider parallel sequential scan */
782 	if (rel->consider_parallel && required_outer == NULL)
783 		create_plain_partial_paths(root, rel);
784 
785 	/* Consider index scans */
786 	create_index_paths(root, rel);
787 
788 	/* Consider TID scans */
789 	create_tidscan_paths(root, rel);
790 }
791 
792 /*
793  * create_plain_partial_paths
794  *	  Build partial access paths for parallel scan of a plain relation
795  */
796 static void
create_plain_partial_paths(PlannerInfo * root,RelOptInfo * rel)797 create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
798 {
799 	int			parallel_workers;
800 
801 	parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
802 											   max_parallel_workers_per_gather);
803 
804 	/* If any limit was set to zero, the user doesn't want a parallel scan. */
805 	if (parallel_workers <= 0)
806 		return;
807 
808 	/* Add an unordered partial path based on a parallel sequential scan. */
809 	add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
810 }
811 
812 /*
813  * set_tablesample_rel_size
814  *	  Set size estimates for a sampled relation
815  */
816 static void
set_tablesample_rel_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)817 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
818 {
819 	TableSampleClause *tsc = rte->tablesample;
820 	TsmRoutine *tsm;
821 	BlockNumber pages;
822 	double		tuples;
823 
824 	/*
825 	 * Test any partial indexes of rel for applicability.  We must do this
826 	 * first since partial unique indexes can affect size estimates.
827 	 */
828 	check_index_predicates(root, rel);
829 
830 	/*
831 	 * Call the sampling method's estimation function to estimate the number
832 	 * of pages it will read and the number of tuples it will return.  (Note:
833 	 * we assume the function returns sane values.)
834 	 */
835 	tsm = GetTsmRoutine(tsc->tsmhandler);
836 	tsm->SampleScanGetSampleSize(root, rel, tsc->args,
837 								 &pages, &tuples);
838 
839 	/*
840 	 * For the moment, because we will only consider a SampleScan path for the
841 	 * rel, it's okay to just overwrite the pages and tuples estimates for the
842 	 * whole relation.  If we ever consider multiple path types for sampled
843 	 * rels, we'll need more complication.
844 	 */
845 	rel->pages = pages;
846 	rel->tuples = tuples;
847 
848 	/* Mark rel with estimated output rows, width, etc */
849 	set_baserel_size_estimates(root, rel);
850 }
851 
852 /*
853  * set_tablesample_rel_pathlist
854  *	  Build access paths for a sampled relation
855  */
856 static void
set_tablesample_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)857 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
858 {
859 	Relids		required_outer;
860 	Path	   *path;
861 
862 	/*
863 	 * We don't support pushing join clauses into the quals of a samplescan,
864 	 * but it could still have required parameterization due to LATERAL refs
865 	 * in its tlist or TABLESAMPLE arguments.
866 	 */
867 	required_outer = rel->lateral_relids;
868 
869 	/* Consider sampled scan */
870 	path = create_samplescan_path(root, rel, required_outer);
871 
872 	/*
873 	 * If the sampling method does not support repeatable scans, we must avoid
874 	 * plans that would scan the rel multiple times.  Ideally, we'd simply
875 	 * avoid putting the rel on the inside of a nestloop join; but adding such
876 	 * a consideration to the planner seems like a great deal of complication
877 	 * to support an uncommon usage of second-rate sampling methods.  Instead,
878 	 * if there is a risk that the query might perform an unsafe join, just
879 	 * wrap the SampleScan in a Materialize node.  We can check for joins by
880 	 * counting the membership of all_baserels (note that this correctly
881 	 * counts inheritance trees as single rels).  If we're inside a subquery,
882 	 * we can't easily check whether a join might occur in the outer query, so
883 	 * just assume one is possible.
884 	 *
885 	 * GetTsmRoutine is relatively expensive compared to the other tests here,
886 	 * so check repeatable_across_scans last, even though that's a bit odd.
887 	 */
888 	if ((root->query_level > 1 ||
889 		 bms_membership(root->all_baserels) != BMS_SINGLETON) &&
890 		!(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
891 	{
892 		path = (Path *) create_material_path(rel, path);
893 	}
894 
895 	add_path(rel, path);
896 
897 	/* For the moment, at least, there are no other paths to consider */
898 }
899 
900 /*
901  * set_foreign_size
902  *		Set size estimates for a foreign table RTE
903  */
904 static void
set_foreign_size(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)905 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
906 {
907 	/* Mark rel with estimated output rows, width, etc */
908 	set_foreign_size_estimates(root, rel);
909 
910 	/* Let FDW adjust the size estimates, if it can */
911 	rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
912 
913 	/* ... but do not let it set the rows estimate to zero */
914 	rel->rows = clamp_row_est(rel->rows);
915 
916 	/*
917 	 * Also, make sure rel->tuples is not insane relative to rel->rows.
918 	 * Notably, this ensures sanity if pg_class.reltuples contains -1 and the
919 	 * FDW doesn't do anything to replace that.
920 	 */
921 	rel->tuples = Max(rel->tuples, rel->rows);
922 }
923 
924 /*
925  * set_foreign_pathlist
926  *		Build access paths for a foreign table RTE
927  */
928 static void
set_foreign_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)929 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
930 {
931 	/* Call the FDW's GetForeignPaths function to generate path(s) */
932 	rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
933 }
934 
935 /*
936  * set_append_rel_size
937  *	  Set size estimates for a simple "append relation"
938  *
939  * The passed-in rel and RTE represent the entire append relation.  The
940  * relation's contents are computed by appending together the output of the
941  * individual member relations.  Note that in the non-partitioned inheritance
942  * case, the first member relation is actually the same table as is mentioned
943  * in the parent RTE ... but it has a different RTE and RelOptInfo.  This is
944  * a good thing because their outputs are not the same size.
945  */
946 static void
set_append_rel_size(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)947 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
948 					Index rti, RangeTblEntry *rte)
949 {
950 	int			parentRTindex = rti;
951 	bool		has_live_children;
952 	double		parent_rows;
953 	double		parent_size;
954 	double	   *parent_attrsizes;
955 	int			nattrs;
956 	ListCell   *l;
957 
958 	/* Guard against stack overflow due to overly deep inheritance tree. */
959 	check_stack_depth();
960 
961 	Assert(IS_SIMPLE_REL(rel));
962 
963 	/*
964 	 * If this is a partitioned baserel, set the consider_partitionwise_join
965 	 * flag; currently, we only consider partitionwise joins with the baserel
966 	 * if its targetlist doesn't contain a whole-row Var.
967 	 */
968 	if (enable_partitionwise_join &&
969 		rel->reloptkind == RELOPT_BASEREL &&
970 		rte->relkind == RELKIND_PARTITIONED_TABLE &&
971 		rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
972 		rel->consider_partitionwise_join = true;
973 
974 	/*
975 	 * Initialize to compute size estimates for whole append relation.
976 	 *
977 	 * We handle width estimates by weighting the widths of different child
978 	 * rels proportionally to their number of rows.  This is sensible because
979 	 * the use of width estimates is mainly to compute the total relation
980 	 * "footprint" if we have to sort or hash it.  To do this, we sum the
981 	 * total equivalent size (in "double" arithmetic) and then divide by the
982 	 * total rowcount estimate.  This is done separately for the total rel
983 	 * width and each attribute.
984 	 *
985 	 * Note: if you consider changing this logic, beware that child rels could
986 	 * have zero rows and/or width, if they were excluded by constraints.
987 	 */
988 	has_live_children = false;
989 	parent_rows = 0;
990 	parent_size = 0;
991 	nattrs = rel->max_attr - rel->min_attr + 1;
992 	parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
993 
994 	foreach(l, root->append_rel_list)
995 	{
996 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
997 		int			childRTindex;
998 		RangeTblEntry *childRTE;
999 		RelOptInfo *childrel;
1000 		ListCell   *parentvars;
1001 		ListCell   *childvars;
1002 
1003 		/* append_rel_list contains all append rels; ignore others */
1004 		if (appinfo->parent_relid != parentRTindex)
1005 			continue;
1006 
1007 		childRTindex = appinfo->child_relid;
1008 		childRTE = root->simple_rte_array[childRTindex];
1009 
1010 		/*
1011 		 * The child rel's RelOptInfo was already created during
1012 		 * add_other_rels_to_query.
1013 		 */
1014 		childrel = find_base_rel(root, childRTindex);
1015 		Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1016 
1017 		/* We may have already proven the child to be dummy. */
1018 		if (IS_DUMMY_REL(childrel))
1019 			continue;
1020 
1021 		/*
1022 		 * We have to copy the parent's targetlist and quals to the child,
1023 		 * with appropriate substitution of variables.  However, the
1024 		 * baserestrictinfo quals were already copied/substituted when the
1025 		 * child RelOptInfo was built.  So we don't need any additional setup
1026 		 * before applying constraint exclusion.
1027 		 */
1028 		if (relation_excluded_by_constraints(root, childrel, childRTE))
1029 		{
1030 			/*
1031 			 * This child need not be scanned, so we can omit it from the
1032 			 * appendrel.
1033 			 */
1034 			set_dummy_rel_pathlist(childrel);
1035 			continue;
1036 		}
1037 
1038 		/*
1039 		 * Constraint exclusion failed, so copy the parent's join quals and
1040 		 * targetlist to the child, with appropriate variable substitutions.
1041 		 *
1042 		 * NB: the resulting childrel->reltarget->exprs may contain arbitrary
1043 		 * expressions, which otherwise would not occur in a rel's targetlist.
1044 		 * Code that might be looking at an appendrel child must cope with
1045 		 * such.  (Normally, a rel's targetlist would only include Vars and
1046 		 * PlaceHolderVars.)  XXX we do not bother to update the cost or width
1047 		 * fields of childrel->reltarget; not clear if that would be useful.
1048 		 */
1049 		childrel->joininfo = (List *)
1050 			adjust_appendrel_attrs(root,
1051 								   (Node *) rel->joininfo,
1052 								   1, &appinfo);
1053 		childrel->reltarget->exprs = (List *)
1054 			adjust_appendrel_attrs(root,
1055 								   (Node *) rel->reltarget->exprs,
1056 								   1, &appinfo);
1057 
1058 		/*
1059 		 * We have to make child entries in the EquivalenceClass data
1060 		 * structures as well.  This is needed either if the parent
1061 		 * participates in some eclass joins (because we will want to consider
1062 		 * inner-indexscan joins on the individual children) or if the parent
1063 		 * has useful pathkeys (because we should try to build MergeAppend
1064 		 * paths that produce those sort orderings).
1065 		 */
1066 		if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
1067 			add_child_rel_equivalences(root, appinfo, rel, childrel);
1068 		childrel->has_eclass_joins = rel->has_eclass_joins;
1069 
1070 		/*
1071 		 * Note: we could compute appropriate attr_needed data for the child's
1072 		 * variables, by transforming the parent's attr_needed through the
1073 		 * translated_vars mapping.  However, currently there's no need
1074 		 * because attr_needed is only examined for base relations not
1075 		 * otherrels.  So we just leave the child's attr_needed empty.
1076 		 */
1077 
1078 		/*
1079 		 * If we consider partitionwise joins with the parent rel, do the same
1080 		 * for partitioned child rels.
1081 		 *
1082 		 * Note: here we abuse the consider_partitionwise_join flag by setting
1083 		 * it for child rels that are not themselves partitioned.  We do so to
1084 		 * tell try_partitionwise_join() that the child rel is sufficiently
1085 		 * valid to be used as a per-partition input, even if it later gets
1086 		 * proven to be dummy.  (It's not usable until we've set up the
1087 		 * reltarget and EC entries, which we just did.)
1088 		 */
1089 		if (rel->consider_partitionwise_join)
1090 			childrel->consider_partitionwise_join = true;
1091 
1092 		/*
1093 		 * If parallelism is allowable for this query in general, see whether
1094 		 * it's allowable for this childrel in particular.  But if we've
1095 		 * already decided the appendrel is not parallel-safe as a whole,
1096 		 * there's no point in considering parallelism for this child.  For
1097 		 * consistency, do this before calling set_rel_size() for the child.
1098 		 */
1099 		if (root->glob->parallelModeOK && rel->consider_parallel)
1100 			set_rel_consider_parallel(root, childrel, childRTE);
1101 
1102 		/*
1103 		 * Compute the child's size.
1104 		 */
1105 		set_rel_size(root, childrel, childRTindex, childRTE);
1106 
1107 		/*
1108 		 * It is possible that constraint exclusion detected a contradiction
1109 		 * within a child subquery, even though we didn't prove one above. If
1110 		 * so, we can skip this child.
1111 		 */
1112 		if (IS_DUMMY_REL(childrel))
1113 			continue;
1114 
1115 		/* We have at least one live child. */
1116 		has_live_children = true;
1117 
1118 		/*
1119 		 * If any live child is not parallel-safe, treat the whole appendrel
1120 		 * as not parallel-safe.  In future we might be able to generate plans
1121 		 * in which some children are farmed out to workers while others are
1122 		 * not; but we don't have that today, so it's a waste to consider
1123 		 * partial paths anywhere in the appendrel unless it's all safe.
1124 		 * (Child rels visited before this one will be unmarked in
1125 		 * set_append_rel_pathlist().)
1126 		 */
1127 		if (!childrel->consider_parallel)
1128 			rel->consider_parallel = false;
1129 
1130 		/*
1131 		 * Accumulate size information from each live child.
1132 		 */
1133 		Assert(childrel->rows > 0);
1134 
1135 		parent_rows += childrel->rows;
1136 		parent_size += childrel->reltarget->width * childrel->rows;
1137 
1138 		/*
1139 		 * Accumulate per-column estimates too.  We need not do anything for
1140 		 * PlaceHolderVars in the parent list.  If child expression isn't a
1141 		 * Var, or we didn't record a width estimate for it, we have to fall
1142 		 * back on a datatype-based estimate.
1143 		 *
1144 		 * By construction, child's targetlist is 1-to-1 with parent's.
1145 		 */
1146 		forboth(parentvars, rel->reltarget->exprs,
1147 				childvars, childrel->reltarget->exprs)
1148 		{
1149 			Var		   *parentvar = (Var *) lfirst(parentvars);
1150 			Node	   *childvar = (Node *) lfirst(childvars);
1151 
1152 			if (IsA(parentvar, Var) && parentvar->varno == parentRTindex)
1153 			{
1154 				int			pndx = parentvar->varattno - rel->min_attr;
1155 				int32		child_width = 0;
1156 
1157 				if (IsA(childvar, Var) &&
1158 					((Var *) childvar)->varno == childrel->relid)
1159 				{
1160 					int			cndx = ((Var *) childvar)->varattno - childrel->min_attr;
1161 
1162 					child_width = childrel->attr_widths[cndx];
1163 				}
1164 				if (child_width <= 0)
1165 					child_width = get_typavgwidth(exprType(childvar),
1166 												  exprTypmod(childvar));
1167 				Assert(child_width > 0);
1168 				parent_attrsizes[pndx] += child_width * childrel->rows;
1169 			}
1170 		}
1171 	}
1172 
1173 	if (has_live_children)
1174 	{
1175 		/*
1176 		 * Save the finished size estimates.
1177 		 */
1178 		int			i;
1179 
1180 		Assert(parent_rows > 0);
1181 		rel->rows = parent_rows;
1182 		rel->reltarget->width = rint(parent_size / parent_rows);
1183 		for (i = 0; i < nattrs; i++)
1184 			rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
1185 
1186 		/*
1187 		 * Set "raw tuples" count equal to "rows" for the appendrel; needed
1188 		 * because some places assume rel->tuples is valid for any baserel.
1189 		 */
1190 		rel->tuples = parent_rows;
1191 
1192 		/*
1193 		 * Note that we leave rel->pages as zero; this is important to avoid
1194 		 * double-counting the appendrel tree in total_table_pages.
1195 		 */
1196 	}
1197 	else
1198 	{
1199 		/*
1200 		 * All children were excluded by constraints, so mark the whole
1201 		 * appendrel dummy.  We must do this in this phase so that the rel's
1202 		 * dummy-ness is visible when we generate paths for other rels.
1203 		 */
1204 		set_dummy_rel_pathlist(rel);
1205 	}
1206 
1207 	pfree(parent_attrsizes);
1208 }
1209 
1210 /*
1211  * set_append_rel_pathlist
1212  *	  Build access paths for an "append relation"
1213  */
1214 static void
set_append_rel_pathlist(PlannerInfo * root,RelOptInfo * rel,Index rti,RangeTblEntry * rte)1215 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
1216 						Index rti, RangeTblEntry *rte)
1217 {
1218 	int			parentRTindex = rti;
1219 	List	   *live_childrels = NIL;
1220 	ListCell   *l;
1221 
1222 	/*
1223 	 * Generate access paths for each member relation, and remember the
1224 	 * non-dummy children.
1225 	 */
1226 	foreach(l, root->append_rel_list)
1227 	{
1228 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1229 		int			childRTindex;
1230 		RangeTblEntry *childRTE;
1231 		RelOptInfo *childrel;
1232 
1233 		/* append_rel_list contains all append rels; ignore others */
1234 		if (appinfo->parent_relid != parentRTindex)
1235 			continue;
1236 
1237 		/* Re-locate the child RTE and RelOptInfo */
1238 		childRTindex = appinfo->child_relid;
1239 		childRTE = root->simple_rte_array[childRTindex];
1240 		childrel = root->simple_rel_array[childRTindex];
1241 
1242 		/*
1243 		 * If set_append_rel_size() decided the parent appendrel was
1244 		 * parallel-unsafe at some point after visiting this child rel, we
1245 		 * need to propagate the unsafety marking down to the child, so that
1246 		 * we don't generate useless partial paths for it.
1247 		 */
1248 		if (!rel->consider_parallel)
1249 			childrel->consider_parallel = false;
1250 
1251 		/*
1252 		 * Compute the child's access paths.
1253 		 */
1254 		set_rel_pathlist(root, childrel, childRTindex, childRTE);
1255 
1256 		/*
1257 		 * If child is dummy, ignore it.
1258 		 */
1259 		if (IS_DUMMY_REL(childrel))
1260 			continue;
1261 
1262 		/*
1263 		 * Child is live, so add it to the live_childrels list for use below.
1264 		 */
1265 		live_childrels = lappend(live_childrels, childrel);
1266 	}
1267 
1268 	/* Add paths to the append relation. */
1269 	add_paths_to_append_rel(root, rel, live_childrels);
1270 }
1271 
1272 
1273 /*
1274  * add_paths_to_append_rel
1275  *		Generate paths for the given append relation given the set of non-dummy
1276  *		child rels.
1277  *
1278  * The function collects all parameterizations and orderings supported by the
1279  * non-dummy children. For every such parameterization or ordering, it creates
1280  * an append path collecting one path from each non-dummy child with given
1281  * parameterization or ordering. Similarly it collects partial paths from
1282  * non-dummy children to create partial append paths.
1283  */
1284 void
add_paths_to_append_rel(PlannerInfo * root,RelOptInfo * rel,List * live_childrels)1285 add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
1286 						List *live_childrels)
1287 {
1288 	List	   *subpaths = NIL;
1289 	bool		subpaths_valid = true;
1290 	List	   *partial_subpaths = NIL;
1291 	List	   *pa_partial_subpaths = NIL;
1292 	List	   *pa_nonpartial_subpaths = NIL;
1293 	bool		partial_subpaths_valid = true;
1294 	bool		pa_subpaths_valid;
1295 	List	   *all_child_pathkeys = NIL;
1296 	List	   *all_child_outers = NIL;
1297 	ListCell   *l;
1298 	double		partial_rows = -1;
1299 
1300 	/* If appropriate, consider parallel append */
1301 	pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
1302 
1303 	/*
1304 	 * For every non-dummy child, remember the cheapest path.  Also, identify
1305 	 * all pathkeys (orderings) and parameterizations (required_outer sets)
1306 	 * available for the non-dummy member relations.
1307 	 */
1308 	foreach(l, live_childrels)
1309 	{
1310 		RelOptInfo *childrel = lfirst(l);
1311 		ListCell   *lcp;
1312 		Path	   *cheapest_partial_path = NULL;
1313 
1314 		/*
1315 		 * If child has an unparameterized cheapest-total path, add that to
1316 		 * the unparameterized Append path we are constructing for the parent.
1317 		 * If not, there's no workable unparameterized path.
1318 		 *
1319 		 * With partitionwise aggregates, the child rel's pathlist may be
1320 		 * empty, so don't assume that a path exists here.
1321 		 */
1322 		if (childrel->pathlist != NIL &&
1323 			childrel->cheapest_total_path->param_info == NULL)
1324 			accumulate_append_subpath(childrel->cheapest_total_path,
1325 									  &subpaths, NULL);
1326 		else
1327 			subpaths_valid = false;
1328 
1329 		/* Same idea, but for a partial plan. */
1330 		if (childrel->partial_pathlist != NIL)
1331 		{
1332 			cheapest_partial_path = linitial(childrel->partial_pathlist);
1333 			accumulate_append_subpath(cheapest_partial_path,
1334 									  &partial_subpaths, NULL);
1335 		}
1336 		else
1337 			partial_subpaths_valid = false;
1338 
1339 		/*
1340 		 * Same idea, but for a parallel append mixing partial and non-partial
1341 		 * paths.
1342 		 */
1343 		if (pa_subpaths_valid)
1344 		{
1345 			Path	   *nppath = NULL;
1346 
1347 			nppath =
1348 				get_cheapest_parallel_safe_total_inner(childrel->pathlist);
1349 
1350 			if (cheapest_partial_path == NULL && nppath == NULL)
1351 			{
1352 				/* Neither a partial nor a parallel-safe path?  Forget it. */
1353 				pa_subpaths_valid = false;
1354 			}
1355 			else if (nppath == NULL ||
1356 					 (cheapest_partial_path != NULL &&
1357 					  cheapest_partial_path->total_cost < nppath->total_cost))
1358 			{
1359 				/* Partial path is cheaper or the only option. */
1360 				Assert(cheapest_partial_path != NULL);
1361 				accumulate_append_subpath(cheapest_partial_path,
1362 										  &pa_partial_subpaths,
1363 										  &pa_nonpartial_subpaths);
1364 			}
1365 			else
1366 			{
1367 				/*
1368 				 * Either we've got only a non-partial path, or we think that
1369 				 * a single backend can execute the best non-partial path
1370 				 * faster than all the parallel backends working together can
1371 				 * execute the best partial path.
1372 				 *
1373 				 * It might make sense to be more aggressive here.  Even if
1374 				 * the best non-partial path is more expensive than the best
1375 				 * partial path, it could still be better to choose the
1376 				 * non-partial path if there are several such paths that can
1377 				 * be given to different workers.  For now, we don't try to
1378 				 * figure that out.
1379 				 */
1380 				accumulate_append_subpath(nppath,
1381 										  &pa_nonpartial_subpaths,
1382 										  NULL);
1383 			}
1384 		}
1385 
1386 		/*
1387 		 * Collect lists of all the available path orderings and
1388 		 * parameterizations for all the children.  We use these as a
1389 		 * heuristic to indicate which sort orderings and parameterizations we
1390 		 * should build Append and MergeAppend paths for.
1391 		 */
1392 		foreach(lcp, childrel->pathlist)
1393 		{
1394 			Path	   *childpath = (Path *) lfirst(lcp);
1395 			List	   *childkeys = childpath->pathkeys;
1396 			Relids		childouter = PATH_REQ_OUTER(childpath);
1397 
1398 			/* Unsorted paths don't contribute to pathkey list */
1399 			if (childkeys != NIL)
1400 			{
1401 				ListCell   *lpk;
1402 				bool		found = false;
1403 
1404 				/* Have we already seen this ordering? */
1405 				foreach(lpk, all_child_pathkeys)
1406 				{
1407 					List	   *existing_pathkeys = (List *) lfirst(lpk);
1408 
1409 					if (compare_pathkeys(existing_pathkeys,
1410 										 childkeys) == PATHKEYS_EQUAL)
1411 					{
1412 						found = true;
1413 						break;
1414 					}
1415 				}
1416 				if (!found)
1417 				{
1418 					/* No, so add it to all_child_pathkeys */
1419 					all_child_pathkeys = lappend(all_child_pathkeys,
1420 												 childkeys);
1421 				}
1422 			}
1423 
1424 			/* Unparameterized paths don't contribute to param-set list */
1425 			if (childouter)
1426 			{
1427 				ListCell   *lco;
1428 				bool		found = false;
1429 
1430 				/* Have we already seen this param set? */
1431 				foreach(lco, all_child_outers)
1432 				{
1433 					Relids		existing_outers = (Relids) lfirst(lco);
1434 
1435 					if (bms_equal(existing_outers, childouter))
1436 					{
1437 						found = true;
1438 						break;
1439 					}
1440 				}
1441 				if (!found)
1442 				{
1443 					/* No, so add it to all_child_outers */
1444 					all_child_outers = lappend(all_child_outers,
1445 											   childouter);
1446 				}
1447 			}
1448 		}
1449 	}
1450 
1451 	/*
1452 	 * If we found unparameterized paths for all children, build an unordered,
1453 	 * unparameterized Append path for the rel.  (Note: this is correct even
1454 	 * if we have zero or one live subpath due to constraint exclusion.)
1455 	 */
1456 	if (subpaths_valid)
1457 		add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
1458 												  NIL, NULL, 0, false,
1459 												  -1));
1460 
1461 	/*
1462 	 * Consider an append of unordered, unparameterized partial paths.  Make
1463 	 * it parallel-aware if possible.
1464 	 */
1465 	if (partial_subpaths_valid && partial_subpaths != NIL)
1466 	{
1467 		AppendPath *appendpath;
1468 		ListCell   *lc;
1469 		int			parallel_workers = 0;
1470 
1471 		/* Find the highest number of workers requested for any subpath. */
1472 		foreach(lc, partial_subpaths)
1473 		{
1474 			Path	   *path = lfirst(lc);
1475 
1476 			parallel_workers = Max(parallel_workers, path->parallel_workers);
1477 		}
1478 		Assert(parallel_workers > 0);
1479 
1480 		/*
1481 		 * If the use of parallel append is permitted, always request at least
1482 		 * log2(# of children) workers.  We assume it can be useful to have
1483 		 * extra workers in this case because they will be spread out across
1484 		 * the children.  The precise formula is just a guess, but we don't
1485 		 * want to end up with a radically different answer for a table with N
1486 		 * partitions vs. an unpartitioned table with the same data, so the
1487 		 * use of some kind of log-scaling here seems to make some sense.
1488 		 */
1489 		if (enable_parallel_append)
1490 		{
1491 			parallel_workers = Max(parallel_workers,
1492 								   fls(list_length(live_childrels)));
1493 			parallel_workers = Min(parallel_workers,
1494 								   max_parallel_workers_per_gather);
1495 		}
1496 		Assert(parallel_workers > 0);
1497 
1498 		/* Generate a partial append path. */
1499 		appendpath = create_append_path(root, rel, NIL, partial_subpaths,
1500 										NIL, NULL, parallel_workers,
1501 										enable_parallel_append,
1502 										-1);
1503 
1504 		/*
1505 		 * Make sure any subsequent partial paths use the same row count
1506 		 * estimate.
1507 		 */
1508 		partial_rows = appendpath->path.rows;
1509 
1510 		/* Add the path. */
1511 		add_partial_path(rel, (Path *) appendpath);
1512 	}
1513 
1514 	/*
1515 	 * Consider a parallel-aware append using a mix of partial and non-partial
1516 	 * paths.  (This only makes sense if there's at least one child which has
1517 	 * a non-partial path that is substantially cheaper than any partial path;
1518 	 * otherwise, we should use the append path added in the previous step.)
1519 	 */
1520 	if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
1521 	{
1522 		AppendPath *appendpath;
1523 		ListCell   *lc;
1524 		int			parallel_workers = 0;
1525 
1526 		/*
1527 		 * Find the highest number of workers requested for any partial
1528 		 * subpath.
1529 		 */
1530 		foreach(lc, pa_partial_subpaths)
1531 		{
1532 			Path	   *path = lfirst(lc);
1533 
1534 			parallel_workers = Max(parallel_workers, path->parallel_workers);
1535 		}
1536 
1537 		/*
1538 		 * Same formula here as above.  It's even more important in this
1539 		 * instance because the non-partial paths won't contribute anything to
1540 		 * the planned number of parallel workers.
1541 		 */
1542 		parallel_workers = Max(parallel_workers,
1543 							   fls(list_length(live_childrels)));
1544 		parallel_workers = Min(parallel_workers,
1545 							   max_parallel_workers_per_gather);
1546 		Assert(parallel_workers > 0);
1547 
1548 		appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
1549 										pa_partial_subpaths,
1550 										NIL, NULL, parallel_workers, true,
1551 										partial_rows);
1552 		add_partial_path(rel, (Path *) appendpath);
1553 	}
1554 
1555 	/*
1556 	 * Also build unparameterized ordered append paths based on the collected
1557 	 * list of child pathkeys.
1558 	 */
1559 	if (subpaths_valid)
1560 		generate_orderedappend_paths(root, rel, live_childrels,
1561 									 all_child_pathkeys);
1562 
1563 	/*
1564 	 * Build Append paths for each parameterization seen among the child rels.
1565 	 * (This may look pretty expensive, but in most cases of practical
1566 	 * interest, the child rels will expose mostly the same parameterizations,
1567 	 * so that not that many cases actually get considered here.)
1568 	 *
1569 	 * The Append node itself cannot enforce quals, so all qual checking must
1570 	 * be done in the child paths.  This means that to have a parameterized
1571 	 * Append path, we must have the exact same parameterization for each
1572 	 * child path; otherwise some children might be failing to check the
1573 	 * moved-down quals.  To make them match up, we can try to increase the
1574 	 * parameterization of lesser-parameterized paths.
1575 	 */
1576 	foreach(l, all_child_outers)
1577 	{
1578 		Relids		required_outer = (Relids) lfirst(l);
1579 		ListCell   *lcr;
1580 
1581 		/* Select the child paths for an Append with this parameterization */
1582 		subpaths = NIL;
1583 		subpaths_valid = true;
1584 		foreach(lcr, live_childrels)
1585 		{
1586 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1587 			Path	   *subpath;
1588 
1589 			if (childrel->pathlist == NIL)
1590 			{
1591 				/* failed to make a suitable path for this child */
1592 				subpaths_valid = false;
1593 				break;
1594 			}
1595 
1596 			subpath = get_cheapest_parameterized_child_path(root,
1597 															childrel,
1598 															required_outer);
1599 			if (subpath == NULL)
1600 			{
1601 				/* failed to make a suitable path for this child */
1602 				subpaths_valid = false;
1603 				break;
1604 			}
1605 			accumulate_append_subpath(subpath, &subpaths, NULL);
1606 		}
1607 
1608 		if (subpaths_valid)
1609 			add_path(rel, (Path *)
1610 					 create_append_path(root, rel, subpaths, NIL,
1611 										NIL, required_outer, 0, false,
1612 										-1));
1613 	}
1614 
1615 	/*
1616 	 * When there is only a single child relation, the Append path can inherit
1617 	 * any ordering available for the child rel's path, so that it's useful to
1618 	 * consider ordered partial paths.  Above we only considered the cheapest
1619 	 * partial path for each child, but let's also make paths using any
1620 	 * partial paths that have pathkeys.
1621 	 */
1622 	if (list_length(live_childrels) == 1)
1623 	{
1624 		RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);
1625 
1626 		/* skip the cheapest partial path, since we already used that above */
1627 		for_each_from(l, childrel->partial_pathlist, 1)
1628 		{
1629 			Path	   *path = (Path *) lfirst(l);
1630 			AppendPath *appendpath;
1631 
1632 			/* skip paths with no pathkeys. */
1633 			if (path->pathkeys == NIL)
1634 				continue;
1635 
1636 			appendpath = create_append_path(root, rel, NIL, list_make1(path),
1637 											NIL, NULL,
1638 											path->parallel_workers, true,
1639 											partial_rows);
1640 			add_partial_path(rel, (Path *) appendpath);
1641 		}
1642 	}
1643 }
1644 
1645 /*
1646  * generate_orderedappend_paths
1647  *		Generate ordered append paths for an append relation
1648  *
1649  * Usually we generate MergeAppend paths here, but there are some special
1650  * cases where we can generate simple Append paths, because the subpaths
1651  * can provide tuples in the required order already.
1652  *
1653  * We generate a path for each ordering (pathkey list) appearing in
1654  * all_child_pathkeys.
1655  *
1656  * We consider both cheapest-startup and cheapest-total cases, ie, for each
1657  * interesting ordering, collect all the cheapest startup subpaths and all the
1658  * cheapest total paths, and build a suitable path for each case.
1659  *
1660  * We don't currently generate any parameterized ordered paths here.  While
1661  * it would not take much more code here to do so, it's very unclear that it
1662  * is worth the planning cycles to investigate such paths: there's little
1663  * use for an ordered path on the inside of a nestloop.  In fact, it's likely
1664  * that the current coding of add_path would reject such paths out of hand,
1665  * because add_path gives no credit for sort ordering of parameterized paths,
1666  * and a parameterized MergeAppend is going to be more expensive than the
1667  * corresponding parameterized Append path.  If we ever try harder to support
1668  * parameterized mergejoin plans, it might be worth adding support for
1669  * parameterized paths here to feed such joins.  (See notes in
1670  * optimizer/README for why that might not ever happen, though.)
1671  */
1672 static void
generate_orderedappend_paths(PlannerInfo * root,RelOptInfo * rel,List * live_childrels,List * all_child_pathkeys)1673 generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
1674 							 List *live_childrels,
1675 							 List *all_child_pathkeys)
1676 {
1677 	ListCell   *lcp;
1678 	List	   *partition_pathkeys = NIL;
1679 	List	   *partition_pathkeys_desc = NIL;
1680 	bool		partition_pathkeys_partial = true;
1681 	bool		partition_pathkeys_desc_partial = true;
1682 
1683 	/*
1684 	 * Some partitioned table setups may allow us to use an Append node
1685 	 * instead of a MergeAppend.  This is possible in cases such as RANGE
1686 	 * partitioned tables where it's guaranteed that an earlier partition must
1687 	 * contain rows which come earlier in the sort order.  To detect whether
1688 	 * this is relevant, build pathkey descriptions of the partition ordering,
1689 	 * for both forward and reverse scans.
1690 	 */
1691 	if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
1692 		partitions_are_ordered(rel->boundinfo, rel->nparts))
1693 	{
1694 		partition_pathkeys = build_partition_pathkeys(root, rel,
1695 													  ForwardScanDirection,
1696 													  &partition_pathkeys_partial);
1697 
1698 		partition_pathkeys_desc = build_partition_pathkeys(root, rel,
1699 														   BackwardScanDirection,
1700 														   &partition_pathkeys_desc_partial);
1701 
1702 		/*
1703 		 * You might think we should truncate_useless_pathkeys here, but
1704 		 * allowing partition keys which are a subset of the query's pathkeys
1705 		 * can often be useful.  For example, consider a table partitioned by
1706 		 * RANGE (a, b), and a query with ORDER BY a, b, c.  If we have child
1707 		 * paths that can produce the a, b, c ordering (perhaps via indexes on
1708 		 * (a, b, c)) then it works to consider the appendrel output as
1709 		 * ordered by a, b, c.
1710 		 */
1711 	}
1712 
1713 	/* Now consider each interesting sort ordering */
1714 	foreach(lcp, all_child_pathkeys)
1715 	{
1716 		List	   *pathkeys = (List *) lfirst(lcp);
1717 		List	   *startup_subpaths = NIL;
1718 		List	   *total_subpaths = NIL;
1719 		bool		startup_neq_total = false;
1720 		ListCell   *lcr;
1721 		bool		match_partition_order;
1722 		bool		match_partition_order_desc;
1723 
1724 		/*
1725 		 * Determine if this sort ordering matches any partition pathkeys we
1726 		 * have, for both ascending and descending partition order.  If the
1727 		 * partition pathkeys happen to be contained in pathkeys then it still
1728 		 * works, as described above, providing that the partition pathkeys
1729 		 * are complete and not just a prefix of the partition keys.  (In such
1730 		 * cases we'll be relying on the child paths to have sorted the
1731 		 * lower-order columns of the required pathkeys.)
1732 		 */
1733 		match_partition_order =
1734 			pathkeys_contained_in(pathkeys, partition_pathkeys) ||
1735 			(!partition_pathkeys_partial &&
1736 			 pathkeys_contained_in(partition_pathkeys, pathkeys));
1737 
1738 		match_partition_order_desc = !match_partition_order &&
1739 			(pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
1740 			 (!partition_pathkeys_desc_partial &&
1741 			  pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
1742 
1743 		/* Select the child paths for this ordering... */
1744 		foreach(lcr, live_childrels)
1745 		{
1746 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1747 			Path	   *cheapest_startup,
1748 					   *cheapest_total;
1749 
1750 			/* Locate the right paths, if they are available. */
1751 			cheapest_startup =
1752 				get_cheapest_path_for_pathkeys(childrel->pathlist,
1753 											   pathkeys,
1754 											   NULL,
1755 											   STARTUP_COST,
1756 											   false);
1757 			cheapest_total =
1758 				get_cheapest_path_for_pathkeys(childrel->pathlist,
1759 											   pathkeys,
1760 											   NULL,
1761 											   TOTAL_COST,
1762 											   false);
1763 
1764 			/*
1765 			 * If we can't find any paths with the right order just use the
1766 			 * cheapest-total path; we'll have to sort it later.
1767 			 */
1768 			if (cheapest_startup == NULL || cheapest_total == NULL)
1769 			{
1770 				cheapest_startup = cheapest_total =
1771 					childrel->cheapest_total_path;
1772 				/* Assert we do have an unparameterized path for this child */
1773 				Assert(cheapest_total->param_info == NULL);
1774 			}
1775 
1776 			/*
1777 			 * Notice whether we actually have different paths for the
1778 			 * "cheapest" and "total" cases; frequently there will be no point
1779 			 * in two create_merge_append_path() calls.
1780 			 */
1781 			if (cheapest_startup != cheapest_total)
1782 				startup_neq_total = true;
1783 
1784 			/*
1785 			 * Collect the appropriate child paths.  The required logic varies
1786 			 * for the Append and MergeAppend cases.
1787 			 */
1788 			if (match_partition_order)
1789 			{
1790 				/*
1791 				 * We're going to make a plain Append path.  We don't need
1792 				 * most of what accumulate_append_subpath would do, but we do
1793 				 * want to cut out child Appends or MergeAppends if they have
1794 				 * just a single subpath (and hence aren't doing anything
1795 				 * useful).
1796 				 */
1797 				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1798 				cheapest_total = get_singleton_append_subpath(cheapest_total);
1799 
1800 				startup_subpaths = lappend(startup_subpaths, cheapest_startup);
1801 				total_subpaths = lappend(total_subpaths, cheapest_total);
1802 			}
1803 			else if (match_partition_order_desc)
1804 			{
1805 				/*
1806 				 * As above, but we need to reverse the order of the children,
1807 				 * because nodeAppend.c doesn't know anything about reverse
1808 				 * ordering and will scan the children in the order presented.
1809 				 */
1810 				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1811 				cheapest_total = get_singleton_append_subpath(cheapest_total);
1812 
1813 				startup_subpaths = lcons(cheapest_startup, startup_subpaths);
1814 				total_subpaths = lcons(cheapest_total, total_subpaths);
1815 			}
1816 			else
1817 			{
1818 				/*
1819 				 * Otherwise, rely on accumulate_append_subpath to collect the
1820 				 * child paths for the MergeAppend.
1821 				 */
1822 				accumulate_append_subpath(cheapest_startup,
1823 										  &startup_subpaths, NULL);
1824 				accumulate_append_subpath(cheapest_total,
1825 										  &total_subpaths, NULL);
1826 			}
1827 		}
1828 
1829 		/* ... and build the Append or MergeAppend paths */
1830 		if (match_partition_order || match_partition_order_desc)
1831 		{
1832 			/* We only need Append */
1833 			add_path(rel, (Path *) create_append_path(root,
1834 													  rel,
1835 													  startup_subpaths,
1836 													  NIL,
1837 													  pathkeys,
1838 													  NULL,
1839 													  0,
1840 													  false,
1841 													  -1));
1842 			if (startup_neq_total)
1843 				add_path(rel, (Path *) create_append_path(root,
1844 														  rel,
1845 														  total_subpaths,
1846 														  NIL,
1847 														  pathkeys,
1848 														  NULL,
1849 														  0,
1850 														  false,
1851 														  -1));
1852 		}
1853 		else
1854 		{
1855 			/* We need MergeAppend */
1856 			add_path(rel, (Path *) create_merge_append_path(root,
1857 															rel,
1858 															startup_subpaths,
1859 															pathkeys,
1860 															NULL));
1861 			if (startup_neq_total)
1862 				add_path(rel, (Path *) create_merge_append_path(root,
1863 																rel,
1864 																total_subpaths,
1865 																pathkeys,
1866 																NULL));
1867 		}
1868 	}
1869 }
1870 
1871 /*
1872  * get_cheapest_parameterized_child_path
1873  *		Get cheapest path for this relation that has exactly the requested
1874  *		parameterization.
1875  *
1876  * Returns NULL if unable to create such a path.
1877  */
1878 static Path *
get_cheapest_parameterized_child_path(PlannerInfo * root,RelOptInfo * rel,Relids required_outer)1879 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1880 									  Relids required_outer)
1881 {
1882 	Path	   *cheapest;
1883 	ListCell   *lc;
1884 
1885 	/*
1886 	 * Look up the cheapest existing path with no more than the needed
1887 	 * parameterization.  If it has exactly the needed parameterization, we're
1888 	 * done.
1889 	 */
1890 	cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1891 											  NIL,
1892 											  required_outer,
1893 											  TOTAL_COST,
1894 											  false);
1895 	Assert(cheapest != NULL);
1896 	if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1897 		return cheapest;
1898 
1899 	/*
1900 	 * Otherwise, we can "reparameterize" an existing path to match the given
1901 	 * parameterization, which effectively means pushing down additional
1902 	 * joinquals to be checked within the path's scan.  However, some existing
1903 	 * paths might check the available joinquals already while others don't;
1904 	 * therefore, it's not clear which existing path will be cheapest after
1905 	 * reparameterization.  We have to go through them all and find out.
1906 	 */
1907 	cheapest = NULL;
1908 	foreach(lc, rel->pathlist)
1909 	{
1910 		Path	   *path = (Path *) lfirst(lc);
1911 
1912 		/* Can't use it if it needs more than requested parameterization */
1913 		if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1914 			continue;
1915 
1916 		/*
1917 		 * Reparameterization can only increase the path's cost, so if it's
1918 		 * already more expensive than the current cheapest, forget it.
1919 		 */
1920 		if (cheapest != NULL &&
1921 			compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1922 			continue;
1923 
1924 		/* Reparameterize if needed, then recheck cost */
1925 		if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
1926 		{
1927 			path = reparameterize_path(root, path, required_outer, 1.0);
1928 			if (path == NULL)
1929 				continue;		/* failed to reparameterize this one */
1930 			Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
1931 
1932 			if (cheapest != NULL &&
1933 				compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1934 				continue;
1935 		}
1936 
1937 		/* We have a new best path */
1938 		cheapest = path;
1939 	}
1940 
1941 	/* Return the best path, or NULL if we found no suitable candidate */
1942 	return cheapest;
1943 }
1944 
1945 /*
1946  * accumulate_append_subpath
1947  *		Add a subpath to the list being built for an Append or MergeAppend.
1948  *
1949  * It's possible that the child is itself an Append or MergeAppend path, in
1950  * which case we can "cut out the middleman" and just add its child paths to
1951  * our own list.  (We don't try to do this earlier because we need to apply
1952  * both levels of transformation to the quals.)
1953  *
1954  * Note that if we omit a child MergeAppend in this way, we are effectively
1955  * omitting a sort step, which seems fine: if the parent is to be an Append,
1956  * its result would be unsorted anyway, while if the parent is to be a
1957  * MergeAppend, there's no point in a separate sort on a child.
1958  *
1959  * Normally, either path is a partial path and subpaths is a list of partial
1960  * paths, or else path is a non-partial plan and subpaths is a list of those.
1961  * However, if path is a parallel-aware Append, then we add its partial path
1962  * children to subpaths and the rest to special_subpaths.  If the latter is
1963  * NULL, we don't flatten the path at all (unless it contains only partial
1964  * paths).
1965  */
1966 static void
accumulate_append_subpath(Path * path,List ** subpaths,List ** special_subpaths)1967 accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
1968 {
1969 	if (IsA(path, AppendPath))
1970 	{
1971 		AppendPath *apath = (AppendPath *) path;
1972 
1973 		if (!apath->path.parallel_aware || apath->first_partial_path == 0)
1974 		{
1975 			*subpaths = list_concat(*subpaths, apath->subpaths);
1976 			return;
1977 		}
1978 		else if (special_subpaths != NULL)
1979 		{
1980 			List	   *new_special_subpaths;
1981 
1982 			/* Split Parallel Append into partial and non-partial subpaths */
1983 			*subpaths = list_concat(*subpaths,
1984 									list_copy_tail(apath->subpaths,
1985 												   apath->first_partial_path));
1986 			new_special_subpaths =
1987 				list_truncate(list_copy(apath->subpaths),
1988 							  apath->first_partial_path);
1989 			*special_subpaths = list_concat(*special_subpaths,
1990 											new_special_subpaths);
1991 			return;
1992 		}
1993 	}
1994 	else if (IsA(path, MergeAppendPath))
1995 	{
1996 		MergeAppendPath *mpath = (MergeAppendPath *) path;
1997 
1998 		*subpaths = list_concat(*subpaths, mpath->subpaths);
1999 		return;
2000 	}
2001 
2002 	*subpaths = lappend(*subpaths, path);
2003 }
2004 
2005 /*
2006  * get_singleton_append_subpath
2007  *		Returns the single subpath of an Append/MergeAppend, or just
2008  *		return 'path' if it's not a single sub-path Append/MergeAppend.
2009  *
2010  * Note: 'path' must not be a parallel-aware path.
2011  */
2012 static Path *
get_singleton_append_subpath(Path * path)2013 get_singleton_append_subpath(Path *path)
2014 {
2015 	Assert(!path->parallel_aware);
2016 
2017 	if (IsA(path, AppendPath))
2018 	{
2019 		AppendPath *apath = (AppendPath *) path;
2020 
2021 		if (list_length(apath->subpaths) == 1)
2022 			return (Path *) linitial(apath->subpaths);
2023 	}
2024 	else if (IsA(path, MergeAppendPath))
2025 	{
2026 		MergeAppendPath *mpath = (MergeAppendPath *) path;
2027 
2028 		if (list_length(mpath->subpaths) == 1)
2029 			return (Path *) linitial(mpath->subpaths);
2030 	}
2031 
2032 	return path;
2033 }
2034 
2035 /*
2036  * set_dummy_rel_pathlist
2037  *	  Build a dummy path for a relation that's been excluded by constraints
2038  *
2039  * Rather than inventing a special "dummy" path type, we represent this as an
2040  * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2041  *
2042  * (See also mark_dummy_rel, which does basically the same thing, but is
2043  * typically used to change a rel into dummy state after we already made
2044  * paths for it.)
2045  */
2046 static void
set_dummy_rel_pathlist(RelOptInfo * rel)2047 set_dummy_rel_pathlist(RelOptInfo *rel)
2048 {
2049 	/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2050 	rel->rows = 0;
2051 	rel->reltarget->width = 0;
2052 
2053 	/* Discard any pre-existing paths; no further need for them */
2054 	rel->pathlist = NIL;
2055 	rel->partial_pathlist = NIL;
2056 
2057 	/* Set up the dummy path */
2058 	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
2059 											  NIL, rel->lateral_relids,
2060 											  0, false, -1));
2061 
2062 	/*
2063 	 * We set the cheapest-path fields immediately, just in case they were
2064 	 * pointing at some discarded path.  This is redundant when we're called
2065 	 * from set_rel_size(), but not when called from elsewhere, and doing it
2066 	 * twice is harmless anyway.
2067 	 */
2068 	set_cheapest(rel);
2069 }
2070 
2071 /* quick-and-dirty test to see if any joining is needed */
2072 static bool
has_multiple_baserels(PlannerInfo * root)2073 has_multiple_baserels(PlannerInfo *root)
2074 {
2075 	int			num_base_rels = 0;
2076 	Index		rti;
2077 
2078 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
2079 	{
2080 		RelOptInfo *brel = root->simple_rel_array[rti];
2081 
2082 		if (brel == NULL)
2083 			continue;
2084 
2085 		/* ignore RTEs that are "other rels" */
2086 		if (brel->reloptkind == RELOPT_BASEREL)
2087 			if (++num_base_rels > 1)
2088 				return true;
2089 	}
2090 	return false;
2091 }
2092 
/*
 * set_subquery_pathlist
 *		Generate SubqueryScan access paths for a subquery RTE
 *
 * 'root' is the planner state of the query level containing the subquery,
 * 'rel' is the RelOptInfo being given paths, 'rti' is the range table index
 * handed to the qual-pushdown routines, and 'rte' supplies the subquery
 * itself plus its security_barrier flag.
 *
 * Besides adding paths to 'rel', this may shrink rel->baserestrictinfo (when
 * quals get pushed down), and it sets rel->subroot and rel->subplan_params.
 * root->plan_params is reset to NIL before returning.
 *
 * We don't currently support generating parameterized paths for subqueries
 * by pushing join clauses down into them; it seems too expensive to re-plan
 * the subquery multiple times to consider different alternatives.
 * (XXX that could stand to be reconsidered, now that we use Paths.)
 * So the paths made here will be parameterized if the subquery contains
 * LATERAL references, otherwise not.  As long as that's true, there's no need
 * for a separate set_subquery_size phase: just make the paths right away.
 */
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
					  Index rti, RangeTblEntry *rte)
{
	Query	   *parse = root->parse;
	Query	   *subquery = rte->subquery;
	Relids		required_outer;
	pushdown_safety_info safetyInfo;
	double		tuple_fraction;
	RelOptInfo *sub_final_rel;
	ListCell   *lc;

	/*
	 * Must copy the Query so that planning doesn't mess up the RTE contents
	 * (really really need to fix the planner to not scribble on its input,
	 * someday ... but see remove_unused_subquery_outputs to start with).
	 * From here on, 'subquery' is the private copy, not rte->subquery.
	 */
	subquery = copyObject(subquery);

	/*
	 * If it's a LATERAL subquery, it might contain some Vars of the current
	 * query level, requiring it to be treated as parameterized, even though
	 * we don't support pushing down join quals into subqueries.
	 */
	required_outer = rel->lateral_relids;

	/*
	 * Zero out result area for subquery_is_pushdown_safe, so that it can set
	 * flags as needed while recursing.  In particular, we need a workspace
	 * for keeping track of unsafe-to-reference columns.  unsafeColumns[i]
	 * will be set true if we find that output column i of the subquery is
	 * unsafe to use in a pushed-down qual.
	 *
	 * NOTE(review): the array has one more entry than the targetlist has
	 * items, presumably so it can be indexed directly by a 1-based column
	 * number --- confirm against the code that fills it in.
	 */
	memset(&safetyInfo, 0, sizeof(safetyInfo));
	safetyInfo.unsafeColumns = (bool *)
		palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));

	/*
	 * If the subquery has the "security_barrier" flag, it means the subquery
	 * originated from a view that must enforce row-level security.  Then we
	 * must not push down quals that contain leaky functions.  (Ideally this
	 * would be checked inside subquery_is_pushdown_safe, but since we don't
	 * currently pass the RTE to that function, we must do it here.)
	 */
	safetyInfo.unsafeLeaky = rte->security_barrier;

	/*
	 * If there are any restriction clauses that have been attached to the
	 * subquery relation, consider pushing them down to become WHERE or HAVING
	 * quals of the subquery itself.  This transformation is useful because it
	 * may allow us to generate a better plan for the subquery than evaluating
	 * all the subquery output rows and then filtering them.
	 *
	 * There are several cases where we cannot push down clauses. Restrictions
	 * involving the subquery are checked by subquery_is_pushdown_safe().
	 * Restrictions on individual clauses are checked by
	 * qual_is_pushdown_safe().  Also, we don't want to push down
	 * pseudoconstant clauses; better to have the gating node above the
	 * subquery.
	 *
	 * Non-pushed-down clauses will get evaluated as qpquals of the
	 * SubqueryScan node.
	 *
	 * XXX Are there any cases where we want to make a policy decision not to
	 * push down a pushable qual, because it'd result in a worse plan?
	 */
	if (rel->baserestrictinfo != NIL &&
		subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
	{
		/* OK to consider pushing down individual quals */
		List	   *upperrestrictlist = NIL;
		ListCell   *l;

		foreach(l, rel->baserestrictinfo)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);

			if (!rinfo->pseudoconstant &&
				qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo))
			{
				Node	   *clause = (Node *) rinfo->clause;

				/* Push it down */
				subquery_push_qual(subquery, rte, rti, clause);
			}
			else
			{
				/* Keep it in the upper query */
				upperrestrictlist = lappend(upperrestrictlist, rinfo);
			}
		}

		/* From now on rel carries only the quals that stayed up here */
		rel->baserestrictinfo = upperrestrictlist;
		/* We don't bother recomputing baserestrict_min_security */
	}

	/* The pushdown-safety workspace is not needed past this point */
	pfree(safetyInfo.unsafeColumns);

	/*
	 * The upper query might not use all the subquery's output columns; if
	 * not, we can simplify.
	 */
	remove_unused_subquery_outputs(subquery, rel);

	/*
	 * We can safely pass the outer tuple_fraction down to the subquery if the
	 * outer level has no joining, aggregation, or sorting to do. Otherwise
	 * we'd better tell the subquery to plan for full retrieval. (XXX This
	 * could probably be made more intelligent ...)
	 */
	if (parse->hasAggs ||
		parse->groupClause ||
		parse->groupingSets ||
		parse->havingQual ||
		parse->distinctClause ||
		parse->sortClause ||
		has_multiple_baserels(root))
		tuple_fraction = 0.0;	/* default case */
	else
		tuple_fraction = root->tuple_fraction;

	/* plan_params should not be in use in current query level */
	Assert(root->plan_params == NIL);

	/* Generate a subroot and Paths for the subquery */
	rel->subroot = subquery_planner(root->glob, subquery,
									root,
									false, tuple_fraction);

	/*
	 * Isolate the params needed by this specific subplan: move whatever is
	 * now in root->plan_params over to the rel, and leave the root's list
	 * empty again.
	 */
	rel->subplan_params = root->plan_params;
	root->plan_params = NIL;

	/*
	 * It's possible that constraint exclusion proved the subquery empty. If
	 * so, it's desirable to produce an unadorned dummy path so that we will
	 * recognize appropriate optimizations at this query level.
	 */
	sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);

	if (IS_DUMMY_REL(sub_final_rel))
	{
		/* No SubqueryScan paths are built at all in this case */
		set_dummy_rel_pathlist(rel);
		return;
	}

	/*
	 * Mark rel with estimated output rows, width, etc.  Note that we have to
	 * do this before generating outer-query paths, else cost_subqueryscan is
	 * not happy.
	 */
	set_subquery_size_estimates(root, rel);

	/*
	 * For each Path that subquery_planner produced, make a SubqueryScanPath
	 * in the outer query.
	 */
	foreach(lc, sub_final_rel->pathlist)
	{
		Path	   *subpath = (Path *) lfirst(lc);
		List	   *pathkeys;

		/* Convert subpath's pathkeys to outer representation */
		pathkeys = convert_subquery_pathkeys(root,
											 rel,
											 subpath->pathkeys,
											 make_tlist_from_pathtarget(subpath->pathtarget));

		/* Generate outer path using this subpath */
		add_path(rel, (Path *)
				 create_subqueryscan_path(root, rel, subpath,
										  pathkeys, required_outer));
	}

	/*
	 * If outer rel allows parallelism, do same for partial paths.  This is
	 * only done when the subquery scan needs no outer rels (required_outer
	 * is empty).
	 */
	if (rel->consider_parallel && bms_is_empty(required_outer))
	{
		/* If consider_parallel is false, there should be no partial paths. */
		Assert(sub_final_rel->consider_parallel ||
			   sub_final_rel->partial_pathlist == NIL);

		/* Same as the loop above, but building partial paths. */
		foreach(lc, sub_final_rel->partial_pathlist)
		{
			Path	   *subpath = (Path *) lfirst(lc);
			List	   *pathkeys;

			/* Convert subpath's pathkeys to outer representation */
			pathkeys = convert_subquery_pathkeys(root,
												 rel,
												 subpath->pathkeys,
												 make_tlist_from_pathtarget(subpath->pathtarget));

			/* Generate outer path using this subpath */
			add_partial_path(rel, (Path *)
							 create_subqueryscan_path(root, rel, subpath,
													  pathkeys,
													  required_outer));
		}
	}
}
2305 
2306 /*
2307  * set_function_pathlist
2308  *		Build the (single) access path for a function RTE
2309  */
2310 static void
set_function_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2311 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2312 {
2313 	Relids		required_outer;
2314 	List	   *pathkeys = NIL;
2315 
2316 	/*
2317 	 * We don't support pushing join clauses into the quals of a function
2318 	 * scan, but it could still have required parameterization due to LATERAL
2319 	 * refs in the function expression.
2320 	 */
2321 	required_outer = rel->lateral_relids;
2322 
2323 	/*
2324 	 * The result is considered unordered unless ORDINALITY was used, in which
2325 	 * case it is ordered by the ordinal column (the last one).  See if we
2326 	 * care, by checking for uses of that Var in equivalence classes.
2327 	 */
2328 	if (rte->funcordinality)
2329 	{
2330 		AttrNumber	ordattno = rel->max_attr;
2331 		Var		   *var = NULL;
2332 		ListCell   *lc;
2333 
2334 		/*
2335 		 * Is there a Var for it in rel's targetlist?  If not, the query did
2336 		 * not reference the ordinality column, or at least not in any way
2337 		 * that would be interesting for sorting.
2338 		 */
2339 		foreach(lc, rel->reltarget->exprs)
2340 		{
2341 			Var		   *node = (Var *) lfirst(lc);
2342 
2343 			/* checking varno/varlevelsup is just paranoia */
2344 			if (IsA(node, Var) &&
2345 				node->varattno == ordattno &&
2346 				node->varno == rel->relid &&
2347 				node->varlevelsup == 0)
2348 			{
2349 				var = node;
2350 				break;
2351 			}
2352 		}
2353 
2354 		/*
2355 		 * Try to build pathkeys for this Var with int8 sorting.  We tell
2356 		 * build_expression_pathkey not to build any new equivalence class; if
2357 		 * the Var isn't already mentioned in some EC, it means that nothing
2358 		 * cares about the ordering.
2359 		 */
2360 		if (var)
2361 			pathkeys = build_expression_pathkey(root,
2362 												(Expr *) var,
2363 												NULL,	/* below outer joins */
2364 												Int8LessOperator,
2365 												rel->relids,
2366 												false);
2367 	}
2368 
2369 	/* Generate appropriate path */
2370 	add_path(rel, create_functionscan_path(root, rel,
2371 										   pathkeys, required_outer));
2372 }
2373 
2374 /*
2375  * set_values_pathlist
2376  *		Build the (single) access path for a VALUES RTE
2377  */
2378 static void
set_values_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2379 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2380 {
2381 	Relids		required_outer;
2382 
2383 	/*
2384 	 * We don't support pushing join clauses into the quals of a values scan,
2385 	 * but it could still have required parameterization due to LATERAL refs
2386 	 * in the values expressions.
2387 	 */
2388 	required_outer = rel->lateral_relids;
2389 
2390 	/* Generate appropriate path */
2391 	add_path(rel, create_valuesscan_path(root, rel, required_outer));
2392 }
2393 
2394 /*
2395  * set_tablefunc_pathlist
2396  *		Build the (single) access path for a table func RTE
2397  */
2398 static void
set_tablefunc_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2399 set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2400 {
2401 	Relids		required_outer;
2402 
2403 	/*
2404 	 * We don't support pushing join clauses into the quals of a tablefunc
2405 	 * scan, but it could still have required parameterization due to LATERAL
2406 	 * refs in the function expression.
2407 	 */
2408 	required_outer = rel->lateral_relids;
2409 
2410 	/* Generate appropriate path */
2411 	add_path(rel, create_tablefuncscan_path(root, rel,
2412 											required_outer));
2413 }
2414 
2415 /*
2416  * set_cte_pathlist
2417  *		Build the (single) access path for a non-self-reference CTE RTE
2418  *
2419  * There's no need for a separate set_cte_size phase, since we don't
2420  * support join-qual-parameterized paths for CTEs.
2421  */
2422 static void
set_cte_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2423 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2424 {
2425 	Plan	   *cteplan;
2426 	PlannerInfo *cteroot;
2427 	Index		levelsup;
2428 	int			ndx;
2429 	ListCell   *lc;
2430 	int			plan_id;
2431 	Relids		required_outer;
2432 
2433 	/*
2434 	 * Find the referenced CTE, and locate the plan previously made for it.
2435 	 */
2436 	levelsup = rte->ctelevelsup;
2437 	cteroot = root;
2438 	while (levelsup-- > 0)
2439 	{
2440 		cteroot = cteroot->parent_root;
2441 		if (!cteroot)			/* shouldn't happen */
2442 			elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2443 	}
2444 
2445 	/*
2446 	 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2447 	 * on planning the CTEs (ie, this is a side-reference from another CTE).
2448 	 * So we mustn't use forboth here.
2449 	 */
2450 	ndx = 0;
2451 	foreach(lc, cteroot->parse->cteList)
2452 	{
2453 		CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
2454 
2455 		if (strcmp(cte->ctename, rte->ctename) == 0)
2456 			break;
2457 		ndx++;
2458 	}
2459 	if (lc == NULL)				/* shouldn't happen */
2460 		elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
2461 	if (ndx >= list_length(cteroot->cte_plan_ids))
2462 		elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
2463 	plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
2464 	Assert(plan_id > 0);
2465 	cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2466 
2467 	/* Mark rel with estimated output rows, width, etc */
2468 	set_cte_size_estimates(root, rel, cteplan->plan_rows);
2469 
2470 	/*
2471 	 * We don't support pushing join clauses into the quals of a CTE scan, but
2472 	 * it could still have required parameterization due to LATERAL refs in
2473 	 * its tlist.
2474 	 */
2475 	required_outer = rel->lateral_relids;
2476 
2477 	/* Generate appropriate path */
2478 	add_path(rel, create_ctescan_path(root, rel, required_outer));
2479 }
2480 
2481 /*
2482  * set_namedtuplestore_pathlist
2483  *		Build the (single) access path for a named tuplestore RTE
2484  *
2485  * There's no need for a separate set_namedtuplestore_size phase, since we
2486  * don't support join-qual-parameterized paths for tuplestores.
2487  */
2488 static void
set_namedtuplestore_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2489 set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
2490 							 RangeTblEntry *rte)
2491 {
2492 	Relids		required_outer;
2493 
2494 	/* Mark rel with estimated output rows, width, etc */
2495 	set_namedtuplestore_size_estimates(root, rel);
2496 
2497 	/*
2498 	 * We don't support pushing join clauses into the quals of a tuplestore
2499 	 * scan, but it could still have required parameterization due to LATERAL
2500 	 * refs in its tlist.
2501 	 */
2502 	required_outer = rel->lateral_relids;
2503 
2504 	/* Generate appropriate path */
2505 	add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
2506 
2507 	/* Select cheapest path (pretty easy in this case...) */
2508 	set_cheapest(rel);
2509 }
2510 
2511 /*
2512  * set_result_pathlist
2513  *		Build the (single) access path for an RTE_RESULT RTE
2514  *
2515  * There's no need for a separate set_result_size phase, since we
2516  * don't support join-qual-parameterized paths for these RTEs.
2517  */
2518 static void
set_result_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2519 set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
2520 					RangeTblEntry *rte)
2521 {
2522 	Relids		required_outer;
2523 
2524 	/* Mark rel with estimated output rows, width, etc */
2525 	set_result_size_estimates(root, rel);
2526 
2527 	/*
2528 	 * We don't support pushing join clauses into the quals of a Result scan,
2529 	 * but it could still have required parameterization due to LATERAL refs
2530 	 * in its tlist.
2531 	 */
2532 	required_outer = rel->lateral_relids;
2533 
2534 	/* Generate appropriate path */
2535 	add_path(rel, create_resultscan_path(root, rel, required_outer));
2536 
2537 	/* Select cheapest path (pretty easy in this case...) */
2538 	set_cheapest(rel);
2539 }
2540 
2541 /*
2542  * set_worktable_pathlist
2543  *		Build the (single) access path for a self-reference CTE RTE
2544  *
2545  * There's no need for a separate set_worktable_size phase, since we don't
2546  * support join-qual-parameterized paths for CTEs.
2547  */
2548 static void
set_worktable_pathlist(PlannerInfo * root,RelOptInfo * rel,RangeTblEntry * rte)2549 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2550 {
2551 	Path	   *ctepath;
2552 	PlannerInfo *cteroot;
2553 	Index		levelsup;
2554 	Relids		required_outer;
2555 
2556 	/*
2557 	 * We need to find the non-recursive term's path, which is in the plan
2558 	 * level that's processing the recursive UNION, which is one level *below*
2559 	 * where the CTE comes from.
2560 	 */
2561 	levelsup = rte->ctelevelsup;
2562 	if (levelsup == 0)			/* shouldn't happen */
2563 		elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2564 	levelsup--;
2565 	cteroot = root;
2566 	while (levelsup-- > 0)
2567 	{
2568 		cteroot = cteroot->parent_root;
2569 		if (!cteroot)			/* shouldn't happen */
2570 			elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2571 	}
2572 	ctepath = cteroot->non_recursive_path;
2573 	if (!ctepath)				/* shouldn't happen */
2574 		elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
2575 
2576 	/* Mark rel with estimated output rows, width, etc */
2577 	set_cte_size_estimates(root, rel, ctepath->rows);
2578 
2579 	/*
2580 	 * We don't support pushing join clauses into the quals of a worktable
2581 	 * scan, but it could still have required parameterization due to LATERAL
2582 	 * refs in its tlist.  (I'm not sure this is actually possible given the
2583 	 * restrictions on recursive references, but it's easy enough to support.)
2584 	 */
2585 	required_outer = rel->lateral_relids;
2586 
2587 	/* Generate appropriate path */
2588 	add_path(rel, create_worktablescan_path(root, rel, required_outer));
2589 }
2590 
2591 /*
2592  * generate_gather_paths
2593  *		Generate parallel access paths for a relation by pushing a Gather or
2594  *		Gather Merge on top of a partial path.
2595  *
2596  * This must not be called until after we're done creating all partial paths
2597  * for the specified relation.  (Otherwise, add_partial_path might delete a
2598  * path that some GatherPath or GatherMergePath has a reference to.)
2599  *
2600  * If we're generating paths for a scan or join relation, override_rows will
2601  * be false, and we'll just use the relation's size estimate.  When we're
2602  * being called for a partially-grouped path, though, we need to override
2603  * the rowcount estimate.  (It's not clear that the particular value we're
2604  * using here is actually best, but the underlying rel has no estimate so
2605  * we must do something.)
2606  */
2607 void
generate_gather_paths(PlannerInfo * root,RelOptInfo * rel,bool override_rows)2608 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2609 {
2610 	Path	   *cheapest_partial_path;
2611 	Path	   *simple_gather_path;
2612 	ListCell   *lc;
2613 	double		rows;
2614 	double	   *rowsp = NULL;
2615 
2616 	/* If there are no partial paths, there's nothing to do here. */
2617 	if (rel->partial_pathlist == NIL)
2618 		return;
2619 
2620 	/* Should we override the rel's rowcount estimate? */
2621 	if (override_rows)
2622 		rowsp = &rows;
2623 
2624 	/*
2625 	 * The output of Gather is always unsorted, so there's only one partial
2626 	 * path of interest: the cheapest one.  That will be the one at the front
2627 	 * of partial_pathlist because of the way add_partial_path works.
2628 	 */
2629 	cheapest_partial_path = linitial(rel->partial_pathlist);
2630 	rows =
2631 		cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
2632 	simple_gather_path = (Path *)
2633 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
2634 						   NULL, rowsp);
2635 	add_path(rel, simple_gather_path);
2636 
2637 	/*
2638 	 * For each useful ordering, we can consider an order-preserving Gather
2639 	 * Merge.
2640 	 */
2641 	foreach(lc, rel->partial_pathlist)
2642 	{
2643 		Path	   *subpath = (Path *) lfirst(lc);
2644 		GatherMergePath *path;
2645 
2646 		if (subpath->pathkeys == NIL)
2647 			continue;
2648 
2649 		rows = subpath->rows * subpath->parallel_workers;
2650 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
2651 										subpath->pathkeys, NULL, rowsp);
2652 		add_path(rel, &path->path);
2653 	}
2654 }
2655 
2656 /*
2657  * get_useful_pathkeys_for_relation
2658  *		Determine which orderings of a relation might be useful.
2659  *
2660  * Getting data in sorted order can be useful either because the requested
2661  * order matches the final output ordering for the overall query we're
2662  * planning, or because it enables an efficient merge join.  Here, we try
2663  * to figure out which pathkeys to consider.
2664  *
2665  * This allows us to do incremental sort on top of an index scan under a gather
2666  * merge node, i.e. parallelized.
2667  *
 * If require_parallel_safe is true, we also require the expressions to
2669  * be parallel safe (which allows pushing the sort below Gather Merge).
2670  *
2671  * XXX At the moment this can only ever return a list with a single element,
2672  * because it looks at query_pathkeys only. So we might return the pathkeys
2673  * directly, but it seems plausible we'll want to consider other orderings
2674  * in the future. For example, we might want to consider pathkeys useful for
2675  * merge joins.
2676  */
2677 static List *
get_useful_pathkeys_for_relation(PlannerInfo * root,RelOptInfo * rel,bool require_parallel_safe)2678 get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
2679 								 bool require_parallel_safe)
2680 {
2681 	List	   *useful_pathkeys_list = NIL;
2682 
2683 	/*
2684 	 * Considering query_pathkeys is always worth it, because it might allow
2685 	 * us to avoid a total sort when we have a partially presorted path
2686 	 * available or to push the total sort into the parallel portion of the
2687 	 * query.
2688 	 */
2689 	if (root->query_pathkeys)
2690 	{
2691 		ListCell   *lc;
2692 		int			npathkeys = 0;	/* useful pathkeys */
2693 
2694 		foreach(lc, root->query_pathkeys)
2695 		{
2696 			PathKey    *pathkey = (PathKey *) lfirst(lc);
2697 			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
2698 
2699 			/*
2700 			 * We can only build a sort for pathkeys that contain a
2701 			 * safe-to-compute-early EC member computable from the current
2702 			 * relation's reltarget, so ignore the remainder of the list as
2703 			 * soon as we find a pathkey without such a member.
2704 			 *
2705 			 * It's still worthwhile to return any prefix of the pathkeys list
2706 			 * that meets this requirement, as we may be able to do an
2707 			 * incremental sort.
2708 			 *
2709 			 * If requested, ensure the sort expression is parallel-safe too.
2710 			 */
2711 			if (!relation_can_be_sorted_early(root, rel, pathkey_ec,
2712 											  require_parallel_safe))
2713 				break;
2714 
2715 			npathkeys++;
2716 		}
2717 
2718 		/*
2719 		 * The whole query_pathkeys list matches, so append it directly, to
2720 		 * allow comparing pathkeys easily by comparing list pointer. If we
2721 		 * have to truncate the pathkeys, we gotta do a copy though.
2722 		 */
2723 		if (npathkeys == list_length(root->query_pathkeys))
2724 			useful_pathkeys_list = lappend(useful_pathkeys_list,
2725 										   root->query_pathkeys);
2726 		else if (npathkeys > 0)
2727 			useful_pathkeys_list = lappend(useful_pathkeys_list,
2728 										   list_truncate(list_copy(root->query_pathkeys),
2729 														 npathkeys));
2730 	}
2731 
2732 	return useful_pathkeys_list;
2733 }
2734 
2735 /*
2736  * generate_useful_gather_paths
2737  *		Generate parallel access paths for a relation by pushing a Gather or
2738  *		Gather Merge on top of a partial path.
2739  *
 * Unlike plain generate_gather_paths, this looks not only at the pathkeys of
 * input paths (aiming to preserve their ordering), but also at orderings that
 * might be useful for nodes above the Gather Merge node, and tries to add
 * a sort (regular or incremental) to provide them.
2744  */
2745 void
generate_useful_gather_paths(PlannerInfo * root,RelOptInfo * rel,bool override_rows)2746 generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2747 {
2748 	ListCell   *lc;
2749 	double		rows;
2750 	double	   *rowsp = NULL;
2751 	List	   *useful_pathkeys_list = NIL;
2752 	Path	   *cheapest_partial_path = NULL;
2753 
2754 	/* If there are no partial paths, there's nothing to do here. */
2755 	if (rel->partial_pathlist == NIL)
2756 		return;
2757 
2758 	/* Should we override the rel's rowcount estimate? */
2759 	if (override_rows)
2760 		rowsp = &rows;
2761 
2762 	/* generate the regular gather (merge) paths */
2763 	generate_gather_paths(root, rel, override_rows);
2764 
2765 	/* consider incremental sort for interesting orderings */
2766 	useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
2767 
2768 	/* used for explicit (full) sort paths */
2769 	cheapest_partial_path = linitial(rel->partial_pathlist);
2770 
2771 	/*
2772 	 * Consider sorted paths for each interesting ordering. We generate both
2773 	 * incremental and full sort.
2774 	 */
2775 	foreach(lc, useful_pathkeys_list)
2776 	{
2777 		List	   *useful_pathkeys = lfirst(lc);
2778 		ListCell   *lc2;
2779 		bool		is_sorted;
2780 		int			presorted_keys;
2781 
2782 		foreach(lc2, rel->partial_pathlist)
2783 		{
2784 			Path	   *subpath = (Path *) lfirst(lc2);
2785 			GatherMergePath *path;
2786 
2787 			is_sorted = pathkeys_count_contained_in(useful_pathkeys,
2788 													subpath->pathkeys,
2789 													&presorted_keys);
2790 
2791 			/*
2792 			 * We don't need to consider the case where a subpath is already
2793 			 * fully sorted because generate_gather_paths already creates a
2794 			 * gather merge path for every subpath that has pathkeys present.
2795 			 *
2796 			 * But since the subpath is already sorted, we know we don't need
2797 			 * to consider adding a sort (other either kind) on top of it, so
2798 			 * we can continue here.
2799 			 */
2800 			if (is_sorted)
2801 				continue;
2802 
2803 			/*
2804 			 * Consider regular sort for the cheapest partial path (for each
2805 			 * useful pathkeys). We know the path is not sorted, because we'd
2806 			 * not get here otherwise.
2807 			 *
2808 			 * This is not redundant with the gather paths created in
2809 			 * generate_gather_paths, because that doesn't generate ordered
2810 			 * output. Here we add an explicit sort to match the useful
2811 			 * ordering.
2812 			 */
2813 			if (cheapest_partial_path == subpath)
2814 			{
2815 				Path	   *tmp;
2816 
2817 				tmp = (Path *) create_sort_path(root,
2818 												rel,
2819 												subpath,
2820 												useful_pathkeys,
2821 												-1.0);
2822 
2823 				rows = tmp->rows * tmp->parallel_workers;
2824 
2825 				path = create_gather_merge_path(root, rel,
2826 												tmp,
2827 												rel->reltarget,
2828 												tmp->pathkeys,
2829 												NULL,
2830 												rowsp);
2831 
2832 				add_path(rel, &path->path);
2833 
2834 				/* Fall through */
2835 			}
2836 
2837 			/*
2838 			 * Consider incremental sort, but only when the subpath is already
2839 			 * partially sorted on a pathkey prefix.
2840 			 */
2841 			if (enable_incremental_sort && presorted_keys > 0)
2842 			{
2843 				Path	   *tmp;
2844 
2845 				/*
2846 				 * We should have already excluded pathkeys of length 1
2847 				 * because then presorted_keys > 0 would imply is_sorted was
2848 				 * true.
2849 				 */
2850 				Assert(list_length(useful_pathkeys) != 1);
2851 
2852 				tmp = (Path *) create_incremental_sort_path(root,
2853 															rel,
2854 															subpath,
2855 															useful_pathkeys,
2856 															presorted_keys,
2857 															-1);
2858 
2859 				path = create_gather_merge_path(root, rel,
2860 												tmp,
2861 												rel->reltarget,
2862 												tmp->pathkeys,
2863 												NULL,
2864 												rowsp);
2865 
2866 				add_path(rel, &path->path);
2867 			}
2868 		}
2869 	}
2870 }
2871 
2872 /*
2873  * make_rel_from_joinlist
2874  *	  Build access paths using a "joinlist" to guide the join path search.
2875  *
2876  * See comments for deconstruct_jointree() for definition of the joinlist
2877  * data structure.
2878  */
2879 static RelOptInfo *
make_rel_from_joinlist(PlannerInfo * root,List * joinlist)2880 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
2881 {
2882 	int			levels_needed;
2883 	List	   *initial_rels;
2884 	ListCell   *jl;
2885 
2886 	/*
2887 	 * Count the number of child joinlist nodes.  This is the depth of the
2888 	 * dynamic-programming algorithm we must employ to consider all ways of
2889 	 * joining the child nodes.
2890 	 */
2891 	levels_needed = list_length(joinlist);
2892 
2893 	if (levels_needed <= 0)
2894 		return NULL;			/* nothing to do? */
2895 
2896 	/*
2897 	 * Construct a list of rels corresponding to the child joinlist nodes.
2898 	 * This may contain both base rels and rels constructed according to
2899 	 * sub-joinlists.
2900 	 */
2901 	initial_rels = NIL;
2902 	foreach(jl, joinlist)
2903 	{
2904 		Node	   *jlnode = (Node *) lfirst(jl);
2905 		RelOptInfo *thisrel;
2906 
2907 		if (IsA(jlnode, RangeTblRef))
2908 		{
2909 			int			varno = ((RangeTblRef *) jlnode)->rtindex;
2910 
2911 			thisrel = find_base_rel(root, varno);
2912 		}
2913 		else if (IsA(jlnode, List))
2914 		{
2915 			/* Recurse to handle subproblem */
2916 			thisrel = make_rel_from_joinlist(root, (List *) jlnode);
2917 		}
2918 		else
2919 		{
2920 			elog(ERROR, "unrecognized joinlist node type: %d",
2921 				 (int) nodeTag(jlnode));
2922 			thisrel = NULL;		/* keep compiler quiet */
2923 		}
2924 
2925 		initial_rels = lappend(initial_rels, thisrel);
2926 	}
2927 
2928 	if (levels_needed == 1)
2929 	{
2930 		/*
2931 		 * Single joinlist node, so we're done.
2932 		 */
2933 		return (RelOptInfo *) linitial(initial_rels);
2934 	}
2935 	else
2936 	{
2937 		/*
2938 		 * Consider the different orders in which we could join the rels,
2939 		 * using a plugin, GEQO, or the regular join search code.
2940 		 *
2941 		 * We put the initial_rels list into a PlannerInfo field because
2942 		 * has_legal_joinclause() needs to look at it (ugly :-().
2943 		 */
2944 		root->initial_rels = initial_rels;
2945 
2946 		if (join_search_hook)
2947 			return (*join_search_hook) (root, levels_needed, initial_rels);
2948 		else if (enable_geqo && levels_needed >= geqo_threshold)
2949 			return geqo(root, levels_needed, initial_rels);
2950 		else
2951 			return standard_join_search(root, levels_needed, initial_rels);
2952 	}
2953 }
2954 
2955 /*
2956  * standard_join_search
2957  *	  Find possible joinpaths for a query by successively finding ways
2958  *	  to join component relations into join relations.
2959  *
2960  * 'levels_needed' is the number of iterations needed, ie, the number of
2961  *		independent jointree items in the query.  This is > 1.
2962  *
2963  * 'initial_rels' is a list of RelOptInfo nodes for each independent
2964  *		jointree item.  These are the components to be joined together.
2965  *		Note that levels_needed == list_length(initial_rels).
2966  *
2967  * Returns the final level of join relations, i.e., the relation that is
2968  * the result of joining all the original relations together.
2969  * At least one implementation path must be provided for this relation and
2970  * all required sub-relations.
2971  *
2972  * To support loadable plugins that modify planner behavior by changing the
2973  * join searching algorithm, we provide a hook variable that lets a plugin
2974  * replace or supplement this function.  Any such hook must return the same
2975  * final join relation as the standard code would, but it might have a
2976  * different set of implementation paths attached, and only the sub-joinrels
2977  * needed for these paths need have been instantiated.
2978  *
2979  * Note to plugin authors: the functions invoked during standard_join_search()
2980  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
2981  * than one join-order search, you'll probably need to save and restore the
2982  * original states of those data structures.  See geqo_eval() for an example.
2983  */
2984 RelOptInfo *
standard_join_search(PlannerInfo * root,int levels_needed,List * initial_rels)2985 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
2986 {
2987 	int			lev;
2988 	RelOptInfo *rel;
2989 
2990 	/*
2991 	 * This function cannot be invoked recursively within any one planning
2992 	 * problem, so join_rel_level[] can't be in use already.
2993 	 */
2994 	Assert(root->join_rel_level == NULL);
2995 
2996 	/*
2997 	 * We employ a simple "dynamic programming" algorithm: we first find all
2998 	 * ways to build joins of two jointree items, then all ways to build joins
2999 	 * of three items (from two-item joins and single items), then four-item
3000 	 * joins, and so on until we have considered all ways to join all the
3001 	 * items into one rel.
3002 	 *
3003 	 * root->join_rel_level[j] is a list of all the j-item rels.  Initially we
3004 	 * set root->join_rel_level[1] to represent all the single-jointree-item
3005 	 * relations.
3006 	 */
3007 	root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
3008 
3009 	root->join_rel_level[1] = initial_rels;
3010 
3011 	for (lev = 2; lev <= levels_needed; lev++)
3012 	{
3013 		ListCell   *lc;
3014 
3015 		/*
3016 		 * Determine all possible pairs of relations to be joined at this
3017 		 * level, and build paths for making each one from every available
3018 		 * pair of lower-level relations.
3019 		 */
3020 		join_search_one_level(root, lev);
3021 
3022 		/*
3023 		 * Run generate_partitionwise_join_paths() and
3024 		 * generate_useful_gather_paths() for each just-processed joinrel.  We
3025 		 * could not do this earlier because both regular and partial paths
3026 		 * can get added to a particular joinrel at multiple times within
3027 		 * join_search_one_level.
3028 		 *
3029 		 * After that, we're done creating paths for the joinrel, so run
3030 		 * set_cheapest().
3031 		 */
3032 		foreach(lc, root->join_rel_level[lev])
3033 		{
3034 			rel = (RelOptInfo *) lfirst(lc);
3035 
3036 			/* Create paths for partitionwise joins. */
3037 			generate_partitionwise_join_paths(root, rel);
3038 
3039 			/*
3040 			 * Except for the topmost scan/join rel, consider gathering
3041 			 * partial paths.  We'll do the same for the topmost scan/join rel
3042 			 * once we know the final targetlist (see grouping_planner).
3043 			 */
3044 			if (lev < levels_needed)
3045 				generate_useful_gather_paths(root, rel, false);
3046 
3047 			/* Find and save the cheapest paths for this rel */
3048 			set_cheapest(rel);
3049 
3050 #ifdef OPTIMIZER_DEBUG
3051 			debug_print_rel(root, rel);
3052 #endif
3053 		}
3054 	}
3055 
3056 	/*
3057 	 * We should have a single rel at the final level.
3058 	 */
3059 	if (root->join_rel_level[levels_needed] == NIL)
3060 		elog(ERROR, "failed to build any %d-way joins", levels_needed);
3061 	Assert(list_length(root->join_rel_level[levels_needed]) == 1);
3062 
3063 	rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
3064 
3065 	root->join_rel_level = NULL;
3066 
3067 	return rel;
3068 }
3069 
3070 /*****************************************************************************
3071  *			PUSHING QUALS DOWN INTO SUBQUERIES
3072  *****************************************************************************/
3073 
3074 /*
3075  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
3076  *
3077  * subquery is the particular component query being checked.  topquery
3078  * is the top component of a set-operations tree (the same Query if no
3079  * set-op is involved).
3080  *
3081  * Conditions checked here:
3082  *
3083  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
3084  * since that could change the set of rows returned.
3085  *
3086  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
3087  * quals into it, because that could change the results.
3088  *
3089  * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
3090  * This is because upper-level quals should semantically be evaluated only
3091  * once per distinct row, not once per original row, and if the qual is
3092  * volatile then extra evaluations could change the results.  (This issue
3093  * does not apply to other forms of aggregation such as GROUP BY, because
3094  * when those are present we push into HAVING not WHERE, so that the quals
3095  * are still applied after aggregation.)
3096  *
3097  * 4. If the subquery contains window functions, we cannot push volatile quals
3098  * into it.  The issue here is a bit different from DISTINCT: a volatile qual
3099  * might succeed for some rows of a window partition and fail for others,
3100  * thereby changing the partition contents and thus the window functions'
3101  * results for rows that remain.
3102  *
3103  * 5. If the subquery contains any set-returning functions in its targetlist,
3104  * we cannot push volatile quals into it.  That would push them below the SRFs
3105  * and thereby change the number of times they are evaluated.  Also, a
3106  * volatile qual could succeed for some SRF output rows and fail for others,
3107  * a behavior that cannot occur if it's evaluated before SRF expansion.
3108  *
3109  * 6. If the subquery has nonempty grouping sets, we cannot push down any
3110  * quals.  The concern here is that a qual referencing a "constant" grouping
3111  * column could get constant-folded, which would be improper because the value
3112  * is potentially nullable by grouping-set expansion.  This restriction could
3113  * be removed if we had a parsetree representation that shows that such
3114  * grouping columns are not really constant.  (There are other ideas that
3115  * could be used to relax this restriction, but that's the approach most
3116  * likely to get taken in the future.  Note that there's not much to be gained
3117  * so long as subquery_planner can't move HAVING clauses to WHERE within such
3118  * a subquery.)
3119  *
3120  * In addition, we make several checks on the subquery's output columns to see
3121  * if it is safe to reference them in pushed-down quals.  If output column k
3122  * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
3123  * to true, but we don't reject the subquery overall since column k might not
3124  * be referenced by some/all quals.  The unsafeColumns[] array will be
3125  * consulted later by qual_is_pushdown_safe().  It's better to do it this way
3126  * than to make the checks directly in qual_is_pushdown_safe(), because when
3127  * the subquery involves set operations we have to check the output
3128  * expressions in each arm of the set op.
3129  *
3130  * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
3131  * we're effectively assuming that the quals cannot distinguish values that
3132  * the DISTINCT's equality operator sees as equal, yet there are many
3133  * counterexamples to that assumption.  However use of such a qual with a
3134  * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
3135  * "equal" value will be chosen as the output value by the DISTINCT operation.
3136  * So we don't worry too much about that.  Another objection is that if the
3137  * qual is expensive to evaluate, running it for each original row might cost
3138  * more than we save by eliminating rows before the DISTINCT step.  But it
3139  * would be very hard to estimate that at this stage, and in practice pushdown
3140  * seldom seems to make things worse, so we ignore that problem too.
3141  *
3142  * Note: likewise, pushing quals into a subquery with window functions is a
3143  * bit dubious: the quals might remove some rows of a window partition while
3144  * leaving others, causing changes in the window functions' results for the
3145  * surviving rows.  We insist that such a qual reference only partitioning
3146  * columns, but again that only protects us if the qual does not distinguish
3147  * values that the partitioning equality operator sees as equal.  The risks
3148  * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3149  * occurs and thus there is no theoretical problem with such a qual.  But
3150  * we'll do this anyway because the potential performance benefits are very
3151  * large, and we've seen no field complaints about the longstanding comparable
3152  * behavior with DISTINCT.
3153  */
3154 static bool
subquery_is_pushdown_safe(Query * subquery,Query * topquery,pushdown_safety_info * safetyInfo)3155 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
3156 						  pushdown_safety_info *safetyInfo)
3157 {
3158 	SetOperationStmt *topop;
3159 
3160 	/* Check point 1 */
3161 	if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
3162 		return false;
3163 
3164 	/* Check point 6 */
3165 	if (subquery->groupClause && subquery->groupingSets)
3166 		return false;
3167 
3168 	/* Check points 3, 4, and 5 */
3169 	if (subquery->distinctClause ||
3170 		subquery->hasWindowFuncs ||
3171 		subquery->hasTargetSRFs)
3172 		safetyInfo->unsafeVolatile = true;
3173 
3174 	/*
3175 	 * If we're at a leaf query, check for unsafe expressions in its target
3176 	 * list, and mark any unsafe ones in unsafeColumns[].  (Non-leaf nodes in
3177 	 * setop trees have only simple Vars in their tlists, so no need to check
3178 	 * them.)
3179 	 */
3180 	if (subquery->setOperations == NULL)
3181 		check_output_expressions(subquery, safetyInfo);
3182 
3183 	/* Are we at top level, or looking at a setop component? */
3184 	if (subquery == topquery)
3185 	{
3186 		/* Top level, so check any component queries */
3187 		if (subquery->setOperations != NULL)
3188 			if (!recurse_pushdown_safe(subquery->setOperations, topquery,
3189 									   safetyInfo))
3190 				return false;
3191 	}
3192 	else
3193 	{
3194 		/* Setop component must not have more components (too weird) */
3195 		if (subquery->setOperations != NULL)
3196 			return false;
3197 		/* Check whether setop component output types match top level */
3198 		topop = castNode(SetOperationStmt, topquery->setOperations);
3199 		Assert(topop);
3200 		compare_tlist_datatypes(subquery->targetList,
3201 								topop->colTypes,
3202 								safetyInfo);
3203 	}
3204 	return true;
3205 }
3206 
3207 /*
3208  * Helper routine to recurse through setOperations tree
3209  */
3210 static bool
recurse_pushdown_safe(Node * setOp,Query * topquery,pushdown_safety_info * safetyInfo)3211 recurse_pushdown_safe(Node *setOp, Query *topquery,
3212 					  pushdown_safety_info *safetyInfo)
3213 {
3214 	if (IsA(setOp, RangeTblRef))
3215 	{
3216 		RangeTblRef *rtr = (RangeTblRef *) setOp;
3217 		RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
3218 		Query	   *subquery = rte->subquery;
3219 
3220 		Assert(subquery != NULL);
3221 		return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
3222 	}
3223 	else if (IsA(setOp, SetOperationStmt))
3224 	{
3225 		SetOperationStmt *op = (SetOperationStmt *) setOp;
3226 
3227 		/* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3228 		if (op->op == SETOP_EXCEPT)
3229 			return false;
3230 		/* Else recurse */
3231 		if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
3232 			return false;
3233 		if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
3234 			return false;
3235 	}
3236 	else
3237 	{
3238 		elog(ERROR, "unrecognized node type: %d",
3239 			 (int) nodeTag(setOp));
3240 	}
3241 	return true;
3242 }
3243 
3244 /*
3245  * check_output_expressions - check subquery's output expressions for safety
3246  *
3247  * There are several cases in which it's unsafe to push down an upper-level
3248  * qual if it references a particular output column of a subquery.  We check
3249  * each output column of the subquery and set unsafeColumns[k] to true if
3250  * that column is unsafe for a pushed-down qual to reference.  The conditions
3251  * checked here are:
3252  *
3253  * 1. We must not push down any quals that refer to subselect outputs that
3254  * return sets, else we'd introduce functions-returning-sets into the
3255  * subquery's WHERE/HAVING quals.
3256  *
3257  * 2. We must not push down any quals that refer to subselect outputs that
3258  * contain volatile functions, for fear of introducing strange results due
3259  * to multiple evaluation of a volatile function.
3260  *
3261  * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3262  * refer to non-DISTINCT output columns, because that could change the set
3263  * of rows returned.  (This condition is vacuous for DISTINCT, because then
3264  * there are no non-DISTINCT output columns, so we needn't check.  Note that
3265  * subquery_is_pushdown_safe already reported that we can't use volatile
3266  * quals if there's DISTINCT or DISTINCT ON.)
3267  *
3268  * 4. If the subquery has any window functions, we must not push down quals
3269  * that reference any output columns that are not listed in all the subquery's
3270  * window PARTITION BY clauses.  We can push down quals that use only
3271  * partitioning columns because they should succeed or fail identically for
3272  * every row of any one window partition, and totally excluding some
3273  * partitions will not change a window function's results for remaining
3274  * partitions.  (Again, this also requires nonvolatile quals, but
3275  * subquery_is_pushdown_safe handles that.)
3276  */
3277 static void
check_output_expressions(Query * subquery,pushdown_safety_info * safetyInfo)3278 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
3279 {
3280 	ListCell   *lc;
3281 
3282 	foreach(lc, subquery->targetList)
3283 	{
3284 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
3285 
3286 		if (tle->resjunk)
3287 			continue;			/* ignore resjunk columns */
3288 
3289 		/* We need not check further if output col is already known unsafe */
3290 		if (safetyInfo->unsafeColumns[tle->resno])
3291 			continue;
3292 
3293 		/* Functions returning sets are unsafe (point 1) */
3294 		if (subquery->hasTargetSRFs &&
3295 			expression_returns_set((Node *) tle->expr))
3296 		{
3297 			safetyInfo->unsafeColumns[tle->resno] = true;
3298 			continue;
3299 		}
3300 
3301 		/* Volatile functions are unsafe (point 2) */
3302 		if (contain_volatile_functions((Node *) tle->expr))
3303 		{
3304 			safetyInfo->unsafeColumns[tle->resno] = true;
3305 			continue;
3306 		}
3307 
3308 		/* If subquery uses DISTINCT ON, check point 3 */
3309 		if (subquery->hasDistinctOn &&
3310 			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
3311 		{
3312 			/* non-DISTINCT column, so mark it unsafe */
3313 			safetyInfo->unsafeColumns[tle->resno] = true;
3314 			continue;
3315 		}
3316 
3317 		/* If subquery uses window functions, check point 4 */
3318 		if (subquery->hasWindowFuncs &&
3319 			!targetIsInAllPartitionLists(tle, subquery))
3320 		{
3321 			/* not present in all PARTITION BY clauses, so mark it unsafe */
3322 			safetyInfo->unsafeColumns[tle->resno] = true;
3323 			continue;
3324 		}
3325 	}
3326 }
3327 
3328 /*
3329  * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3330  * push quals into each component query, but the quals can only reference
3331  * subquery columns that suffer no type coercions in the set operation.
3332  * Otherwise there are possible semantic gotchas.  So, we check the
3333  * component queries to see if any of them have output types different from
3334  * the top-level setop outputs.  unsafeColumns[k] is set true if column k
3335  * has different type in any component.
3336  *
3337  * We don't have to care about typmods here: the only allowed difference
3338  * between set-op input and output typmods is input is a specific typmod
3339  * and output is -1, and that does not require a coercion.
3340  *
3341  * tlist is a subquery tlist.
3342  * colTypes is an OID list of the top-level setop's output column types.
3343  * safetyInfo->unsafeColumns[] is the result array.
3344  */
3345 static void
compare_tlist_datatypes(List * tlist,List * colTypes,pushdown_safety_info * safetyInfo)3346 compare_tlist_datatypes(List *tlist, List *colTypes,
3347 						pushdown_safety_info *safetyInfo)
3348 {
3349 	ListCell   *l;
3350 	ListCell   *colType = list_head(colTypes);
3351 
3352 	foreach(l, tlist)
3353 	{
3354 		TargetEntry *tle = (TargetEntry *) lfirst(l);
3355 
3356 		if (tle->resjunk)
3357 			continue;			/* ignore resjunk columns */
3358 		if (colType == NULL)
3359 			elog(ERROR, "wrong number of tlist entries");
3360 		if (exprType((Node *) tle->expr) != lfirst_oid(colType))
3361 			safetyInfo->unsafeColumns[tle->resno] = true;
3362 		colType = lnext(colTypes, colType);
3363 	}
3364 	if (colType != NULL)
3365 		elog(ERROR, "wrong number of tlist entries");
3366 }
3367 
3368 /*
3369  * targetIsInAllPartitionLists
3370  *		True if the TargetEntry is listed in the PARTITION BY clause
3371  *		of every window defined in the query.
3372  *
3373  * It would be safe to ignore windows not actually used by any window
3374  * function, but it's not easy to get that info at this stage; and it's
3375  * unlikely to be useful to spend any extra cycles getting it, since
3376  * unreferenced window definitions are probably infrequent in practice.
3377  */
3378 static bool
targetIsInAllPartitionLists(TargetEntry * tle,Query * query)3379 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
3380 {
3381 	ListCell   *lc;
3382 
3383 	foreach(lc, query->windowClause)
3384 	{
3385 		WindowClause *wc = (WindowClause *) lfirst(lc);
3386 
3387 		if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
3388 			return false;
3389 	}
3390 	return true;
3391 }
3392 
3393 /*
3394  * qual_is_pushdown_safe - is a particular rinfo safe to push down?
3395  *
3396  * rinfo is a restriction clause applying to the given subquery (whose RTE
3397  * has index rti in the parent query).
3398  *
3399  * Conditions checked here:
3400  *
3401  * 1. rinfo's clause must not contain any SubPlans (mainly because it's
3402  * unclear that it will work correctly: SubLinks will already have been
3403  * transformed into SubPlans in the qual, but not in the subquery).  Note that
3404  * SubLinks that transform to initplans are safe, and will be accepted here
3405  * because what we'll see in the qual is just a Param referencing the initplan
3406  * output.
3407  *
3408  * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile
3409  * functions.
3410  *
3411  * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky
3412  * functions that are passed Var nodes, and therefore might reveal values from
3413  * the subquery as side effects.
3414  *
3415  * 4. rinfo's clause must not refer to the whole-row output of the subquery
3416  * (since there is no easy way to name that within the subquery itself).
3417  *
3418  * 5. rinfo's clause must not refer to any subquery output columns that were
3419  * found to be unsafe to reference by subquery_is_pushdown_safe().
3420  */
3421 static bool
qual_is_pushdown_safe(Query * subquery,Index rti,RestrictInfo * rinfo,pushdown_safety_info * safetyInfo)3422 qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo,
3423 					  pushdown_safety_info *safetyInfo)
3424 {
3425 	bool		safe = true;
3426 	Node	   *qual = (Node *) rinfo->clause;
3427 	List	   *vars;
3428 	ListCell   *vl;
3429 
3430 	/* Refuse subselects (point 1) */
3431 	if (contain_subplans(qual))
3432 		return false;
3433 
3434 	/* Refuse volatile quals if we found they'd be unsafe (point 2) */
3435 	if (safetyInfo->unsafeVolatile &&
3436 		contain_volatile_functions((Node *) rinfo))
3437 		return false;
3438 
3439 	/* Refuse leaky quals if told to (point 3) */
3440 	if (safetyInfo->unsafeLeaky &&
3441 		contain_leaked_vars(qual))
3442 		return false;
3443 
3444 	/*
3445 	 * It would be unsafe to push down window function calls, but at least for
3446 	 * the moment we could never see any in a qual anyhow.  (The same applies
3447 	 * to aggregates, which we check for in pull_var_clause below.)
3448 	 */
3449 	Assert(!contain_window_function(qual));
3450 
3451 	/*
3452 	 * Examine all Vars used in clause.  Since it's a restriction clause, all
3453 	 * such Vars must refer to subselect output columns ... unless this is
3454 	 * part of a LATERAL subquery, in which case there could be lateral
3455 	 * references.
3456 	 */
3457 	vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
3458 	foreach(vl, vars)
3459 	{
3460 		Var		   *var = (Var *) lfirst(vl);
3461 
3462 		/*
3463 		 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3464 		 * It's not clear whether a PHV could safely be pushed down, and even
3465 		 * less clear whether such a situation could arise in any cases of
3466 		 * practical interest anyway.  So for the moment, just refuse to push
3467 		 * down.
3468 		 */
3469 		if (!IsA(var, Var))
3470 		{
3471 			safe = false;
3472 			break;
3473 		}
3474 
3475 		/*
3476 		 * Punt if we find any lateral references.  It would be safe to push
3477 		 * these down, but we'd have to convert them into outer references,
3478 		 * which subquery_push_qual lacks the infrastructure to do.  The case
3479 		 * arises so seldom that it doesn't seem worth working hard on.
3480 		 */
3481 		if (var->varno != rti)
3482 		{
3483 			safe = false;
3484 			break;
3485 		}
3486 
3487 		/* Subqueries have no system columns */
3488 		Assert(var->varattno >= 0);
3489 
3490 		/* Check point 4 */
3491 		if (var->varattno == 0)
3492 		{
3493 			safe = false;
3494 			break;
3495 		}
3496 
3497 		/* Check point 5 */
3498 		if (safetyInfo->unsafeColumns[var->varattno])
3499 		{
3500 			safe = false;
3501 			break;
3502 		}
3503 	}
3504 
3505 	list_free(vars);
3506 
3507 	return safe;
3508 }
3509 
3510 /*
3511  * subquery_push_qual - push down a qual that we have determined is safe
3512  */
3513 static void
subquery_push_qual(Query * subquery,RangeTblEntry * rte,Index rti,Node * qual)3514 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
3515 {
3516 	if (subquery->setOperations != NULL)
3517 	{
3518 		/* Recurse to push it separately to each component query */
3519 		recurse_push_qual(subquery->setOperations, subquery,
3520 						  rte, rti, qual);
3521 	}
3522 	else
3523 	{
3524 		/*
3525 		 * We need to replace Vars in the qual (which must refer to outputs of
3526 		 * the subquery) with copies of the subquery's targetlist expressions.
3527 		 * Note that at this point, any uplevel Vars in the qual should have
3528 		 * been replaced with Params, so they need no work.
3529 		 *
3530 		 * This step also ensures that when we are pushing into a setop tree,
3531 		 * each component query gets its own copy of the qual.
3532 		 */
3533 		qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
3534 										 subquery->targetList,
3535 										 REPLACEVARS_REPORT_ERROR, 0,
3536 										 &subquery->hasSubLinks);
3537 
3538 		/*
3539 		 * Now attach the qual to the proper place: normally WHERE, but if the
3540 		 * subquery uses grouping or aggregation, put it in HAVING (since the
3541 		 * qual really refers to the group-result rows).
3542 		 */
3543 		if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
3544 			subquery->havingQual = make_and_qual(subquery->havingQual, qual);
3545 		else
3546 			subquery->jointree->quals =
3547 				make_and_qual(subquery->jointree->quals, qual);
3548 
3549 		/*
3550 		 * We need not change the subquery's hasAggs or hasSubLinks flags,
3551 		 * since we can't be pushing down any aggregates that weren't there
3552 		 * before, and we don't push down subselects at all.
3553 		 */
3554 	}
3555 }
3556 
3557 /*
3558  * Helper routine to recurse through setOperations tree
3559  */
3560 static void
recurse_push_qual(Node * setOp,Query * topquery,RangeTblEntry * rte,Index rti,Node * qual)3561 recurse_push_qual(Node *setOp, Query *topquery,
3562 				  RangeTblEntry *rte, Index rti, Node *qual)
3563 {
3564 	if (IsA(setOp, RangeTblRef))
3565 	{
3566 		RangeTblRef *rtr = (RangeTblRef *) setOp;
3567 		RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
3568 		Query	   *subquery = subrte->subquery;
3569 
3570 		Assert(subquery != NULL);
3571 		subquery_push_qual(subquery, rte, rti, qual);
3572 	}
3573 	else if (IsA(setOp, SetOperationStmt))
3574 	{
3575 		SetOperationStmt *op = (SetOperationStmt *) setOp;
3576 
3577 		recurse_push_qual(op->larg, topquery, rte, rti, qual);
3578 		recurse_push_qual(op->rarg, topquery, rte, rti, qual);
3579 	}
3580 	else
3581 	{
3582 		elog(ERROR, "unrecognized node type: %d",
3583 			 (int) nodeTag(setOp));
3584 	}
3585 }
3586 
3587 /*****************************************************************************
3588  *			SIMPLIFYING SUBQUERY TARGETLISTS
3589  *****************************************************************************/
3590 
3591 /*
3592  * remove_unused_subquery_outputs
3593  *		Remove subquery targetlist items we don't need
3594  *
3595  * It's possible, even likely, that the upper query does not read all the
3596  * output columns of the subquery.  We can remove any such outputs that are
3597  * not needed by the subquery itself (e.g., as sort/group columns) and do not
3598  * affect semantics otherwise (e.g., volatile functions can't be removed).
3599  * This is useful not only because we might be able to remove expensive-to-
3600  * compute expressions, but because deletion of output columns might allow
3601  * optimizations such as join removal to occur within the subquery.
3602  *
3603  * To avoid affecting column numbering in the targetlist, we don't physically
3604  * remove unused tlist entries, but rather replace their expressions with NULL
3605  * constants.  This is implemented by modifying subquery->targetList.
3606  */
3607 static void
remove_unused_subquery_outputs(Query * subquery,RelOptInfo * rel)3608 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
3609 {
3610 	Bitmapset  *attrs_used = NULL;
3611 	ListCell   *lc;
3612 
3613 	/*
3614 	 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
3615 	 * could update all the child SELECTs' tlists, but it seems not worth the
3616 	 * trouble presently.
3617 	 */
3618 	if (subquery->setOperations)
3619 		return;
3620 
3621 	/*
3622 	 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
3623 	 * time: all its output columns must be used in the distinctClause.
3624 	 */
3625 	if (subquery->distinctClause && !subquery->hasDistinctOn)
3626 		return;
3627 
3628 	/*
3629 	 * Collect a bitmap of all the output column numbers used by the upper
3630 	 * query.
3631 	 *
3632 	 * Add all the attributes needed for joins or final output.  Note: we must
3633 	 * look at rel's targetlist, not the attr_needed data, because attr_needed
3634 	 * isn't computed for inheritance child rels, cf set_append_rel_size().
3635 	 * (XXX might be worth changing that sometime.)
3636 	 */
3637 	pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
3638 
3639 	/* Add all the attributes used by un-pushed-down restriction clauses. */
3640 	foreach(lc, rel->baserestrictinfo)
3641 	{
3642 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
3643 
3644 		pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
3645 	}
3646 
3647 	/*
3648 	 * If there's a whole-row reference to the subquery, we can't remove
3649 	 * anything.
3650 	 */
3651 	if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
3652 		return;
3653 
3654 	/*
3655 	 * Run through the tlist and zap entries we don't need.  It's okay to
3656 	 * modify the tlist items in-place because set_subquery_pathlist made a
3657 	 * copy of the subquery.
3658 	 */
3659 	foreach(lc, subquery->targetList)
3660 	{
3661 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
3662 		Node	   *texpr = (Node *) tle->expr;
3663 
3664 		/*
3665 		 * If it has a sortgroupref number, it's used in some sort/group
3666 		 * clause so we'd better not remove it.  Also, don't remove any
3667 		 * resjunk columns, since their reason for being has nothing to do
3668 		 * with anybody reading the subquery's output.  (It's likely that
3669 		 * resjunk columns in a sub-SELECT would always have ressortgroupref
3670 		 * set, but even if they don't, it seems imprudent to remove them.)
3671 		 */
3672 		if (tle->ressortgroupref || tle->resjunk)
3673 			continue;
3674 
3675 		/*
3676 		 * If it's used by the upper query, we can't remove it.
3677 		 */
3678 		if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
3679 						  attrs_used))
3680 			continue;
3681 
3682 		/*
3683 		 * If it contains a set-returning function, we can't remove it since
3684 		 * that could change the number of rows returned by the subquery.
3685 		 */
3686 		if (subquery->hasTargetSRFs &&
3687 			expression_returns_set(texpr))
3688 			continue;
3689 
3690 		/*
3691 		 * If it contains volatile functions, we daren't remove it for fear
3692 		 * that the user is expecting their side-effects to happen.
3693 		 */
3694 		if (contain_volatile_functions(texpr))
3695 			continue;
3696 
3697 		/*
3698 		 * OK, we don't need it.  Replace the expression with a NULL constant.
3699 		 * Preserve the exposed type of the expression, in case something
3700 		 * looks at the rowtype of the subquery's result.
3701 		 */
3702 		tle->expr = (Expr *) makeNullConst(exprType(texpr),
3703 										   exprTypmod(texpr),
3704 										   exprCollation(texpr));
3705 	}
3706 }
3707 
3708 /*
3709  * create_partial_bitmap_paths
3710  *	  Build partial bitmap heap path for the relation
3711  */
3712 void
create_partial_bitmap_paths(PlannerInfo * root,RelOptInfo * rel,Path * bitmapqual)3713 create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
3714 							Path *bitmapqual)
3715 {
3716 	int			parallel_workers;
3717 	double		pages_fetched;
3718 
3719 	/* Compute heap pages for bitmap heap scan */
3720 	pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
3721 										 NULL, NULL);
3722 
3723 	parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
3724 											   max_parallel_workers_per_gather);
3725 
3726 	if (parallel_workers <= 0)
3727 		return;
3728 
3729 	add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
3730 														   bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
3731 }
3732 
3733 /*
3734  * Compute the number of parallel workers that should be used to scan a
3735  * relation.  We compute the parallel workers based on the size of the heap to
3736  * be scanned and the size of the index to be scanned, then choose a minimum
3737  * of those.
3738  *
3739  * "heap_pages" is the number of pages from the table that we expect to scan, or
3740  * -1 if we don't expect to scan any.
3741  *
3742  * "index_pages" is the number of pages from the index that we expect to scan, or
3743  * -1 if we don't expect to scan any.
3744  *
3745  * "max_workers" is caller's limit on the number of workers.  This typically
3746  * comes from a GUC.
3747  */
3748 int
compute_parallel_worker(RelOptInfo * rel,double heap_pages,double index_pages,int max_workers)3749 compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
3750 						int max_workers)
3751 {
3752 	int			parallel_workers = 0;
3753 
3754 	/*
3755 	 * If the user has set the parallel_workers reloption, use that; otherwise
3756 	 * select a default number of workers.
3757 	 */
3758 	if (rel->rel_parallel_workers != -1)
3759 		parallel_workers = rel->rel_parallel_workers;
3760 	else
3761 	{
3762 		/*
3763 		 * If the number of pages being scanned is insufficient to justify a
3764 		 * parallel scan, just return zero ... unless it's an inheritance
3765 		 * child. In that case, we want to generate a parallel path here
3766 		 * anyway.  It might not be worthwhile just for this relation, but
3767 		 * when combined with all of its inheritance siblings it may well pay
3768 		 * off.
3769 		 */
3770 		if (rel->reloptkind == RELOPT_BASEREL &&
3771 			((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
3772 			 (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
3773 			return 0;
3774 
3775 		if (heap_pages >= 0)
3776 		{
3777 			int			heap_parallel_threshold;
3778 			int			heap_parallel_workers = 1;
3779 
3780 			/*
3781 			 * Select the number of workers based on the log of the size of
3782 			 * the relation.  This probably needs to be a good deal more
3783 			 * sophisticated, but we need something here for now.  Note that
3784 			 * the upper limit of the min_parallel_table_scan_size GUC is
3785 			 * chosen to prevent overflow here.
3786 			 */
3787 			heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
3788 			while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
3789 			{
3790 				heap_parallel_workers++;
3791 				heap_parallel_threshold *= 3;
3792 				if (heap_parallel_threshold > INT_MAX / 3)
3793 					break;		/* avoid overflow */
3794 			}
3795 
3796 			parallel_workers = heap_parallel_workers;
3797 		}
3798 
3799 		if (index_pages >= 0)
3800 		{
3801 			int			index_parallel_workers = 1;
3802 			int			index_parallel_threshold;
3803 
3804 			/* same calculation as for heap_pages above */
3805 			index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
3806 			while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
3807 			{
3808 				index_parallel_workers++;
3809 				index_parallel_threshold *= 3;
3810 				if (index_parallel_threshold > INT_MAX / 3)
3811 					break;		/* avoid overflow */
3812 			}
3813 
3814 			if (parallel_workers > 0)
3815 				parallel_workers = Min(parallel_workers, index_parallel_workers);
3816 			else
3817 				parallel_workers = index_parallel_workers;
3818 		}
3819 	}
3820 
3821 	/* In no case use more than caller supplied maximum number of workers */
3822 	parallel_workers = Min(parallel_workers, max_workers);
3823 
3824 	return parallel_workers;
3825 }
3826 
3827 /*
3828  * generate_partitionwise_join_paths
3829  * 		Create paths representing partitionwise join for given partitioned
3830  * 		join relation.
3831  *
3832  * This must not be called until after we are done adding paths for all
3833  * child-joins. Otherwise, add_path might delete a path to which some path
3834  * generated here has a reference.
3835  */
3836 void
generate_partitionwise_join_paths(PlannerInfo * root,RelOptInfo * rel)3837 generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
3838 {
3839 	List	   *live_children = NIL;
3840 	int			cnt_parts;
3841 	int			num_parts;
3842 	RelOptInfo **part_rels;
3843 
3844 	/* Handle only join relations here. */
3845 	if (!IS_JOIN_REL(rel))
3846 		return;
3847 
3848 	/* We've nothing to do if the relation is not partitioned. */
3849 	if (!IS_PARTITIONED_REL(rel))
3850 		return;
3851 
3852 	/* The relation should have consider_partitionwise_join set. */
3853 	Assert(rel->consider_partitionwise_join);
3854 
3855 	/* Guard against stack overflow due to overly deep partition hierarchy. */
3856 	check_stack_depth();
3857 
3858 	num_parts = rel->nparts;
3859 	part_rels = rel->part_rels;
3860 
3861 	/* Collect non-dummy child-joins. */
3862 	for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
3863 	{
3864 		RelOptInfo *child_rel = part_rels[cnt_parts];
3865 
3866 		/* If it's been pruned entirely, it's certainly dummy. */
3867 		if (child_rel == NULL)
3868 			continue;
3869 
3870 		/* Add partitionwise join paths for partitioned child-joins. */
3871 		generate_partitionwise_join_paths(root, child_rel);
3872 
3873 		set_cheapest(child_rel);
3874 
3875 		/* Dummy children will not be scanned, so ignore those. */
3876 		if (IS_DUMMY_REL(child_rel))
3877 			continue;
3878 
3879 #ifdef OPTIMIZER_DEBUG
3880 		debug_print_rel(root, child_rel);
3881 #endif
3882 
3883 		live_children = lappend(live_children, child_rel);
3884 	}
3885 
3886 	/* If all child-joins are dummy, parent join is also dummy. */
3887 	if (!live_children)
3888 	{
3889 		mark_dummy_rel(rel);
3890 		return;
3891 	}
3892 
3893 	/* Build additional paths for this rel from child-join paths. */
3894 	add_paths_to_append_rel(root, rel, live_children);
3895 	list_free(live_children);
3896 }
3897 
3898 
3899 /*****************************************************************************
3900  *			DEBUG SUPPORT
3901  *****************************************************************************/
3902 
3903 #ifdef OPTIMIZER_DEBUG
3904 
3905 static void
print_relids(PlannerInfo * root,Relids relids)3906 print_relids(PlannerInfo *root, Relids relids)
3907 {
3908 	int			x;
3909 	bool		first = true;
3910 
3911 	x = -1;
3912 	while ((x = bms_next_member(relids, x)) >= 0)
3913 	{
3914 		if (!first)
3915 			printf(" ");
3916 		if (x < root->simple_rel_array_size &&
3917 			root->simple_rte_array[x])
3918 			printf("%s", root->simple_rte_array[x]->eref->aliasname);
3919 		else
3920 			printf("%d", x);
3921 		first = false;
3922 	}
3923 }
3924 
3925 static void
print_restrictclauses(PlannerInfo * root,List * clauses)3926 print_restrictclauses(PlannerInfo *root, List *clauses)
3927 {
3928 	ListCell   *l;
3929 
3930 	foreach(l, clauses)
3931 	{
3932 		RestrictInfo *c = lfirst(l);
3933 
3934 		print_expr((Node *) c->clause, root->parse->rtable);
3935 		if (lnext(clauses, l))
3936 			printf(", ");
3937 	}
3938 }
3939 
3940 static void
print_path(PlannerInfo * root,Path * path,int indent)3941 print_path(PlannerInfo *root, Path *path, int indent)
3942 {
3943 	const char *ptype;
3944 	bool		join = false;
3945 	Path	   *subpath = NULL;
3946 	int			i;
3947 
3948 	switch (nodeTag(path))
3949 	{
3950 		case T_Path:
3951 			switch (path->pathtype)
3952 			{
3953 				case T_SeqScan:
3954 					ptype = "SeqScan";
3955 					break;
3956 				case T_SampleScan:
3957 					ptype = "SampleScan";
3958 					break;
3959 				case T_FunctionScan:
3960 					ptype = "FunctionScan";
3961 					break;
3962 				case T_TableFuncScan:
3963 					ptype = "TableFuncScan";
3964 					break;
3965 				case T_ValuesScan:
3966 					ptype = "ValuesScan";
3967 					break;
3968 				case T_CteScan:
3969 					ptype = "CteScan";
3970 					break;
3971 				case T_NamedTuplestoreScan:
3972 					ptype = "NamedTuplestoreScan";
3973 					break;
3974 				case T_Result:
3975 					ptype = "Result";
3976 					break;
3977 				case T_WorkTableScan:
3978 					ptype = "WorkTableScan";
3979 					break;
3980 				default:
3981 					ptype = "???Path";
3982 					break;
3983 			}
3984 			break;
3985 		case T_IndexPath:
3986 			ptype = "IdxScan";
3987 			break;
3988 		case T_BitmapHeapPath:
3989 			ptype = "BitmapHeapScan";
3990 			break;
3991 		case T_BitmapAndPath:
3992 			ptype = "BitmapAndPath";
3993 			break;
3994 		case T_BitmapOrPath:
3995 			ptype = "BitmapOrPath";
3996 			break;
3997 		case T_TidPath:
3998 			ptype = "TidScan";
3999 			break;
4000 		case T_SubqueryScanPath:
4001 			ptype = "SubqueryScan";
4002 			break;
4003 		case T_ForeignPath:
4004 			ptype = "ForeignScan";
4005 			break;
4006 		case T_CustomPath:
4007 			ptype = "CustomScan";
4008 			break;
4009 		case T_NestPath:
4010 			ptype = "NestLoop";
4011 			join = true;
4012 			break;
4013 		case T_MergePath:
4014 			ptype = "MergeJoin";
4015 			join = true;
4016 			break;
4017 		case T_HashPath:
4018 			ptype = "HashJoin";
4019 			join = true;
4020 			break;
4021 		case T_AppendPath:
4022 			ptype = "Append";
4023 			break;
4024 		case T_MergeAppendPath:
4025 			ptype = "MergeAppend";
4026 			break;
4027 		case T_GroupResultPath:
4028 			ptype = "GroupResult";
4029 			break;
4030 		case T_MaterialPath:
4031 			ptype = "Material";
4032 			subpath = ((MaterialPath *) path)->subpath;
4033 			break;
4034 		case T_MemoizePath:
4035 			ptype = "Memoize";
4036 			subpath = ((MemoizePath *) path)->subpath;
4037 			break;
4038 		case T_UniquePath:
4039 			ptype = "Unique";
4040 			subpath = ((UniquePath *) path)->subpath;
4041 			break;
4042 		case T_GatherPath:
4043 			ptype = "Gather";
4044 			subpath = ((GatherPath *) path)->subpath;
4045 			break;
4046 		case T_GatherMergePath:
4047 			ptype = "GatherMerge";
4048 			subpath = ((GatherMergePath *) path)->subpath;
4049 			break;
4050 		case T_ProjectionPath:
4051 			ptype = "Projection";
4052 			subpath = ((ProjectionPath *) path)->subpath;
4053 			break;
4054 		case T_ProjectSetPath:
4055 			ptype = "ProjectSet";
4056 			subpath = ((ProjectSetPath *) path)->subpath;
4057 			break;
4058 		case T_SortPath:
4059 			ptype = "Sort";
4060 			subpath = ((SortPath *) path)->subpath;
4061 			break;
4062 		case T_IncrementalSortPath:
4063 			ptype = "IncrementalSort";
4064 			subpath = ((SortPath *) path)->subpath;
4065 			break;
4066 		case T_GroupPath:
4067 			ptype = "Group";
4068 			subpath = ((GroupPath *) path)->subpath;
4069 			break;
4070 		case T_UpperUniquePath:
4071 			ptype = "UpperUnique";
4072 			subpath = ((UpperUniquePath *) path)->subpath;
4073 			break;
4074 		case T_AggPath:
4075 			ptype = "Agg";
4076 			subpath = ((AggPath *) path)->subpath;
4077 			break;
4078 		case T_GroupingSetsPath:
4079 			ptype = "GroupingSets";
4080 			subpath = ((GroupingSetsPath *) path)->subpath;
4081 			break;
4082 		case T_MinMaxAggPath:
4083 			ptype = "MinMaxAgg";
4084 			break;
4085 		case T_WindowAggPath:
4086 			ptype = "WindowAgg";
4087 			subpath = ((WindowAggPath *) path)->subpath;
4088 			break;
4089 		case T_SetOpPath:
4090 			ptype = "SetOp";
4091 			subpath = ((SetOpPath *) path)->subpath;
4092 			break;
4093 		case T_RecursiveUnionPath:
4094 			ptype = "RecursiveUnion";
4095 			break;
4096 		case T_LockRowsPath:
4097 			ptype = "LockRows";
4098 			subpath = ((LockRowsPath *) path)->subpath;
4099 			break;
4100 		case T_ModifyTablePath:
4101 			ptype = "ModifyTable";
4102 			break;
4103 		case T_LimitPath:
4104 			ptype = "Limit";
4105 			subpath = ((LimitPath *) path)->subpath;
4106 			break;
4107 		default:
4108 			ptype = "???Path";
4109 			break;
4110 	}
4111 
4112 	for (i = 0; i < indent; i++)
4113 		printf("\t");
4114 	printf("%s", ptype);
4115 
4116 	if (path->parent)
4117 	{
4118 		printf("(");
4119 		print_relids(root, path->parent->relids);
4120 		printf(")");
4121 	}
4122 	if (path->param_info)
4123 	{
4124 		printf(" required_outer (");
4125 		print_relids(root, path->param_info->ppi_req_outer);
4126 		printf(")");
4127 	}
4128 	printf(" rows=%.0f cost=%.2f..%.2f\n",
4129 		   path->rows, path->startup_cost, path->total_cost);
4130 
4131 	if (path->pathkeys)
4132 	{
4133 		for (i = 0; i < indent; i++)
4134 			printf("\t");
4135 		printf("  pathkeys: ");
4136 		print_pathkeys(path->pathkeys, root->parse->rtable);
4137 	}
4138 
4139 	if (join)
4140 	{
4141 		JoinPath   *jp = (JoinPath *) path;
4142 
4143 		for (i = 0; i < indent; i++)
4144 			printf("\t");
4145 		printf("  clauses: ");
4146 		print_restrictclauses(root, jp->joinrestrictinfo);
4147 		printf("\n");
4148 
4149 		if (IsA(path, MergePath))
4150 		{
4151 			MergePath  *mp = (MergePath *) path;
4152 
4153 			for (i = 0; i < indent; i++)
4154 				printf("\t");
4155 			printf("  sortouter=%d sortinner=%d materializeinner=%d\n",
4156 				   ((mp->outersortkeys) ? 1 : 0),
4157 				   ((mp->innersortkeys) ? 1 : 0),
4158 				   ((mp->materialize_inner) ? 1 : 0));
4159 		}
4160 
4161 		print_path(root, jp->outerjoinpath, indent + 1);
4162 		print_path(root, jp->innerjoinpath, indent + 1);
4163 	}
4164 
4165 	if (subpath)
4166 		print_path(root, subpath, indent + 1);
4167 }
4168 
4169 void
debug_print_rel(PlannerInfo * root,RelOptInfo * rel)4170 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
4171 {
4172 	ListCell   *l;
4173 
4174 	printf("RELOPTINFO (");
4175 	print_relids(root, rel->relids);
4176 	printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
4177 
4178 	if (rel->baserestrictinfo)
4179 	{
4180 		printf("\tbaserestrictinfo: ");
4181 		print_restrictclauses(root, rel->baserestrictinfo);
4182 		printf("\n");
4183 	}
4184 
4185 	if (rel->joininfo)
4186 	{
4187 		printf("\tjoininfo: ");
4188 		print_restrictclauses(root, rel->joininfo);
4189 		printf("\n");
4190 	}
4191 
4192 	printf("\tpath list:\n");
4193 	foreach(l, rel->pathlist)
4194 		print_path(root, lfirst(l), 1);
4195 	if (rel->cheapest_parameterized_paths)
4196 	{
4197 		printf("\n\tcheapest parameterized paths:\n");
4198 		foreach(l, rel->cheapest_parameterized_paths)
4199 			print_path(root, lfirst(l), 1);
4200 	}
4201 	if (rel->cheapest_startup_path)
4202 	{
4203 		printf("\n\tcheapest startup path:\n");
4204 		print_path(root, rel->cheapest_startup_path, 1);
4205 	}
4206 	if (rel->cheapest_total_path)
4207 	{
4208 		printf("\n\tcheapest total path:\n");
4209 		print_path(root, rel->cheapest_total_path, 1);
4210 	}
4211 	printf("\n");
4212 	fflush(stdout);
4213 }
4214 
4215 #endif							/* OPTIMIZER_DEBUG */
4216