1 /*-------------------------------------------------------------------------
2  *
3  * prepunion.c
4  *	  Routines to plan set-operation queries.  The filename is a leftover
5  *	  from a time when only UNIONs were implemented.
6  *
7  * There are two code paths in the planner for set-operation queries.
8  * If a subquery consists entirely of simple UNION ALL operations, it
9  * is converted into an "append relation".  Otherwise, it is handled
10  * by the general code in this module (plan_set_operations and its
11  * subroutines).  There is some support code here for the append-relation
12  * case, but most of the heavy lifting for that is done elsewhere,
13  * notably in prepjointree.c and allpaths.c.
14  *
15  * There is also some code here to support planning of queries that use
16  * inheritance (SELECT FROM foo*).  Inheritance trees are converted into
17  * append relations, and thenceforth share code with the UNION ALL case.
18  *
19  *
20  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  *
24  * IDENTIFICATION
25  *	  src/backend/optimizer/prep/prepunion.c
26  *
27  *-------------------------------------------------------------------------
28  */
29 #include "postgres.h"
30 
31 #include <limits.h>
32 
33 #include "access/heapam.h"
34 #include "access/htup_details.h"
35 #include "access/sysattr.h"
36 #include "catalog/partition.h"
37 #include "catalog/pg_inherits.h"
38 #include "catalog/pg_type.h"
39 #include "miscadmin.h"
40 #include "nodes/makefuncs.h"
41 #include "nodes/nodeFuncs.h"
42 #include "optimizer/cost.h"
43 #include "optimizer/pathnode.h"
44 #include "optimizer/paths.h"
45 #include "optimizer/planmain.h"
46 #include "optimizer/planner.h"
47 #include "optimizer/prep.h"
48 #include "optimizer/tlist.h"
49 #include "parser/parse_coerce.h"
50 #include "parser/parsetree.h"
51 #include "utils/lsyscache.h"
52 #include "utils/rel.h"
53 #include "utils/selfuncs.h"
54 
55 
56 typedef struct
57 {
58 	PlannerInfo *root;
59 	int			nappinfos;
60 	AppendRelInfo **appinfos;
61 } adjust_appendrel_attrs_context;
62 
63 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
64 					   List *colTypes, List *colCollations,
65 					   bool junkOK,
66 					   int flag, List *refnames_tlist,
67 					   List **pTargetList,
68 					   double *pNumGroups);
69 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
70 						PlannerInfo *root,
71 						List *refnames_tlist,
72 						List **pTargetList);
73 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
74 					 List *refnames_tlist,
75 					 List **pTargetList);
76 static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
77 						List *refnames_tlist,
78 						List **pTargetList);
79 static List *plan_union_children(PlannerInfo *root,
80 					SetOperationStmt *top_union,
81 					List *refnames_tlist,
82 					List **tlist_list);
83 static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
84 				  PlannerInfo *root);
85 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
86 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
87 					Path *input_path,
88 					double dNumGroups, double dNumOutputRows,
89 					const char *construct);
90 static List *generate_setop_tlist(List *colTypes, List *colCollations,
91 					 int flag,
92 					 Index varno,
93 					 bool hack_constants,
94 					 List *input_tlist,
95 					 List *refnames_tlist);
96 static List *generate_append_tlist(List *colTypes, List *colCollations,
97 					  bool flag,
98 					  List *input_tlists,
99 					  List *refnames_tlist);
100 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
101 static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
102 						 Index rti);
103 static void expand_partitioned_rtentry(PlannerInfo *root,
104 						   RangeTblEntry *parentrte,
105 						   Index parentRTindex, Relation parentrel,
106 						   PlanRowMark *top_parentrc, LOCKMODE lockmode,
107 						   List **appinfos);
108 static void expand_single_inheritance_child(PlannerInfo *root,
109 								RangeTblEntry *parentrte,
110 								Index parentRTindex, Relation parentrel,
111 								PlanRowMark *top_parentrc, Relation childrel,
112 								List **appinfos, RangeTblEntry **childrte_p,
113 								Index *childRTindex_p);
114 static void make_inh_translation_list(Relation oldrelation,
115 						  Relation newrelation,
116 						  Index newvarno,
117 						  List **translated_vars);
118 static Bitmapset *translate_col_privs(const Bitmapset *parent_privs,
119 					List *translated_vars);
120 static Node *adjust_appendrel_attrs_mutator(Node *node,
121 							   adjust_appendrel_attrs_context *context);
122 static Relids adjust_child_relids(Relids relids, int nappinfos,
123 					AppendRelInfo **appinfos);
124 static List *adjust_inherited_tlist(List *tlist,
125 					   AppendRelInfo *context);
126 
127 
128 /*
129  * plan_set_operations
130  *
131  *	  Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
132  *
133  * This routine only deals with the setOperations tree of the given query.
134  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
135  * when we return to grouping_planner; likewise for LIMIT.
136  *
137  * What we return is an "upperrel" RelOptInfo containing at least one Path
138  * that implements the set-operation tree.  In addition, root->processed_tlist
139  * receives a targetlist representing the output of the topmost setop node.
140  */
141 RelOptInfo *
plan_set_operations(PlannerInfo * root)142 plan_set_operations(PlannerInfo *root)
143 {
144 	Query	   *parse = root->parse;
145 	SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
146 	Node	   *node;
147 	RangeTblEntry *leftmostRTE;
148 	Query	   *leftmostQuery;
149 	RelOptInfo *setop_rel;
150 	List	   *top_tlist;
151 
152 	Assert(topop);
153 
154 	/* check for unsupported stuff */
155 	Assert(parse->jointree->fromlist == NIL);
156 	Assert(parse->jointree->quals == NULL);
157 	Assert(parse->groupClause == NIL);
158 	Assert(parse->havingQual == NULL);
159 	Assert(parse->windowClause == NIL);
160 	Assert(parse->distinctClause == NIL);
161 
162 	/*
163 	 * We'll need to build RelOptInfos for each of the leaf subqueries, which
164 	 * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
165 	 * arrays for that.
166 	 */
167 	setup_simple_rel_arrays(root);
168 
169 	/*
170 	 * Populate append_rel_array with each AppendRelInfo to allow direct
171 	 * lookups by child relid.
172 	 */
173 	setup_append_rel_array(root);
174 
175 	/*
176 	 * Find the leftmost component Query.  We need to use its column names for
177 	 * all generated tlists (else SELECT INTO won't work right).
178 	 */
179 	node = topop->larg;
180 	while (node && IsA(node, SetOperationStmt))
181 		node = ((SetOperationStmt *) node)->larg;
182 	Assert(node && IsA(node, RangeTblRef));
183 	leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
184 	leftmostQuery = leftmostRTE->subquery;
185 	Assert(leftmostQuery != NULL);
186 
187 	/*
188 	 * If the topmost node is a recursive union, it needs special processing.
189 	 */
190 	if (root->hasRecursion)
191 	{
192 		setop_rel = generate_recursion_path(topop, root,
193 											leftmostQuery->targetList,
194 											&top_tlist);
195 	}
196 	else
197 	{
198 		/*
199 		 * Recurse on setOperations tree to generate paths for set ops. The
200 		 * final output paths should have just the column types shown as the
201 		 * output from the top-level node, plus possibly resjunk working
202 		 * columns (we can rely on upper-level nodes to deal with that).
203 		 */
204 		setop_rel = recurse_set_operations((Node *) topop, root,
205 										   topop->colTypes, topop->colCollations,
206 										   true, -1,
207 										   leftmostQuery->targetList,
208 										   &top_tlist,
209 										   NULL);
210 	}
211 
212 	/* Must return the built tlist into root->processed_tlist. */
213 	root->processed_tlist = top_tlist;
214 
215 	return setop_rel;
216 }
217 
218 /*
219  * recurse_set_operations
220  *	  Recursively handle one step in a tree of set operations
221  *
222  * colTypes: OID list of set-op's result column datatypes
223  * colCollations: OID list of set-op's result column collations
224  * junkOK: if true, child resjunk columns may be left in the result
225  * flag: if >= 0, add a resjunk output column indicating value of flag
226  * refnames_tlist: targetlist to take column names from
227  *
228  * Returns a RelOptInfo for the subtree, as well as these output parameters:
229  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
230  * *pNumGroups: if not NULL, we estimate the number of distinct groups
231  *		in the result, and store it there
232  *
233  * The pTargetList output parameter is mostly redundant with the pathtarget
234  * of the returned RelOptInfo, but for the moment we need it because much of
235  * the logic in this file depends on flag columns being marked resjunk.
236  * Pending a redesign of how that works, this is the easy way out.
237  *
238  * We don't have to care about typmods here: the only allowed difference
239  * between set-op input and output typmods is input is a specific typmod
240  * and output is -1, and that does not require a coercion.
241  */
242 static RelOptInfo *
recurse_set_operations(Node * setOp,PlannerInfo * root,List * colTypes,List * colCollations,bool junkOK,int flag,List * refnames_tlist,List ** pTargetList,double * pNumGroups)243 recurse_set_operations(Node *setOp, PlannerInfo *root,
244 					   List *colTypes, List *colCollations,
245 					   bool junkOK,
246 					   int flag, List *refnames_tlist,
247 					   List **pTargetList,
248 					   double *pNumGroups)
249 {
250 	RelOptInfo *rel = NULL;		/* keep compiler quiet */
251 
252 	/* Guard against stack overflow due to overly complex setop nests */
253 	check_stack_depth();
254 
255 	if (IsA(setOp, RangeTblRef))
256 	{
257 		RangeTblRef *rtr = (RangeTblRef *) setOp;
258 		RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
259 		Query	   *subquery = rte->subquery;
260 		PlannerInfo *subroot;
261 		RelOptInfo *final_rel;
262 		Path	   *subpath;
263 		Path	   *path;
264 		List	   *tlist;
265 
266 		Assert(subquery != NULL);
267 
268 		/* Build a RelOptInfo for this leaf subquery. */
269 		rel = build_simple_rel(root, rtr->rtindex, NULL);
270 
271 		/* plan_params should not be in use in current query level */
272 		Assert(root->plan_params == NIL);
273 
274 		/* Generate a subroot and Paths for the subquery */
275 		subroot = rel->subroot = subquery_planner(root->glob, subquery,
276 												  root,
277 												  false,
278 												  root->tuple_fraction);
279 
280 		/*
281 		 * It should not be possible for the primitive query to contain any
282 		 * cross-references to other primitive queries in the setop tree.
283 		 */
284 		if (root->plan_params)
285 			elog(ERROR, "unexpected outer reference in set operation subquery");
286 
287 		/* Figure out the appropriate target list for this subquery. */
288 		tlist = generate_setop_tlist(colTypes, colCollations,
289 									 flag,
290 									 rtr->rtindex,
291 									 true,
292 									 subroot->processed_tlist,
293 									 refnames_tlist);
294 		rel->reltarget = create_pathtarget(root, tlist);
295 
296 		/* Return the fully-fledged tlist to caller, too */
297 		*pTargetList = tlist;
298 
299 		/*
300 		 * Mark rel with estimated output rows, width, etc.  Note that we have
301 		 * to do this before generating outer-query paths, else
302 		 * cost_subqueryscan is not happy.
303 		 */
304 		set_subquery_size_estimates(root, rel);
305 
306 		/*
307 		 * Since we may want to add a partial path to this relation, we must
308 		 * set its consider_parallel flag correctly.
309 		 */
310 		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
311 		rel->consider_parallel = final_rel->consider_parallel;
312 
313 		/*
314 		 * For the moment, we consider only a single Path for the subquery.
315 		 * This should change soon (make it look more like
316 		 * set_subquery_pathlist).
317 		 */
318 		subpath = get_cheapest_fractional_path(final_rel,
319 											   root->tuple_fraction);
320 
321 		/*
322 		 * Stick a SubqueryScanPath atop that.
323 		 *
324 		 * We don't bother to determine the subquery's output ordering since
325 		 * it won't be reflected in the set-op result anyhow; so just label
326 		 * the SubqueryScanPath with nil pathkeys.  (XXX that should change
327 		 * soon too, likely.)
328 		 */
329 		path = (Path *) create_subqueryscan_path(root, rel, subpath,
330 												 NIL, NULL);
331 
332 		add_path(rel, path);
333 
334 		/*
335 		 * If we have a partial path for the child relation, we can use that
336 		 * to build a partial path for this relation.  But there's no point in
337 		 * considering any path but the cheapest.
338 		 */
339 		if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
340 			final_rel->partial_pathlist != NIL)
341 		{
342 			Path	   *partial_subpath;
343 			Path	   *partial_path;
344 
345 			partial_subpath = linitial(final_rel->partial_pathlist);
346 			partial_path = (Path *)
347 				create_subqueryscan_path(root, rel, partial_subpath,
348 										 NIL, NULL);
349 			add_partial_path(rel, partial_path);
350 		}
351 
352 		/*
353 		 * Estimate number of groups if caller wants it.  If the subquery used
354 		 * grouping or aggregation, its output is probably mostly unique
355 		 * anyway; otherwise do statistical estimation.
356 		 *
357 		 * XXX you don't really want to know about this: we do the estimation
358 		 * using the subquery's original targetlist expressions, not the
359 		 * subroot->processed_tlist which might seem more appropriate.  The
360 		 * reason is that if the subquery is itself a setop, it may return a
361 		 * processed_tlist containing "varno 0" Vars generated by
362 		 * generate_append_tlist, and those would confuse estimate_num_groups
363 		 * mightily.  We ought to get rid of the "varno 0" hack, but that
364 		 * requires a redesign of the parsetree representation of setops, so
365 		 * that there can be an RTE corresponding to each setop's output.
366 		 */
367 		if (pNumGroups)
368 		{
369 			if (subquery->groupClause || subquery->groupingSets ||
370 				subquery->distinctClause ||
371 				subroot->hasHavingQual || subquery->hasAggs)
372 				*pNumGroups = subpath->rows;
373 			else
374 				*pNumGroups = estimate_num_groups(subroot,
375 												  get_tlist_exprs(subquery->targetList, false),
376 												  subpath->rows,
377 												  NULL);
378 		}
379 	}
380 	else if (IsA(setOp, SetOperationStmt))
381 	{
382 		SetOperationStmt *op = (SetOperationStmt *) setOp;
383 
384 		/* UNIONs are much different from INTERSECT/EXCEPT */
385 		if (op->op == SETOP_UNION)
386 			rel = generate_union_paths(op, root,
387 									   refnames_tlist,
388 									   pTargetList);
389 		else
390 			rel = generate_nonunion_paths(op, root,
391 										  refnames_tlist,
392 										  pTargetList);
393 		if (pNumGroups)
394 			*pNumGroups = rel->rows;
395 
396 		/*
397 		 * If necessary, add a Result node to project the caller-requested
398 		 * output columns.
399 		 *
400 		 * XXX you don't really want to know about this: setrefs.c will apply
401 		 * fix_upper_expr() to the Result node's tlist. This would fail if the
402 		 * Vars generated by generate_setop_tlist() were not exactly equal()
403 		 * to the corresponding tlist entries of the subplan. However, since
404 		 * the subplan was generated by generate_union_plan() or
405 		 * generate_nonunion_plan(), and hence its tlist was generated by
406 		 * generate_append_tlist(), this will work.  We just tell
407 		 * generate_setop_tlist() to use varno 0.
408 		 */
409 		if (flag >= 0 ||
410 			!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
411 			!tlist_same_collations(*pTargetList, colCollations, junkOK))
412 		{
413 			PathTarget *target;
414 			ListCell   *lc;
415 
416 			*pTargetList = generate_setop_tlist(colTypes, colCollations,
417 												flag,
418 												0,
419 												false,
420 												*pTargetList,
421 												refnames_tlist);
422 			target = create_pathtarget(root, *pTargetList);
423 
424 			/* Apply projection to each path */
425 			foreach(lc, rel->pathlist)
426 			{
427 				Path	   *subpath = (Path *) lfirst(lc);
428 				Path	   *path;
429 
430 				Assert(subpath->param_info == NULL);
431 				path = apply_projection_to_path(root, subpath->parent,
432 												subpath, target);
433 				/* If we had to add a Result, path is different from subpath */
434 				if (path != subpath)
435 					lfirst(lc) = path;
436 			}
437 
438 			/* Apply projection to each partial path */
439 			foreach(lc, rel->partial_pathlist)
440 			{
441 				Path	   *subpath = (Path *) lfirst(lc);
442 				Path	   *path;
443 
444 				Assert(subpath->param_info == NULL);
445 
446 				/* avoid apply_projection_to_path, in case of multiple refs */
447 				path = (Path *) create_projection_path(root, subpath->parent,
448 													   subpath, target);
449 				lfirst(lc) = path;
450 			}
451 		}
452 	}
453 	else
454 	{
455 		elog(ERROR, "unrecognized node type: %d",
456 			 (int) nodeTag(setOp));
457 		*pTargetList = NIL;
458 	}
459 
460 	postprocess_setop_rel(root, rel);
461 
462 	return rel;
463 }
464 
465 /*
466  * Generate paths for a recursive UNION node
467  */
468 static RelOptInfo *
generate_recursion_path(SetOperationStmt * setOp,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)469 generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
470 						List *refnames_tlist,
471 						List **pTargetList)
472 {
473 	RelOptInfo *result_rel;
474 	Path	   *path;
475 	RelOptInfo *lrel,
476 			   *rrel;
477 	Path	   *lpath;
478 	Path	   *rpath;
479 	List	   *lpath_tlist;
480 	List	   *rpath_tlist;
481 	List	   *tlist;
482 	List	   *groupList;
483 	double		dNumGroups;
484 
485 	/* Parser should have rejected other cases */
486 	if (setOp->op != SETOP_UNION)
487 		elog(ERROR, "only UNION queries can be recursive");
488 	/* Worktable ID should be assigned */
489 	Assert(root->wt_param_id >= 0);
490 
491 	/*
492 	 * Unlike a regular UNION node, process the left and right inputs
493 	 * separately without any intention of combining them into one Append.
494 	 */
495 	lrel = recurse_set_operations(setOp->larg, root,
496 								  setOp->colTypes, setOp->colCollations,
497 								  false, -1,
498 								  refnames_tlist,
499 								  &lpath_tlist,
500 								  NULL);
501 	lpath = lrel->cheapest_total_path;
502 	/* The right path will want to look at the left one ... */
503 	root->non_recursive_path = lpath;
504 	rrel = recurse_set_operations(setOp->rarg, root,
505 								  setOp->colTypes, setOp->colCollations,
506 								  false, -1,
507 								  refnames_tlist,
508 								  &rpath_tlist,
509 								  NULL);
510 	rpath = rrel->cheapest_total_path;
511 	root->non_recursive_path = NULL;
512 
513 	/*
514 	 * Generate tlist for RecursiveUnion path node --- same as in Append cases
515 	 */
516 	tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
517 								  list_make2(lpath_tlist, rpath_tlist),
518 								  refnames_tlist);
519 
520 	*pTargetList = tlist;
521 
522 	/* Build result relation. */
523 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
524 								 bms_union(lrel->relids, rrel->relids));
525 	result_rel->reltarget = create_pathtarget(root, tlist);
526 
527 	/*
528 	 * If UNION, identify the grouping operators
529 	 */
530 	if (setOp->all)
531 	{
532 		groupList = NIL;
533 		dNumGroups = 0;
534 	}
535 	else
536 	{
537 		/* Identify the grouping semantics */
538 		groupList = generate_setop_grouplist(setOp, tlist);
539 
540 		/* We only support hashing here */
541 		if (!grouping_is_hashable(groupList))
542 			ereport(ERROR,
543 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
544 					 errmsg("could not implement recursive UNION"),
545 					 errdetail("All column datatypes must be hashable.")));
546 
547 		/*
548 		 * For the moment, take the number of distinct groups as equal to the
549 		 * total input size, ie, the worst case.
550 		 */
551 		dNumGroups = lpath->rows + rpath->rows * 10;
552 	}
553 
554 	/*
555 	 * And make the path node.
556 	 */
557 	path = (Path *) create_recursiveunion_path(root,
558 											   result_rel,
559 											   lpath,
560 											   rpath,
561 											   result_rel->reltarget,
562 											   groupList,
563 											   root->wt_param_id,
564 											   dNumGroups);
565 
566 	add_path(result_rel, path);
567 	postprocess_setop_rel(root, result_rel);
568 	return result_rel;
569 }
570 
571 /*
572  * Generate paths for a UNION or UNION ALL node
573  */
574 static RelOptInfo *
generate_union_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)575 generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
576 					 List *refnames_tlist,
577 					 List **pTargetList)
578 {
579 	Relids		relids = NULL;
580 	RelOptInfo *result_rel;
581 	double		save_fraction = root->tuple_fraction;
582 	ListCell   *lc;
583 	List	   *pathlist = NIL;
584 	List	   *partial_pathlist = NIL;
585 	bool		partial_paths_valid = true;
586 	bool		consider_parallel = true;
587 	List	   *rellist;
588 	List	   *tlist_list;
589 	List	   *tlist;
590 	Path	   *path;
591 
592 	/*
593 	 * If plain UNION, tell children to fetch all tuples.
594 	 *
595 	 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
596 	 * each arm of the UNION ALL.  One could make a case for reducing the
597 	 * tuple fraction for later arms (discounting by the expected size of the
598 	 * earlier arms' results) but it seems not worth the trouble. The normal
599 	 * case where tuple_fraction isn't already zero is a LIMIT at top level,
600 	 * and passing it down as-is is usually enough to get the desired result
601 	 * of preferring fast-start plans.
602 	 */
603 	if (!op->all)
604 		root->tuple_fraction = 0.0;
605 
606 	/*
607 	 * If any of my children are identical UNION nodes (same op, all-flag, and
608 	 * colTypes) then they can be merged into this node so that we generate
609 	 * only one Append and unique-ification for the lot.  Recurse to find such
610 	 * nodes and compute their children's paths.
611 	 */
612 	rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
613 
614 	/*
615 	 * Generate tlist for Append plan node.
616 	 *
617 	 * The tlist for an Append plan isn't important as far as the Append is
618 	 * concerned, but we must make it look real anyway for the benefit of the
619 	 * next plan level up.
620 	 */
621 	tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
622 								  tlist_list, refnames_tlist);
623 
624 	*pTargetList = tlist;
625 
626 	/* Build path lists and relid set. */
627 	foreach(lc, rellist)
628 	{
629 		RelOptInfo *rel = lfirst(lc);
630 
631 		pathlist = lappend(pathlist, rel->cheapest_total_path);
632 
633 		if (consider_parallel)
634 		{
635 			if (!rel->consider_parallel)
636 			{
637 				consider_parallel = false;
638 				partial_paths_valid = false;
639 			}
640 			else if (rel->partial_pathlist == NIL)
641 				partial_paths_valid = false;
642 			else
643 				partial_pathlist = lappend(partial_pathlist,
644 										   linitial(rel->partial_pathlist));
645 		}
646 
647 		relids = bms_union(relids, rel->relids);
648 	}
649 
650 	/* Build result relation. */
651 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
652 	result_rel->reltarget = create_pathtarget(root, tlist);
653 	result_rel->consider_parallel = consider_parallel;
654 
655 	/*
656 	 * Append the child results together.
657 	 */
658 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
659 									   NULL, 0, false, NIL, -1);
660 
661 	/*
662 	 * For UNION ALL, we just need the Append path.  For UNION, need to add
663 	 * node(s) to remove duplicates.
664 	 */
665 	if (!op->all)
666 		path = make_union_unique(op, path, tlist, root);
667 
668 	add_path(result_rel, path);
669 
670 	/*
671 	 * Estimate number of groups.  For now we just assume the output is unique
672 	 * --- this is certainly true for the UNION case, and we want worst-case
673 	 * estimates anyway.
674 	 */
675 	result_rel->rows = path->rows;
676 
677 	/*
678 	 * Now consider doing the same thing using the partial paths plus Append
679 	 * plus Gather.
680 	 */
681 	if (partial_paths_valid)
682 	{
683 		Path	   *ppath;
684 		ListCell   *lc;
685 		int			parallel_workers = 0;
686 
687 		/* Find the highest number of workers requested for any subpath. */
688 		foreach(lc, partial_pathlist)
689 		{
690 			Path	   *path = lfirst(lc);
691 
692 			parallel_workers = Max(parallel_workers, path->parallel_workers);
693 		}
694 		Assert(parallel_workers > 0);
695 
696 		/*
697 		 * If the use of parallel append is permitted, always request at least
698 		 * log2(# of children) paths.  We assume it can be useful to have
699 		 * extra workers in this case because they will be spread out across
700 		 * the children.  The precise formula is just a guess; see
701 		 * add_paths_to_append_rel.
702 		 */
703 		if (enable_parallel_append)
704 		{
705 			parallel_workers = Max(parallel_workers,
706 								   fls(list_length(partial_pathlist)));
707 			parallel_workers = Min(parallel_workers,
708 								   max_parallel_workers_per_gather);
709 		}
710 		Assert(parallel_workers > 0);
711 
712 		ppath = (Path *)
713 			create_append_path(root, result_rel, NIL, partial_pathlist,
714 							   NULL, parallel_workers, enable_parallel_append,
715 							   NIL, -1);
716 		ppath = (Path *)
717 			create_gather_path(root, result_rel, ppath,
718 							   result_rel->reltarget, NULL, NULL);
719 		if (!op->all)
720 			ppath = make_union_unique(op, ppath, tlist, root);
721 		add_path(result_rel, ppath);
722 	}
723 
724 	/* Undo effects of possibly forcing tuple_fraction to 0 */
725 	root->tuple_fraction = save_fraction;
726 
727 	return result_rel;
728 }
729 
730 /*
731  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
732  */
733 static RelOptInfo *
generate_nonunion_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)734 generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
735 						List *refnames_tlist,
736 						List **pTargetList)
737 {
738 	RelOptInfo *result_rel;
739 	RelOptInfo *lrel,
740 			   *rrel;
741 	double		save_fraction = root->tuple_fraction;
742 	Path	   *lpath,
743 			   *rpath,
744 			   *path;
745 	List	   *lpath_tlist,
746 			   *rpath_tlist,
747 			   *tlist_list,
748 			   *tlist,
749 			   *groupList,
750 			   *pathlist;
751 	double		dLeftGroups,
752 				dRightGroups,
753 				dNumGroups,
754 				dNumOutputRows;
755 	bool		use_hash;
756 	SetOpCmd	cmd;
757 	int			firstFlag;
758 
759 	/*
760 	 * Tell children to fetch all tuples.
761 	 */
762 	root->tuple_fraction = 0.0;
763 
764 	/* Recurse on children, ensuring their outputs are marked */
765 	lrel = recurse_set_operations(op->larg, root,
766 								  op->colTypes, op->colCollations,
767 								  false, 0,
768 								  refnames_tlist,
769 								  &lpath_tlist,
770 								  &dLeftGroups);
771 	lpath = lrel->cheapest_total_path;
772 	rrel = recurse_set_operations(op->rarg, root,
773 								  op->colTypes, op->colCollations,
774 								  false, 1,
775 								  refnames_tlist,
776 								  &rpath_tlist,
777 								  &dRightGroups);
778 	rpath = rrel->cheapest_total_path;
779 
780 	/* Undo effects of forcing tuple_fraction to 0 */
781 	root->tuple_fraction = save_fraction;
782 
783 	/*
784 	 * For EXCEPT, we must put the left input first.  For INTERSECT, either
785 	 * order should give the same results, and we prefer to put the smaller
786 	 * input first in order to minimize the size of the hash table in the
787 	 * hashing case.  "Smaller" means the one with the fewer groups.
788 	 */
789 	if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
790 	{
791 		pathlist = list_make2(lpath, rpath);
792 		tlist_list = list_make2(lpath_tlist, rpath_tlist);
793 		firstFlag = 0;
794 	}
795 	else
796 	{
797 		pathlist = list_make2(rpath, lpath);
798 		tlist_list = list_make2(rpath_tlist, lpath_tlist);
799 		firstFlag = 1;
800 	}
801 
802 	/*
803 	 * Generate tlist for Append plan node.
804 	 *
805 	 * The tlist for an Append plan isn't important as far as the Append is
806 	 * concerned, but we must make it look real anyway for the benefit of the
807 	 * next plan level up.  In fact, it has to be real enough that the flag
808 	 * column is shown as a variable not a constant, else setrefs.c will get
809 	 * confused.
810 	 */
811 	tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
812 								  tlist_list, refnames_tlist);
813 
814 	*pTargetList = tlist;
815 
816 	/* Build result relation. */
817 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
818 								 bms_union(lrel->relids, rrel->relids));
819 	result_rel->reltarget = create_pathtarget(root, tlist);
820 
821 	/*
822 	 * Append the child results together.
823 	 */
824 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
825 									   NULL, 0, false, NIL, -1);
826 
827 	/* Identify the grouping semantics */
828 	groupList = generate_setop_grouplist(op, tlist);
829 
830 	/*
831 	 * Estimate number of distinct groups that we'll need hashtable entries
832 	 * for; this is the size of the left-hand input for EXCEPT, or the smaller
833 	 * input for INTERSECT.  Also estimate the number of eventual output rows.
834 	 * In non-ALL cases, we estimate each group produces one output row; in
835 	 * ALL cases use the relevant relation size.  These are worst-case
836 	 * estimates, of course, but we need to be conservative.
837 	 */
838 	if (op->op == SETOP_EXCEPT)
839 	{
840 		dNumGroups = dLeftGroups;
841 		dNumOutputRows = op->all ? lpath->rows : dNumGroups;
842 	}
843 	else
844 	{
845 		dNumGroups = Min(dLeftGroups, dRightGroups);
846 		dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
847 	}
848 
849 	/*
850 	 * Decide whether to hash or sort, and add a sort node if needed.
851 	 */
852 	use_hash = choose_hashed_setop(root, groupList, path,
853 								   dNumGroups, dNumOutputRows,
854 								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
855 
856 	if (groupList && !use_hash)
857 		path = (Path *) create_sort_path(root,
858 										 result_rel,
859 										 path,
860 										 make_pathkeys_for_sortclauses(root,
861 																	   groupList,
862 																	   tlist),
863 										 -1.0);
864 
865 	/*
866 	 * Finally, add a SetOp path node to generate the correct output.
867 	 */
868 	switch (op->op)
869 	{
870 		case SETOP_INTERSECT:
871 			cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
872 			break;
873 		case SETOP_EXCEPT:
874 			cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
875 			break;
876 		default:
877 			elog(ERROR, "unrecognized set op: %d", (int) op->op);
878 			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
879 			break;
880 	}
881 	path = (Path *) create_setop_path(root,
882 									  result_rel,
883 									  path,
884 									  cmd,
885 									  use_hash ? SETOP_HASHED : SETOP_SORTED,
886 									  groupList,
887 									  list_length(op->colTypes) + 1,
888 									  use_hash ? firstFlag : -1,
889 									  dNumGroups,
890 									  dNumOutputRows);
891 
892 	result_rel->rows = path->rows;
893 	add_path(result_rel, path);
894 	return result_rel;
895 }
896 
897 /*
898  * Pull up children of a UNION node that are identically-propertied UNIONs.
899  *
900  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
901  * output rows will be lost anyway.
902  *
903  * NOTE: currently, we ignore collations while determining if a child has
904  * the same properties.  This is semantically sound only so long as all
905  * collations have the same notion of equality.  It is valid from an
906  * implementation standpoint because we don't care about the ordering of
907  * a UNION child's result: UNION ALL results are always unordered, and
908  * generate_union_paths will force a fresh sort if the top level is a UNION.
909  */
910 static List *
plan_union_children(PlannerInfo * root,SetOperationStmt * top_union,List * refnames_tlist,List ** tlist_list)911 plan_union_children(PlannerInfo *root,
912 					SetOperationStmt *top_union,
913 					List *refnames_tlist,
914 					List **tlist_list)
915 {
916 	List	   *pending_rels = list_make1(top_union);
917 	List	   *result = NIL;
918 	List	   *child_tlist;
919 
920 	*tlist_list = NIL;
921 
922 	while (pending_rels != NIL)
923 	{
924 		Node	   *setOp = linitial(pending_rels);
925 
926 		pending_rels = list_delete_first(pending_rels);
927 
928 		if (IsA(setOp, SetOperationStmt))
929 		{
930 			SetOperationStmt *op = (SetOperationStmt *) setOp;
931 
932 			if (op->op == top_union->op &&
933 				(op->all == top_union->all || op->all) &&
934 				equal(op->colTypes, top_union->colTypes))
935 			{
936 				/* Same UNION, so fold children into parent */
937 				pending_rels = lcons(op->rarg, pending_rels);
938 				pending_rels = lcons(op->larg, pending_rels);
939 				continue;
940 			}
941 		}
942 
943 		/*
944 		 * Not same, so plan this child separately.
945 		 *
946 		 * Note we disallow any resjunk columns in child results.  This is
947 		 * necessary since the Append node that implements the union won't do
948 		 * any projection, and upper levels will get confused if some of our
949 		 * output tuples have junk and some don't.  This case only arises when
950 		 * we have an EXCEPT or INTERSECT as child, else there won't be
951 		 * resjunk anyway.
952 		 */
953 		result = lappend(result, recurse_set_operations(setOp, root,
954 														top_union->colTypes,
955 														top_union->colCollations,
956 														false, -1,
957 														refnames_tlist,
958 														&child_tlist,
959 														NULL));
960 		*tlist_list = lappend(*tlist_list, child_tlist);
961 	}
962 
963 	return result;
964 }
965 
966 /*
967  * Add nodes to the given path tree to unique-ify the result of a UNION.
968  */
969 static Path *
make_union_unique(SetOperationStmt * op,Path * path,List * tlist,PlannerInfo * root)970 make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
971 				  PlannerInfo *root)
972 {
973 	RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
974 	List	   *groupList;
975 	double		dNumGroups;
976 
977 	/* Identify the grouping semantics */
978 	groupList = generate_setop_grouplist(op, tlist);
979 
980 	/*
981 	 * XXX for the moment, take the number of distinct groups as equal to the
982 	 * total input size, ie, the worst case.  This is too conservative, but we
983 	 * don't want to risk having the hashtable overrun memory; also, it's not
984 	 * clear how to get a decent estimate of the true size.  One should note
985 	 * as well the propensity of novices to write UNION rather than UNION ALL
986 	 * even when they don't expect any duplicates...
987 	 */
988 	dNumGroups = path->rows;
989 
990 	/* Decide whether to hash or sort */
991 	if (choose_hashed_setop(root, groupList, path,
992 							dNumGroups, dNumGroups,
993 							"UNION"))
994 	{
995 		/* Hashed aggregate plan --- no sort needed */
996 		path = (Path *) create_agg_path(root,
997 										result_rel,
998 										path,
999 										create_pathtarget(root, tlist),
1000 										AGG_HASHED,
1001 										AGGSPLIT_SIMPLE,
1002 										groupList,
1003 										NIL,
1004 										NULL,
1005 										dNumGroups);
1006 	}
1007 	else
1008 	{
1009 		/* Sort and Unique */
1010 		if (groupList)
1011 			path = (Path *)
1012 				create_sort_path(root,
1013 								 result_rel,
1014 								 path,
1015 								 make_pathkeys_for_sortclauses(root,
1016 															   groupList,
1017 															   tlist),
1018 								 -1.0);
1019 		path = (Path *) create_upper_unique_path(root,
1020 												 result_rel,
1021 												 path,
1022 												 list_length(path->pathkeys),
1023 												 dNumGroups);
1024 	}
1025 
1026 	return path;
1027 }
1028 
1029 /*
1030  * postprocess_setop_rel - perform steps required after adding paths
1031  */
1032 static void
postprocess_setop_rel(PlannerInfo * root,RelOptInfo * rel)1033 postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1034 {
1035 	/*
1036 	 * We don't currently worry about allowing FDWs to contribute paths to
1037 	 * this relation, but give extensions a chance.
1038 	 */
1039 	if (create_upper_paths_hook)
1040 		(*create_upper_paths_hook) (root, UPPERREL_SETOP,
1041 									NULL, rel, NULL);
1042 
1043 	/* Select cheapest path */
1044 	set_cheapest(rel);
1045 }
1046 
1047 /*
1048  * choose_hashed_setop - should we use hashing for a set operation?
1049  */
1050 static bool
choose_hashed_setop(PlannerInfo * root,List * groupClauses,Path * input_path,double dNumGroups,double dNumOutputRows,const char * construct)1051 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1052 					Path *input_path,
1053 					double dNumGroups, double dNumOutputRows,
1054 					const char *construct)
1055 {
1056 	int			numGroupCols = list_length(groupClauses);
1057 	bool		can_sort;
1058 	bool		can_hash;
1059 	Size		hashentrysize;
1060 	Path		hashed_p;
1061 	Path		sorted_p;
1062 	double		tuple_fraction;
1063 
1064 	/* Check whether the operators support sorting or hashing */
1065 	can_sort = grouping_is_sortable(groupClauses);
1066 	can_hash = grouping_is_hashable(groupClauses);
1067 	if (can_hash && can_sort)
1068 	{
1069 		/* we have a meaningful choice to make, continue ... */
1070 	}
1071 	else if (can_hash)
1072 		return true;
1073 	else if (can_sort)
1074 		return false;
1075 	else
1076 		ereport(ERROR,
1077 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1078 		/* translator: %s is UNION, INTERSECT, or EXCEPT */
1079 				 errmsg("could not implement %s", construct),
1080 				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1081 
1082 	/* Prefer sorting when enable_hashagg is off */
1083 	if (!enable_hashagg)
1084 		return false;
1085 
1086 	/*
1087 	 * Don't do it if it doesn't look like the hashtable will fit into
1088 	 * work_mem.
1089 	 */
1090 	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1091 
1092 	if (hashentrysize * dNumGroups > work_mem * 1024L)
1093 		return false;
1094 
1095 	/*
1096 	 * See if the estimated cost is no more than doing it the other way.
1097 	 *
1098 	 * We need to consider input_plan + hashagg versus input_plan + sort +
1099 	 * group.  Note that the actual result plan might involve a SetOp or
1100 	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1101 	 * should be close enough for our purposes here.
1102 	 *
1103 	 * These path variables are dummies that just hold cost fields; we don't
1104 	 * make actual Paths for these steps.
1105 	 */
1106 	cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1107 			 numGroupCols, dNumGroups,
1108 			 NIL,
1109 			 input_path->startup_cost, input_path->total_cost,
1110 			 input_path->rows);
1111 
1112 	/*
1113 	 * Now for the sorted case.  Note that the input is *always* unsorted,
1114 	 * since it was made by appending unrelated sub-relations together.
1115 	 */
1116 	sorted_p.startup_cost = input_path->startup_cost;
1117 	sorted_p.total_cost = input_path->total_cost;
1118 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1119 	cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1120 			  input_path->rows, input_path->pathtarget->width,
1121 			  0.0, work_mem, -1.0);
1122 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1123 			   NIL,
1124 			   sorted_p.startup_cost, sorted_p.total_cost,
1125 			   input_path->rows);
1126 
1127 	/*
1128 	 * Now make the decision using the top-level tuple fraction.  First we
1129 	 * have to convert an absolute count (LIMIT) into fractional form.
1130 	 */
1131 	tuple_fraction = root->tuple_fraction;
1132 	if (tuple_fraction >= 1.0)
1133 		tuple_fraction /= dNumOutputRows;
1134 
1135 	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1136 									  tuple_fraction) < 0)
1137 	{
1138 		/* Hashed is cheaper, so use it */
1139 		return true;
1140 	}
1141 	return false;
1142 }
1143 
1144 /*
1145  * Generate targetlist for a set-operation plan node
1146  *
1147  * colTypes: OID list of set-op's result column datatypes
1148  * colCollations: OID list of set-op's result column collations
1149  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1150  * varno: varno to use in generated Vars
1151  * hack_constants: true to copy up constants (see comments in code)
1152  * input_tlist: targetlist of this node's input node
1153  * refnames_tlist: targetlist to take column names from
1154  */
1155 static List *
generate_setop_tlist(List * colTypes,List * colCollations,int flag,Index varno,bool hack_constants,List * input_tlist,List * refnames_tlist)1156 generate_setop_tlist(List *colTypes, List *colCollations,
1157 					 int flag,
1158 					 Index varno,
1159 					 bool hack_constants,
1160 					 List *input_tlist,
1161 					 List *refnames_tlist)
1162 {
1163 	List	   *tlist = NIL;
1164 	int			resno = 1;
1165 	ListCell   *ctlc,
1166 			   *cclc,
1167 			   *itlc,
1168 			   *rtlc;
1169 	TargetEntry *tle;
1170 	Node	   *expr;
1171 
1172 	/* there's no forfour() so we must chase one list manually */
1173 	rtlc = list_head(refnames_tlist);
1174 	forthree(ctlc, colTypes, cclc, colCollations, itlc, input_tlist)
1175 	{
1176 		Oid			colType = lfirst_oid(ctlc);
1177 		Oid			colColl = lfirst_oid(cclc);
1178 		TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1179 		TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1180 
1181 		rtlc = lnext(rtlc);
1182 
1183 		Assert(inputtle->resno == resno);
1184 		Assert(reftle->resno == resno);
1185 		Assert(!inputtle->resjunk);
1186 		Assert(!reftle->resjunk);
1187 
1188 		/*
1189 		 * Generate columns referencing input columns and having appropriate
1190 		 * data types and column names.  Insert datatype coercions where
1191 		 * necessary.
1192 		 *
1193 		 * HACK: constants in the input's targetlist are copied up as-is
1194 		 * rather than being referenced as subquery outputs.  This is mainly
1195 		 * to ensure that when we try to coerce them to the output column's
1196 		 * datatype, the right things happen for UNKNOWN constants.  But do
1197 		 * this only at the first level of subquery-scan plans; we don't want
1198 		 * phony constants appearing in the output tlists of upper-level
1199 		 * nodes!
1200 		 */
1201 		if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1202 			expr = (Node *) inputtle->expr;
1203 		else
1204 			expr = (Node *) makeVar(varno,
1205 									inputtle->resno,
1206 									exprType((Node *) inputtle->expr),
1207 									exprTypmod((Node *) inputtle->expr),
1208 									exprCollation((Node *) inputtle->expr),
1209 									0);
1210 
1211 		if (exprType(expr) != colType)
1212 		{
1213 			/*
1214 			 * Note: it's not really cool to be applying coerce_to_common_type
1215 			 * here; one notable point is that assign_expr_collations never
1216 			 * gets run on any generated nodes.  For the moment that's not a
1217 			 * problem because we force the correct exposed collation below.
1218 			 * It would likely be best to make the parser generate the correct
1219 			 * output tlist for every set-op to begin with, though.
1220 			 */
1221 			expr = coerce_to_common_type(NULL,	/* no UNKNOWNs here */
1222 										 expr,
1223 										 colType,
1224 										 "UNION/INTERSECT/EXCEPT");
1225 		}
1226 
1227 		/*
1228 		 * Ensure the tlist entry's exposed collation matches the set-op. This
1229 		 * is necessary because plan_set_operations() reports the result
1230 		 * ordering as a list of SortGroupClauses, which don't carry collation
1231 		 * themselves but just refer to tlist entries.  If we don't show the
1232 		 * right collation then planner.c might do the wrong thing in
1233 		 * higher-level queries.
1234 		 *
1235 		 * Note we use RelabelType, not CollateExpr, since this expression
1236 		 * will reach the executor without any further processing.
1237 		 */
1238 		if (exprCollation(expr) != colColl)
1239 		{
1240 			expr = (Node *) makeRelabelType((Expr *) expr,
1241 											exprType(expr),
1242 											exprTypmod(expr),
1243 											colColl,
1244 											COERCE_IMPLICIT_CAST);
1245 		}
1246 
1247 		tle = makeTargetEntry((Expr *) expr,
1248 							  (AttrNumber) resno++,
1249 							  pstrdup(reftle->resname),
1250 							  false);
1251 
1252 		/*
1253 		 * By convention, all non-resjunk columns in a setop tree have
1254 		 * ressortgroupref equal to their resno.  In some cases the ref isn't
1255 		 * needed, but this is a cleaner way than modifying the tlist later.
1256 		 */
1257 		tle->ressortgroupref = tle->resno;
1258 
1259 		tlist = lappend(tlist, tle);
1260 	}
1261 
1262 	if (flag >= 0)
1263 	{
1264 		/* Add a resjunk flag column */
1265 		/* flag value is the given constant */
1266 		expr = (Node *) makeConst(INT4OID,
1267 								  -1,
1268 								  InvalidOid,
1269 								  sizeof(int32),
1270 								  Int32GetDatum(flag),
1271 								  false,
1272 								  true);
1273 		tle = makeTargetEntry((Expr *) expr,
1274 							  (AttrNumber) resno++,
1275 							  pstrdup("flag"),
1276 							  true);
1277 		tlist = lappend(tlist, tle);
1278 	}
1279 
1280 	return tlist;
1281 }
1282 
1283 /*
1284  * Generate targetlist for a set-operation Append node
1285  *
1286  * colTypes: OID list of set-op's result column datatypes
1287  * colCollations: OID list of set-op's result column collations
1288  * flag: true to create a flag column copied up from subplans
1289  * input_tlists: list of tlists for sub-plans of the Append
1290  * refnames_tlist: targetlist to take column names from
1291  *
1292  * The entries in the Append's targetlist should always be simple Vars;
1293  * we just have to make sure they have the right datatypes/typmods/collations.
1294  * The Vars are always generated with varno 0.
1295  *
1296  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1297  * cannot figure out a realistic width for the tlist we make here.  But we
1298  * ought to refactor this code to produce a PathTarget directly, anyway.
1299  */
1300 static List *
generate_append_tlist(List * colTypes,List * colCollations,bool flag,List * input_tlists,List * refnames_tlist)1301 generate_append_tlist(List *colTypes, List *colCollations,
1302 					  bool flag,
1303 					  List *input_tlists,
1304 					  List *refnames_tlist)
1305 {
1306 	List	   *tlist = NIL;
1307 	int			resno = 1;
1308 	ListCell   *curColType;
1309 	ListCell   *curColCollation;
1310 	ListCell   *ref_tl_item;
1311 	int			colindex;
1312 	TargetEntry *tle;
1313 	Node	   *expr;
1314 	ListCell   *tlistl;
1315 	int32	   *colTypmods;
1316 
1317 	/*
1318 	 * First extract typmods to use.
1319 	 *
1320 	 * If the inputs all agree on type and typmod of a particular column, use
1321 	 * that typmod; else use -1.
1322 	 */
1323 	colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1324 
1325 	foreach(tlistl, input_tlists)
1326 	{
1327 		List	   *subtlist = (List *) lfirst(tlistl);
1328 		ListCell   *subtlistl;
1329 
1330 		curColType = list_head(colTypes);
1331 		colindex = 0;
1332 		foreach(subtlistl, subtlist)
1333 		{
1334 			TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1335 
1336 			if (subtle->resjunk)
1337 				continue;
1338 			Assert(curColType != NULL);
1339 			if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1340 			{
1341 				/* If first subplan, copy the typmod; else compare */
1342 				int32		subtypmod = exprTypmod((Node *) subtle->expr);
1343 
1344 				if (tlistl == list_head(input_tlists))
1345 					colTypmods[colindex] = subtypmod;
1346 				else if (subtypmod != colTypmods[colindex])
1347 					colTypmods[colindex] = -1;
1348 			}
1349 			else
1350 			{
1351 				/* types disagree, so force typmod to -1 */
1352 				colTypmods[colindex] = -1;
1353 			}
1354 			curColType = lnext(curColType);
1355 			colindex++;
1356 		}
1357 		Assert(curColType == NULL);
1358 	}
1359 
1360 	/*
1361 	 * Now we can build the tlist for the Append.
1362 	 */
1363 	colindex = 0;
1364 	forthree(curColType, colTypes, curColCollation, colCollations,
1365 			 ref_tl_item, refnames_tlist)
1366 	{
1367 		Oid			colType = lfirst_oid(curColType);
1368 		int32		colTypmod = colTypmods[colindex++];
1369 		Oid			colColl = lfirst_oid(curColCollation);
1370 		TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1371 
1372 		Assert(reftle->resno == resno);
1373 		Assert(!reftle->resjunk);
1374 		expr = (Node *) makeVar(0,
1375 								resno,
1376 								colType,
1377 								colTypmod,
1378 								colColl,
1379 								0);
1380 		tle = makeTargetEntry((Expr *) expr,
1381 							  (AttrNumber) resno++,
1382 							  pstrdup(reftle->resname),
1383 							  false);
1384 
1385 		/*
1386 		 * By convention, all non-resjunk columns in a setop tree have
1387 		 * ressortgroupref equal to their resno.  In some cases the ref isn't
1388 		 * needed, but this is a cleaner way than modifying the tlist later.
1389 		 */
1390 		tle->ressortgroupref = tle->resno;
1391 
1392 		tlist = lappend(tlist, tle);
1393 	}
1394 
1395 	if (flag)
1396 	{
1397 		/* Add a resjunk flag column */
1398 		/* flag value is shown as copied up from subplan */
1399 		expr = (Node *) makeVar(0,
1400 								resno,
1401 								INT4OID,
1402 								-1,
1403 								InvalidOid,
1404 								0);
1405 		tle = makeTargetEntry((Expr *) expr,
1406 							  (AttrNumber) resno++,
1407 							  pstrdup("flag"),
1408 							  true);
1409 		tlist = lappend(tlist, tle);
1410 	}
1411 
1412 	pfree(colTypmods);
1413 
1414 	return tlist;
1415 }
1416 
1417 /*
1418  * generate_setop_grouplist
1419  *		Build a SortGroupClause list defining the sort/grouping properties
1420  *		of the setop's output columns.
1421  *
1422  * Parse analysis already determined the properties and built a suitable
1423  * list, except that the entries do not have sortgrouprefs set because
1424  * the parser output representation doesn't include a tlist for each
1425  * setop.  So what we need to do here is copy that list and install
1426  * proper sortgrouprefs into it (copying those from the targetlist).
1427  */
1428 static List *
generate_setop_grouplist(SetOperationStmt * op,List * targetlist)1429 generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1430 {
1431 	List	   *grouplist = copyObject(op->groupClauses);
1432 	ListCell   *lg;
1433 	ListCell   *lt;
1434 
1435 	lg = list_head(grouplist);
1436 	foreach(lt, targetlist)
1437 	{
1438 		TargetEntry *tle = (TargetEntry *) lfirst(lt);
1439 		SortGroupClause *sgc;
1440 
1441 		if (tle->resjunk)
1442 		{
1443 			/* resjunk columns should not have sortgrouprefs */
1444 			Assert(tle->ressortgroupref == 0);
1445 			continue;			/* ignore resjunk columns */
1446 		}
1447 
1448 		/* non-resjunk columns should have sortgroupref = resno */
1449 		Assert(tle->ressortgroupref == tle->resno);
1450 
1451 		/* non-resjunk columns should have grouping clauses */
1452 		Assert(lg != NULL);
1453 		sgc = (SortGroupClause *) lfirst(lg);
1454 		lg = lnext(lg);
1455 		Assert(sgc->tleSortGroupRef == 0);
1456 
1457 		sgc->tleSortGroupRef = tle->ressortgroupref;
1458 	}
1459 	Assert(lg == NULL);
1460 	return grouplist;
1461 }
1462 
1463 
1464 /*
1465  * expand_inherited_tables
1466  *		Expand each rangetable entry that represents an inheritance set
1467  *		into an "append relation".  At the conclusion of this process,
1468  *		the "inh" flag is set in all and only those RTEs that are append
1469  *		relation parents.
1470  */
1471 void
expand_inherited_tables(PlannerInfo * root)1472 expand_inherited_tables(PlannerInfo *root)
1473 {
1474 	Index		nrtes;
1475 	Index		rti;
1476 	ListCell   *rl;
1477 
1478 	/*
1479 	 * expand_inherited_rtentry may add RTEs to parse->rtable. The function is
1480 	 * expected to recursively handle any RTEs that it creates with inh=true.
1481 	 * So just scan as far as the original end of the rtable list.
1482 	 */
1483 	nrtes = list_length(root->parse->rtable);
1484 	rl = list_head(root->parse->rtable);
1485 	for (rti = 1; rti <= nrtes; rti++)
1486 	{
1487 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rl);
1488 
1489 		expand_inherited_rtentry(root, rte, rti);
1490 		rl = lnext(rl);
1491 	}
1492 }
1493 
1494 /*
1495  * expand_inherited_rtentry
1496  *		Check whether a rangetable entry represents an inheritance set.
1497  *		If so, add entries for all the child tables to the query's
1498  *		rangetable, and build AppendRelInfo nodes for all the child tables
1499  *		and add them to root->append_rel_list.  If not, clear the entry's
1500  *		"inh" flag to prevent later code from looking for AppendRelInfos.
1501  *
1502  * Note that the original RTE is considered to represent the whole
1503  * inheritance set.  The first of the generated RTEs is an RTE for the same
1504  * table, but with inh = false, to represent the parent table in its role
1505  * as a simple member of the inheritance set.
1506  *
1507  * A childless table is never considered to be an inheritance set. For
1508  * regular inheritance, a parent RTE must always have at least two associated
1509  * AppendRelInfos: one corresponding to the parent table as a simple member of
1510  * inheritance set and one or more corresponding to the actual children.
1511  * Since a partitioned table is not scanned, it might have only one associated
1512  * AppendRelInfo.
1513  */
1514 static void
expand_inherited_rtentry(PlannerInfo * root,RangeTblEntry * rte,Index rti)1515 expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
1516 {
1517 	Query	   *parse = root->parse;
1518 	Oid			parentOID;
1519 	PlanRowMark *oldrc;
1520 	Relation	oldrelation;
1521 	LOCKMODE	lockmode;
1522 	List	   *inhOIDs;
1523 	ListCell   *l;
1524 
1525 	/* Does RT entry allow inheritance? */
1526 	if (!rte->inh)
1527 		return;
1528 	/* Ignore any already-expanded UNION ALL nodes */
1529 	if (rte->rtekind != RTE_RELATION)
1530 	{
1531 		Assert(rte->rtekind == RTE_SUBQUERY);
1532 		return;
1533 	}
1534 	/* Fast path for common case of childless table */
1535 	parentOID = rte->relid;
1536 	if (!has_subclass(parentOID))
1537 	{
1538 		/* Clear flag before returning */
1539 		rte->inh = false;
1540 		return;
1541 	}
1542 
1543 	/*
1544 	 * The rewriter should already have obtained an appropriate lock on each
1545 	 * relation named in the query.  However, for each child relation we add
1546 	 * to the query, we must obtain an appropriate lock, because this will be
1547 	 * the first use of those relations in the parse/rewrite/plan pipeline.
1548 	 *
1549 	 * If the parent relation is the query's result relation, then we need
1550 	 * RowExclusiveLock.  Otherwise, if it's accessed FOR UPDATE/SHARE, we
1551 	 * need RowShareLock; otherwise AccessShareLock.  We can't just grab
1552 	 * AccessShareLock because then the executor would be trying to upgrade
1553 	 * the lock, leading to possible deadlocks.  (This code should match the
1554 	 * parser and rewriter.)
1555 	 */
1556 	oldrc = get_plan_rowmark(root->rowMarks, rti);
1557 	if (rti == parse->resultRelation)
1558 		lockmode = RowExclusiveLock;
1559 	else if (oldrc && RowMarkRequiresRowShareLock(oldrc->markType))
1560 		lockmode = RowShareLock;
1561 	else
1562 		lockmode = AccessShareLock;
1563 
1564 	/* Scan for all members of inheritance set, acquire needed locks */
1565 	inhOIDs = find_all_inheritors(parentOID, lockmode, NULL);
1566 
1567 	/*
1568 	 * Check that there's at least one descendant, else treat as no-child
1569 	 * case.  This could happen despite above has_subclass() check, if table
1570 	 * once had a child but no longer does.
1571 	 */
1572 	if (list_length(inhOIDs) < 2)
1573 	{
1574 		/* Clear flag before returning */
1575 		rte->inh = false;
1576 		return;
1577 	}
1578 
1579 	/*
1580 	 * If parent relation is selected FOR UPDATE/SHARE, we need to mark its
1581 	 * PlanRowMark as isParent = true, and generate a new PlanRowMark for each
1582 	 * child.
1583 	 */
1584 	if (oldrc)
1585 		oldrc->isParent = true;
1586 
1587 	/*
1588 	 * Must open the parent relation to examine its tupdesc.  We need not lock
1589 	 * it; we assume the rewriter already did.
1590 	 */
1591 	oldrelation = heap_open(parentOID, NoLock);
1592 
1593 	/* Scan the inheritance set and expand it */
1594 	if (RelationGetPartitionDesc(oldrelation) != NULL)
1595 	{
1596 		Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
1597 
1598 		/*
1599 		 * If this table has partitions, recursively expand them in the order
1600 		 * in which they appear in the PartitionDesc.  While at it, also
1601 		 * extract the partition key columns of all the partitioned tables.
1602 		 */
1603 		expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
1604 								   lockmode, &root->append_rel_list);
1605 	}
1606 	else
1607 	{
1608 		List	   *appinfos = NIL;
1609 		RangeTblEntry *childrte;
1610 		Index		childRTindex;
1611 
1612 		/*
1613 		 * This table has no partitions.  Expand any plain inheritance
1614 		 * children in the order the OIDs were returned by
1615 		 * find_all_inheritors.
1616 		 */
1617 		foreach(l, inhOIDs)
1618 		{
1619 			Oid			childOID = lfirst_oid(l);
1620 			Relation	newrelation;
1621 
1622 			/* Open rel if needed; we already have required locks */
1623 			if (childOID != parentOID)
1624 				newrelation = heap_open(childOID, NoLock);
1625 			else
1626 				newrelation = oldrelation;
1627 
1628 			/*
1629 			 * It is possible that the parent table has children that are temp
1630 			 * tables of other backends.  We cannot safely access such tables
1631 			 * (because of buffering issues), and the best thing to do seems
1632 			 * to be to silently ignore them.
1633 			 */
1634 			if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
1635 			{
1636 				heap_close(newrelation, lockmode);
1637 				continue;
1638 			}
1639 
1640 			expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1641 											newrelation,
1642 											&appinfos, &childrte,
1643 											&childRTindex);
1644 
1645 			/* Close child relations, but keep locks */
1646 			if (childOID != parentOID)
1647 				heap_close(newrelation, NoLock);
1648 		}
1649 
1650 		/*
1651 		 * If all the children were temp tables, pretend it's a
1652 		 * non-inheritance situation; we don't need Append node in that case.
1653 		 * The duplicate RTE we added for the parent table is harmless, so we
1654 		 * don't bother to get rid of it; ditto for the useless PlanRowMark
1655 		 * node.
1656 		 */
1657 		if (list_length(appinfos) < 2)
1658 			rte->inh = false;
1659 		else
1660 			root->append_rel_list = list_concat(root->append_rel_list,
1661 												appinfos);
1662 
1663 	}
1664 
1665 	heap_close(oldrelation, NoLock);
1666 }
1667 
1668 /*
1669  * expand_partitioned_rtentry
1670  *		Recursively expand an RTE for a partitioned table.
1671  *
1672  * Note that RelationGetPartitionDispatchInfo will expand partitions in the
1673  * same order as this code.
1674  */
1675 static void
expand_partitioned_rtentry(PlannerInfo * root,RangeTblEntry * parentrte,Index parentRTindex,Relation parentrel,PlanRowMark * top_parentrc,LOCKMODE lockmode,List ** appinfos)1676 expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
1677 						   Index parentRTindex, Relation parentrel,
1678 						   PlanRowMark *top_parentrc, LOCKMODE lockmode,
1679 						   List **appinfos)
1680 {
1681 	int			i;
1682 	RangeTblEntry *childrte;
1683 	Index		childRTindex;
1684 	PartitionDesc partdesc = RelationGetPartitionDesc(parentrel);
1685 
1686 	check_stack_depth();
1687 
1688 	/* A partitioned table should always have a partition descriptor. */
1689 	Assert(partdesc);
1690 
1691 	Assert(parentrte->inh);
1692 
1693 	/*
1694 	 * Note down whether any partition key cols are being updated. Though it's
1695 	 * the root partitioned table's updatedCols we are interested in, we
1696 	 * instead use parentrte to get the updatedCols. This is convenient
1697 	 * because parentrte already has the root partrel's updatedCols translated
1698 	 * to match the attribute ordering of parentrel.
1699 	 */
1700 	if (!root->partColsUpdated)
1701 		root->partColsUpdated =
1702 			has_partition_attrs(parentrel, parentrte->updatedCols, NULL);
1703 
1704 	/* First expand the partitioned table itself. */
1705 	expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel,
1706 									top_parentrc, parentrel,
1707 									appinfos, &childrte, &childRTindex);
1708 
1709 	/*
1710 	 * If the partitioned table has no partitions, treat this as the
1711 	 * non-inheritance case.
1712 	 */
1713 	if (partdesc->nparts == 0)
1714 	{
1715 		parentrte->inh = false;
1716 		return;
1717 	}
1718 
1719 	for (i = 0; i < partdesc->nparts; i++)
1720 	{
1721 		Oid			childOID = partdesc->oids[i];
1722 		Relation	childrel;
1723 
1724 		/* Open rel; we already have required locks */
1725 		childrel = heap_open(childOID, NoLock);
1726 
1727 		/*
1728 		 * Temporary partitions belonging to other sessions should have been
1729 		 * disallowed at definition, but for paranoia's sake, let's double
1730 		 * check.
1731 		 */
1732 		if (RELATION_IS_OTHER_TEMP(childrel))
1733 			elog(ERROR, "temporary relation from another session found as partition");
1734 
1735 		expand_single_inheritance_child(root, parentrte, parentRTindex,
1736 										parentrel, top_parentrc, childrel,
1737 										appinfos, &childrte, &childRTindex);
1738 
1739 		/* If this child is itself partitioned, recurse */
1740 		if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1741 			expand_partitioned_rtentry(root, childrte, childRTindex,
1742 									   childrel, top_parentrc, lockmode,
1743 									   appinfos);
1744 
1745 		/* Close child relation, but keep locks */
1746 		heap_close(childrel, NoLock);
1747 	}
1748 }
1749 
1750 /*
1751  * expand_single_inheritance_child
1752  *		Build a RangeTblEntry and an AppendRelInfo, if appropriate, plus
1753  *		maybe a PlanRowMark.
1754  *
1755  * We now expand the partition hierarchy level by level, creating a
1756  * corresponding hierarchy of AppendRelInfos and RelOptInfos, where each
1757  * partitioned descendant acts as a parent of its immediate partitions.
1758  * (This is a difference from what older versions of PostgreSQL did and what
1759  * is still done in the case of table inheritance for unpartitioned tables,
1760  * where the hierarchy is flattened during RTE expansion.)
1761  *
1762  * PlanRowMarks still carry the top-parent's RTI, and the top-parent's
1763  * allMarkTypes field still accumulates values from all descendents.
1764  *
1765  * "parentrte" and "parentRTindex" are immediate parent's RTE and
1766  * RTI. "top_parentrc" is top parent's PlanRowMark.
1767  *
1768  * The child RangeTblEntry and its RTI are returned in "childrte_p" and
1769  * "childRTindex_p" resp.
1770  */
1771 static void
expand_single_inheritance_child(PlannerInfo * root,RangeTblEntry * parentrte,Index parentRTindex,Relation parentrel,PlanRowMark * top_parentrc,Relation childrel,List ** appinfos,RangeTblEntry ** childrte_p,Index * childRTindex_p)1772 expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
1773 								Index parentRTindex, Relation parentrel,
1774 								PlanRowMark *top_parentrc, Relation childrel,
1775 								List **appinfos, RangeTblEntry **childrte_p,
1776 								Index *childRTindex_p)
1777 {
1778 	Query	   *parse = root->parse;
1779 	Oid			parentOID = RelationGetRelid(parentrel);
1780 	Oid			childOID = RelationGetRelid(childrel);
1781 	RangeTblEntry *childrte;
1782 	Index		childRTindex;
1783 	AppendRelInfo *appinfo;
1784 
1785 	/*
1786 	 * Build an RTE for the child, and attach to query's rangetable list. We
1787 	 * copy most fields of the parent's RTE, but replace relation OID and
1788 	 * relkind, and set inh = false.  Also, set requiredPerms to zero since
1789 	 * all required permissions checks are done on the original RTE. Likewise,
1790 	 * set the child's securityQuals to empty, because we only want to apply
1791 	 * the parent's RLS conditions regardless of what RLS properties
1792 	 * individual children may have.  (This is an intentional choice to make
1793 	 * inherited RLS work like regular permissions checks.) The parent
1794 	 * securityQuals will be propagated to children along with other base
1795 	 * restriction clauses, so we don't need to do it here.
1796 	 */
1797 	childrte = copyObject(parentrte);
1798 	*childrte_p = childrte;
1799 	childrte->relid = childOID;
1800 	childrte->relkind = childrel->rd_rel->relkind;
1801 	/* A partitioned child will need to be expanded further. */
1802 	if (childOID != parentOID &&
1803 		childrte->relkind == RELKIND_PARTITIONED_TABLE)
1804 		childrte->inh = true;
1805 	else
1806 		childrte->inh = false;
1807 	childrte->requiredPerms = 0;
1808 	childrte->securityQuals = NIL;
1809 	parse->rtable = lappend(parse->rtable, childrte);
1810 	childRTindex = list_length(parse->rtable);
1811 	*childRTindex_p = childRTindex;
1812 
1813 	/*
1814 	 * We need an AppendRelInfo if paths will be built for the child RTE. If
1815 	 * childrte->inh is true, then we'll always need to generate append paths
1816 	 * for it.  If childrte->inh is false, we must scan it if it's not a
1817 	 * partitioned table; but if it is a partitioned table, then it never has
1818 	 * any data of its own and need not be scanned.
1819 	 */
1820 	if (childrte->relkind != RELKIND_PARTITIONED_TABLE || childrte->inh)
1821 	{
1822 		appinfo = makeNode(AppendRelInfo);
1823 		appinfo->parent_relid = parentRTindex;
1824 		appinfo->child_relid = childRTindex;
1825 		appinfo->parent_reltype = parentrel->rd_rel->reltype;
1826 		appinfo->child_reltype = childrel->rd_rel->reltype;
1827 		make_inh_translation_list(parentrel, childrel, childRTindex,
1828 								  &appinfo->translated_vars);
1829 		appinfo->parent_reloid = parentOID;
1830 		*appinfos = lappend(*appinfos, appinfo);
1831 
1832 		/*
1833 		 * Translate the column permissions bitmaps to the child's attnums (we
1834 		 * have to build the translated_vars list before we can do this). But
1835 		 * if this is the parent table, leave copyObject's result alone.
1836 		 *
1837 		 * Note: we need to do this even though the executor won't run any
1838 		 * permissions checks on the child RTE.  The insertedCols/updatedCols
1839 		 * bitmaps may be examined for trigger-firing purposes.
1840 		 */
1841 		if (childOID != parentOID)
1842 		{
1843 			childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
1844 														 appinfo->translated_vars);
1845 			childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
1846 														 appinfo->translated_vars);
1847 			childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
1848 														appinfo->translated_vars);
1849 		}
1850 	}
1851 
1852 	/*
1853 	 * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1854 	 */
1855 	if (top_parentrc)
1856 	{
1857 		PlanRowMark *childrc = makeNode(PlanRowMark);
1858 
1859 		childrc->rti = childRTindex;
1860 		childrc->prti = top_parentrc->rti;
1861 		childrc->rowmarkId = top_parentrc->rowmarkId;
1862 		/* Reselect rowmark type, because relkind might not match parent */
1863 		childrc->markType = select_rowmark_type(childrte,
1864 												top_parentrc->strength);
1865 		childrc->allMarkTypes = (1 << childrc->markType);
1866 		childrc->strength = top_parentrc->strength;
1867 		childrc->waitPolicy = top_parentrc->waitPolicy;
1868 
1869 		/*
1870 		 * We mark RowMarks for partitioned child tables as parent RowMarks so
1871 		 * that the executor ignores them (except their existence means that
1872 		 * the child tables be locked using appropriate mode).
1873 		 */
1874 		childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1875 
1876 		/* Include child's rowmark type in top parent's allMarkTypes */
1877 		top_parentrc->allMarkTypes |= childrc->allMarkTypes;
1878 
1879 		root->rowMarks = lappend(root->rowMarks, childrc);
1880 	}
1881 }
1882 
1883 /*
1884  * make_inh_translation_list
1885  *	  Build the list of translations from parent Vars to child Vars for
1886  *	  an inheritance child.
1887  *
1888  * For paranoia's sake, we match type/collation as well as attribute name.
1889  */
1890 static void
make_inh_translation_list(Relation oldrelation,Relation newrelation,Index newvarno,List ** translated_vars)1891 make_inh_translation_list(Relation oldrelation, Relation newrelation,
1892 						  Index newvarno,
1893 						  List **translated_vars)
1894 {
1895 	List	   *vars = NIL;
1896 	TupleDesc	old_tupdesc = RelationGetDescr(oldrelation);
1897 	TupleDesc	new_tupdesc = RelationGetDescr(newrelation);
1898 	int			oldnatts = old_tupdesc->natts;
1899 	int			newnatts = new_tupdesc->natts;
1900 	int			old_attno;
1901 
1902 	for (old_attno = 0; old_attno < oldnatts; old_attno++)
1903 	{
1904 		Form_pg_attribute att;
1905 		char	   *attname;
1906 		Oid			atttypid;
1907 		int32		atttypmod;
1908 		Oid			attcollation;
1909 		int			new_attno;
1910 
1911 		att = TupleDescAttr(old_tupdesc, old_attno);
1912 		if (att->attisdropped)
1913 		{
1914 			/* Just put NULL into this list entry */
1915 			vars = lappend(vars, NULL);
1916 			continue;
1917 		}
1918 		attname = NameStr(att->attname);
1919 		atttypid = att->atttypid;
1920 		atttypmod = att->atttypmod;
1921 		attcollation = att->attcollation;
1922 
1923 		/*
1924 		 * When we are generating the "translation list" for the parent table
1925 		 * of an inheritance set, no need to search for matches.
1926 		 */
1927 		if (oldrelation == newrelation)
1928 		{
1929 			vars = lappend(vars, makeVar(newvarno,
1930 										 (AttrNumber) (old_attno + 1),
1931 										 atttypid,
1932 										 atttypmod,
1933 										 attcollation,
1934 										 0));
1935 			continue;
1936 		}
1937 
1938 		/*
1939 		 * Otherwise we have to search for the matching column by name.
1940 		 * There's no guarantee it'll have the same column position, because
1941 		 * of cases like ALTER TABLE ADD COLUMN and multiple inheritance.
1942 		 * However, in simple cases it will be the same column number, so try
1943 		 * that before we go groveling through all the columns.
1944 		 *
1945 		 * Note: the test for (att = ...) != NULL cannot fail, it's just a
1946 		 * notational device to include the assignment into the if-clause.
1947 		 */
1948 		if (old_attno < newnatts &&
1949 			(att = TupleDescAttr(new_tupdesc, old_attno)) != NULL &&
1950 			!att->attisdropped &&
1951 			strcmp(attname, NameStr(att->attname)) == 0)
1952 			new_attno = old_attno;
1953 		else
1954 		{
1955 			for (new_attno = 0; new_attno < newnatts; new_attno++)
1956 			{
1957 				att = TupleDescAttr(new_tupdesc, new_attno);
1958 				if (!att->attisdropped &&
1959 					strcmp(attname, NameStr(att->attname)) == 0)
1960 					break;
1961 			}
1962 			if (new_attno >= newnatts)
1963 				elog(ERROR, "could not find inherited attribute \"%s\" of relation \"%s\"",
1964 					 attname, RelationGetRelationName(newrelation));
1965 		}
1966 
1967 		/* Found it, check type and collation match */
1968 		if (atttypid != att->atttypid || atttypmod != att->atttypmod)
1969 			elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type",
1970 				 attname, RelationGetRelationName(newrelation));
1971 		if (attcollation != att->attcollation)
1972 			elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation",
1973 				 attname, RelationGetRelationName(newrelation));
1974 
1975 		vars = lappend(vars, makeVar(newvarno,
1976 									 (AttrNumber) (new_attno + 1),
1977 									 atttypid,
1978 									 atttypmod,
1979 									 attcollation,
1980 									 0));
1981 	}
1982 
1983 	*translated_vars = vars;
1984 }
1985 
1986 /*
1987  * translate_col_privs
1988  *	  Translate a bitmapset representing per-column privileges from the
1989  *	  parent rel's attribute numbering to the child's.
1990  *
1991  * The only surprise here is that we don't translate a parent whole-row
1992  * reference into a child whole-row reference.  That would mean requiring
1993  * permissions on all child columns, which is overly strict, since the
1994  * query is really only going to reference the inherited columns.  Instead
1995  * we set the per-column bits for all inherited columns.
1996  */
1997 static Bitmapset *
translate_col_privs(const Bitmapset * parent_privs,List * translated_vars)1998 translate_col_privs(const Bitmapset *parent_privs,
1999 					List *translated_vars)
2000 {
2001 	Bitmapset  *child_privs = NULL;
2002 	bool		whole_row;
2003 	int			attno;
2004 	ListCell   *lc;
2005 
2006 	/* System attributes have the same numbers in all tables */
2007 	for (attno = FirstLowInvalidHeapAttributeNumber + 1; attno < 0; attno++)
2008 	{
2009 		if (bms_is_member(attno - FirstLowInvalidHeapAttributeNumber,
2010 						  parent_privs))
2011 			child_privs = bms_add_member(child_privs,
2012 										 attno - FirstLowInvalidHeapAttributeNumber);
2013 	}
2014 
2015 	/* Check if parent has whole-row reference */
2016 	whole_row = bms_is_member(InvalidAttrNumber - FirstLowInvalidHeapAttributeNumber,
2017 							  parent_privs);
2018 
2019 	/* And now translate the regular user attributes, using the vars list */
2020 	attno = InvalidAttrNumber;
2021 	foreach(lc, translated_vars)
2022 	{
2023 		Var		   *var = lfirst_node(Var, lc);
2024 
2025 		attno++;
2026 		if (var == NULL)		/* ignore dropped columns */
2027 			continue;
2028 		if (whole_row ||
2029 			bms_is_member(attno - FirstLowInvalidHeapAttributeNumber,
2030 						  parent_privs))
2031 			child_privs = bms_add_member(child_privs,
2032 										 var->varattno - FirstLowInvalidHeapAttributeNumber);
2033 	}
2034 
2035 	return child_privs;
2036 }
2037 
2038 /*
2039  * adjust_appendrel_attrs
2040  *	  Copy the specified query or expression and translate Vars referring to a
2041  *	  parent rel to refer to the corresponding child rel instead.  We also
2042  *	  update rtindexes appearing outside Vars, such as resultRelation and
2043  *	  jointree relids.
2044  *
2045  * Note: this is only applied after conversion of sublinks to subplans,
2046  * so we don't need to cope with recursion into sub-queries.
2047  *
2048  * Note: this is not hugely different from what pullup_replace_vars() does;
2049  * maybe we should try to fold the two routines together.
2050  */
2051 Node *
adjust_appendrel_attrs(PlannerInfo * root,Node * node,int nappinfos,AppendRelInfo ** appinfos)2052 adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos,
2053 					   AppendRelInfo **appinfos)
2054 {
2055 	Node	   *result;
2056 	adjust_appendrel_attrs_context context;
2057 
2058 	context.root = root;
2059 	context.nappinfos = nappinfos;
2060 	context.appinfos = appinfos;
2061 
2062 	/* If there's nothing to adjust, don't call this function. */
2063 	Assert(nappinfos >= 1 && appinfos != NULL);
2064 
2065 	/*
2066 	 * Must be prepared to start with a Query or a bare expression tree.
2067 	 */
2068 	if (node && IsA(node, Query))
2069 	{
2070 		Query	   *newnode;
2071 		int			cnt;
2072 
2073 		newnode = query_tree_mutator((Query *) node,
2074 									 adjust_appendrel_attrs_mutator,
2075 									 (void *) &context,
2076 									 QTW_IGNORE_RC_SUBQUERIES);
2077 		for (cnt = 0; cnt < nappinfos; cnt++)
2078 		{
2079 			AppendRelInfo *appinfo = appinfos[cnt];
2080 
2081 			if (newnode->resultRelation == appinfo->parent_relid)
2082 			{
2083 				newnode->resultRelation = appinfo->child_relid;
2084 				/* Fix tlist resnos too, if it's inherited UPDATE */
2085 				if (newnode->commandType == CMD_UPDATE)
2086 					newnode->targetList =
2087 						adjust_inherited_tlist(newnode->targetList,
2088 											   appinfo);
2089 				break;
2090 			}
2091 		}
2092 
2093 		result = (Node *) newnode;
2094 	}
2095 	else
2096 		result = adjust_appendrel_attrs_mutator(node, &context);
2097 
2098 	return result;
2099 }
2100 
2101 static Node *
adjust_appendrel_attrs_mutator(Node * node,adjust_appendrel_attrs_context * context)2102 adjust_appendrel_attrs_mutator(Node *node,
2103 							   adjust_appendrel_attrs_context *context)
2104 {
2105 	AppendRelInfo **appinfos = context->appinfos;
2106 	int			nappinfos = context->nappinfos;
2107 	int			cnt;
2108 
2109 	if (node == NULL)
2110 		return NULL;
2111 	if (IsA(node, Var))
2112 	{
2113 		Var		   *var = (Var *) copyObject(node);
2114 		AppendRelInfo *appinfo = NULL;
2115 
2116 		for (cnt = 0; cnt < nappinfos; cnt++)
2117 		{
2118 			if (var->varno == appinfos[cnt]->parent_relid)
2119 			{
2120 				appinfo = appinfos[cnt];
2121 				break;
2122 			}
2123 		}
2124 
2125 		if (var->varlevelsup == 0 && appinfo)
2126 		{
2127 			var->varno = appinfo->child_relid;
2128 			var->varnoold = appinfo->child_relid;
2129 			if (var->varattno > 0)
2130 			{
2131 				Node	   *newnode;
2132 
2133 				if (var->varattno > list_length(appinfo->translated_vars))
2134 					elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2135 						 var->varattno, get_rel_name(appinfo->parent_reloid));
2136 				newnode = copyObject(list_nth(appinfo->translated_vars,
2137 											  var->varattno - 1));
2138 				if (newnode == NULL)
2139 					elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2140 						 var->varattno, get_rel_name(appinfo->parent_reloid));
2141 				return newnode;
2142 			}
2143 			else if (var->varattno == 0)
2144 			{
2145 				/*
2146 				 * Whole-row Var: if we are dealing with named rowtypes, we
2147 				 * can use a whole-row Var for the child table plus a coercion
2148 				 * step to convert the tuple layout to the parent's rowtype.
2149 				 * Otherwise we have to generate a RowExpr.
2150 				 */
2151 				if (OidIsValid(appinfo->child_reltype))
2152 				{
2153 					Assert(var->vartype == appinfo->parent_reltype);
2154 					if (appinfo->parent_reltype != appinfo->child_reltype)
2155 					{
2156 						ConvertRowtypeExpr *r = makeNode(ConvertRowtypeExpr);
2157 
2158 						r->arg = (Expr *) var;
2159 						r->resulttype = appinfo->parent_reltype;
2160 						r->convertformat = COERCE_IMPLICIT_CAST;
2161 						r->location = -1;
2162 						/* Make sure the Var node has the right type ID, too */
2163 						var->vartype = appinfo->child_reltype;
2164 						return (Node *) r;
2165 					}
2166 				}
2167 				else
2168 				{
2169 					/*
2170 					 * Build a RowExpr containing the translated variables.
2171 					 *
2172 					 * In practice var->vartype will always be RECORDOID here,
2173 					 * so we need to come up with some suitable column names.
2174 					 * We use the parent RTE's column names.
2175 					 *
2176 					 * Note: we can't get here for inheritance cases, so there
2177 					 * is no need to worry that translated_vars might contain
2178 					 * some dummy NULLs.
2179 					 */
2180 					RowExpr    *rowexpr;
2181 					List	   *fields;
2182 					RangeTblEntry *rte;
2183 
2184 					rte = rt_fetch(appinfo->parent_relid,
2185 								   context->root->parse->rtable);
2186 					fields = copyObject(appinfo->translated_vars);
2187 					rowexpr = makeNode(RowExpr);
2188 					rowexpr->args = fields;
2189 					rowexpr->row_typeid = var->vartype;
2190 					rowexpr->row_format = COERCE_IMPLICIT_CAST;
2191 					rowexpr->colnames = copyObject(rte->eref->colnames);
2192 					rowexpr->location = -1;
2193 
2194 					return (Node *) rowexpr;
2195 				}
2196 			}
2197 			/* system attributes don't need any other translation */
2198 		}
2199 		return (Node *) var;
2200 	}
2201 	if (IsA(node, CurrentOfExpr))
2202 	{
2203 		CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node);
2204 
2205 		for (cnt = 0; cnt < nappinfos; cnt++)
2206 		{
2207 			AppendRelInfo *appinfo = appinfos[cnt];
2208 
2209 			if (cexpr->cvarno == appinfo->parent_relid)
2210 			{
2211 				cexpr->cvarno = appinfo->child_relid;
2212 				break;
2213 			}
2214 		}
2215 		return (Node *) cexpr;
2216 	}
2217 	if (IsA(node, RangeTblRef))
2218 	{
2219 		RangeTblRef *rtr = (RangeTblRef *) copyObject(node);
2220 
2221 		for (cnt = 0; cnt < nappinfos; cnt++)
2222 		{
2223 			AppendRelInfo *appinfo = appinfos[cnt];
2224 
2225 			if (rtr->rtindex == appinfo->parent_relid)
2226 			{
2227 				rtr->rtindex = appinfo->child_relid;
2228 				break;
2229 			}
2230 		}
2231 		return (Node *) rtr;
2232 	}
2233 	if (IsA(node, JoinExpr))
2234 	{
2235 		/* Copy the JoinExpr node with correct mutation of subnodes */
2236 		JoinExpr   *j;
2237 		AppendRelInfo *appinfo;
2238 
2239 		j = (JoinExpr *) expression_tree_mutator(node,
2240 												 adjust_appendrel_attrs_mutator,
2241 												 (void *) context);
2242 		/* now fix JoinExpr's rtindex (probably never happens) */
2243 		for (cnt = 0; cnt < nappinfos; cnt++)
2244 		{
2245 			appinfo = appinfos[cnt];
2246 
2247 			if (j->rtindex == appinfo->parent_relid)
2248 			{
2249 				j->rtindex = appinfo->child_relid;
2250 				break;
2251 			}
2252 		}
2253 		return (Node *) j;
2254 	}
2255 	if (IsA(node, PlaceHolderVar))
2256 	{
2257 		/* Copy the PlaceHolderVar node with correct mutation of subnodes */
2258 		PlaceHolderVar *phv;
2259 
2260 		phv = (PlaceHolderVar *) expression_tree_mutator(node,
2261 														 adjust_appendrel_attrs_mutator,
2262 														 (void *) context);
2263 		/* now fix PlaceHolderVar's relid sets */
2264 		if (phv->phlevelsup == 0)
2265 			phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos,
2266 											  context->appinfos);
2267 		return (Node *) phv;
2268 	}
2269 	/* Shouldn't need to handle planner auxiliary nodes here */
2270 	Assert(!IsA(node, SpecialJoinInfo));
2271 	Assert(!IsA(node, AppendRelInfo));
2272 	Assert(!IsA(node, PlaceHolderInfo));
2273 	Assert(!IsA(node, MinMaxAggInfo));
2274 
2275 	/*
2276 	 * We have to process RestrictInfo nodes specially.  (Note: although
2277 	 * set_append_rel_pathlist will hide RestrictInfos in the parent's
2278 	 * baserestrictinfo list from us, it doesn't hide those in joininfo.)
2279 	 */
2280 	if (IsA(node, RestrictInfo))
2281 	{
2282 		RestrictInfo *oldinfo = (RestrictInfo *) node;
2283 		RestrictInfo *newinfo = makeNode(RestrictInfo);
2284 
2285 		/* Copy all flat-copiable fields */
2286 		memcpy(newinfo, oldinfo, sizeof(RestrictInfo));
2287 
2288 		/* Recursively fix the clause itself */
2289 		newinfo->clause = (Expr *)
2290 			adjust_appendrel_attrs_mutator((Node *) oldinfo->clause, context);
2291 
2292 		/* and the modified version, if an OR clause */
2293 		newinfo->orclause = (Expr *)
2294 			adjust_appendrel_attrs_mutator((Node *) oldinfo->orclause, context);
2295 
2296 		/* adjust relid sets too */
2297 		newinfo->clause_relids = adjust_child_relids(oldinfo->clause_relids,
2298 													 context->nappinfos,
2299 													 context->appinfos);
2300 		newinfo->required_relids = adjust_child_relids(oldinfo->required_relids,
2301 													   context->nappinfos,
2302 													   context->appinfos);
2303 		newinfo->outer_relids = adjust_child_relids(oldinfo->outer_relids,
2304 													context->nappinfos,
2305 													context->appinfos);
2306 		newinfo->nullable_relids = adjust_child_relids(oldinfo->nullable_relids,
2307 													   context->nappinfos,
2308 													   context->appinfos);
2309 		newinfo->left_relids = adjust_child_relids(oldinfo->left_relids,
2310 												   context->nappinfos,
2311 												   context->appinfos);
2312 		newinfo->right_relids = adjust_child_relids(oldinfo->right_relids,
2313 													context->nappinfos,
2314 													context->appinfos);
2315 
2316 		/*
2317 		 * Reset cached derivative fields, since these might need to have
2318 		 * different values when considering the child relation.  Note we
2319 		 * don't reset left_ec/right_ec: each child variable is implicitly
2320 		 * equivalent to its parent, so still a member of the same EC if any.
2321 		 */
2322 		newinfo->eval_cost.startup = -1;
2323 		newinfo->norm_selec = -1;
2324 		newinfo->outer_selec = -1;
2325 		newinfo->left_em = NULL;
2326 		newinfo->right_em = NULL;
2327 		newinfo->scansel_cache = NIL;
2328 		newinfo->left_bucketsize = -1;
2329 		newinfo->right_bucketsize = -1;
2330 		newinfo->left_mcvfreq = -1;
2331 		newinfo->right_mcvfreq = -1;
2332 
2333 		return (Node *) newinfo;
2334 	}
2335 
2336 	/*
2337 	 * NOTE: we do not need to recurse into sublinks, because they should
2338 	 * already have been converted to subplans before we see them.
2339 	 */
2340 	Assert(!IsA(node, SubLink));
2341 	Assert(!IsA(node, Query));
2342 
2343 	return expression_tree_mutator(node, adjust_appendrel_attrs_mutator,
2344 								   (void *) context);
2345 }
2346 
2347 /*
2348  * Substitute child relids for parent relids in a Relid set.  The array of
2349  * appinfos specifies the substitutions to be performed.
2350  */
2351 static Relids
adjust_child_relids(Relids relids,int nappinfos,AppendRelInfo ** appinfos)2352 adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
2353 {
2354 	Bitmapset  *result = NULL;
2355 	int			cnt;
2356 
2357 	for (cnt = 0; cnt < nappinfos; cnt++)
2358 	{
2359 		AppendRelInfo *appinfo = appinfos[cnt];
2360 
2361 		/* Remove parent, add child */
2362 		if (bms_is_member(appinfo->parent_relid, relids))
2363 		{
2364 			/* Make a copy if we are changing the set. */
2365 			if (!result)
2366 				result = bms_copy(relids);
2367 
2368 			result = bms_del_member(result, appinfo->parent_relid);
2369 			result = bms_add_member(result, appinfo->child_relid);
2370 		}
2371 	}
2372 
2373 	/* If we made any changes, return the modified copy. */
2374 	if (result)
2375 		return result;
2376 
2377 	/* Otherwise, return the original set without modification. */
2378 	return relids;
2379 }
2380 
2381 /*
2382  * Replace any relid present in top_parent_relids with its child in
2383  * child_relids. Members of child_relids can be multiple levels below top
2384  * parent in the partition hierarchy.
2385  */
2386 Relids
adjust_child_relids_multilevel(PlannerInfo * root,Relids relids,Relids child_relids,Relids top_parent_relids)2387 adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
2388 							   Relids child_relids, Relids top_parent_relids)
2389 {
2390 	AppendRelInfo **appinfos;
2391 	int			nappinfos;
2392 	Relids		parent_relids = NULL;
2393 	Relids		result;
2394 	Relids		tmp_result = NULL;
2395 	int			cnt;
2396 
2397 	/*
2398 	 * If the given relids set doesn't contain any of the top parent relids,
2399 	 * it will remain unchanged.
2400 	 */
2401 	if (!bms_overlap(relids, top_parent_relids))
2402 		return relids;
2403 
2404 	appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
2405 
2406 	/* Construct relids set for the immediate parent of the given child. */
2407 	for (cnt = 0; cnt < nappinfos; cnt++)
2408 	{
2409 		AppendRelInfo *appinfo = appinfos[cnt];
2410 
2411 		parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
2412 	}
2413 
2414 	/* Recurse if immediate parent is not the top parent. */
2415 	if (!bms_equal(parent_relids, top_parent_relids))
2416 	{
2417 		tmp_result = adjust_child_relids_multilevel(root, relids,
2418 													parent_relids,
2419 													top_parent_relids);
2420 		relids = tmp_result;
2421 	}
2422 
2423 	result = adjust_child_relids(relids, nappinfos, appinfos);
2424 
2425 	/* Free memory consumed by any intermediate result. */
2426 	if (tmp_result)
2427 		bms_free(tmp_result);
2428 	bms_free(parent_relids);
2429 	pfree(appinfos);
2430 
2431 	return result;
2432 }
2433 
2434 /*
2435  * Adjust the targetlist entries of an inherited UPDATE operation
2436  *
2437  * The expressions have already been fixed, but we have to make sure that
2438  * the target resnos match the child table (they may not, in the case of
2439  * a column that was added after-the-fact by ALTER TABLE).  In some cases
2440  * this can force us to re-order the tlist to preserve resno ordering.
2441  * (We do all this work in special cases so that preptlist.c is fast for
2442  * the typical case.)
2443  *
2444  * The given tlist has already been through expression_tree_mutator;
2445  * therefore the TargetEntry nodes are fresh copies that it's okay to
2446  * scribble on.
2447  *
2448  * Note that this is not needed for INSERT because INSERT isn't inheritable.
2449  */
2450 static List *
adjust_inherited_tlist(List * tlist,AppendRelInfo * context)2451 adjust_inherited_tlist(List *tlist, AppendRelInfo *context)
2452 {
2453 	bool		changed_it = false;
2454 	ListCell   *tl;
2455 	List	   *new_tlist;
2456 	bool		more;
2457 	int			attrno;
2458 
2459 	/* This should only happen for an inheritance case, not UNION ALL */
2460 	Assert(OidIsValid(context->parent_reloid));
2461 
2462 	/* Scan tlist and update resnos to match attnums of child rel */
2463 	foreach(tl, tlist)
2464 	{
2465 		TargetEntry *tle = (TargetEntry *) lfirst(tl);
2466 		Var		   *childvar;
2467 
2468 		if (tle->resjunk)
2469 			continue;			/* ignore junk items */
2470 
2471 		/* Look up the translation of this column: it must be a Var */
2472 		if (tle->resno <= 0 ||
2473 			tle->resno > list_length(context->translated_vars))
2474 			elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2475 				 tle->resno, get_rel_name(context->parent_reloid));
2476 		childvar = (Var *) list_nth(context->translated_vars, tle->resno - 1);
2477 		if (childvar == NULL || !IsA(childvar, Var))
2478 			elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2479 				 tle->resno, get_rel_name(context->parent_reloid));
2480 
2481 		if (tle->resno != childvar->varattno)
2482 		{
2483 			tle->resno = childvar->varattno;
2484 			changed_it = true;
2485 		}
2486 	}
2487 
2488 	/*
2489 	 * If we changed anything, re-sort the tlist by resno, and make sure
2490 	 * resjunk entries have resnos above the last real resno.  The sort
2491 	 * algorithm is a bit stupid, but for such a seldom-taken path, small is
2492 	 * probably better than fast.
2493 	 */
2494 	if (!changed_it)
2495 		return tlist;
2496 
2497 	new_tlist = NIL;
2498 	more = true;
2499 	for (attrno = 1; more; attrno++)
2500 	{
2501 		more = false;
2502 		foreach(tl, tlist)
2503 		{
2504 			TargetEntry *tle = (TargetEntry *) lfirst(tl);
2505 
2506 			if (tle->resjunk)
2507 				continue;		/* ignore junk items */
2508 
2509 			if (tle->resno == attrno)
2510 				new_tlist = lappend(new_tlist, tle);
2511 			else if (tle->resno > attrno)
2512 				more = true;
2513 		}
2514 	}
2515 
2516 	foreach(tl, tlist)
2517 	{
2518 		TargetEntry *tle = (TargetEntry *) lfirst(tl);
2519 
2520 		if (!tle->resjunk)
2521 			continue;			/* here, ignore non-junk items */
2522 
2523 		tle->resno = attrno;
2524 		new_tlist = lappend(new_tlist, tle);
2525 		attrno++;
2526 	}
2527 
2528 	return new_tlist;
2529 }
2530 
2531 /*
2532  * adjust_appendrel_attrs_multilevel
2533  *	  Apply Var translations from a toplevel appendrel parent down to a child.
2534  *
2535  * In some cases we need to translate expressions referencing a parent relation
2536  * to reference an appendrel child that's multiple levels removed from it.
2537  */
2538 Node *
adjust_appendrel_attrs_multilevel(PlannerInfo * root,Node * node,Relids child_relids,Relids top_parent_relids)2539 adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
2540 								  Relids child_relids,
2541 								  Relids top_parent_relids)
2542 {
2543 	AppendRelInfo **appinfos;
2544 	Bitmapset  *parent_relids = NULL;
2545 	int			nappinfos;
2546 	int			cnt;
2547 
2548 	Assert(bms_num_members(child_relids) == bms_num_members(top_parent_relids));
2549 
2550 	appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
2551 
2552 	/* Construct relids set for the immediate parent of given child. */
2553 	for (cnt = 0; cnt < nappinfos; cnt++)
2554 	{
2555 		AppendRelInfo *appinfo = appinfos[cnt];
2556 
2557 		parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
2558 	}
2559 
2560 	/* Recurse if immediate parent is not the top parent. */
2561 	if (!bms_equal(parent_relids, top_parent_relids))
2562 		node = adjust_appendrel_attrs_multilevel(root, node, parent_relids,
2563 												 top_parent_relids);
2564 
2565 	/* Now translate for this child */
2566 	node = adjust_appendrel_attrs(root, node, nappinfos, appinfos);
2567 
2568 	pfree(appinfos);
2569 
2570 	return node;
2571 }
2572 
2573 /*
2574  * Construct the SpecialJoinInfo for a child-join by translating
2575  * SpecialJoinInfo for the join between parents. left_relids and right_relids
2576  * are the relids of left and right side of the join respectively.
2577  */
2578 SpecialJoinInfo *
build_child_join_sjinfo(PlannerInfo * root,SpecialJoinInfo * parent_sjinfo,Relids left_relids,Relids right_relids)2579 build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
2580 						Relids left_relids, Relids right_relids)
2581 {
2582 	SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
2583 	AppendRelInfo **left_appinfos;
2584 	int			left_nappinfos;
2585 	AppendRelInfo **right_appinfos;
2586 	int			right_nappinfos;
2587 
2588 	memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
2589 	left_appinfos = find_appinfos_by_relids(root, left_relids,
2590 											&left_nappinfos);
2591 	right_appinfos = find_appinfos_by_relids(root, right_relids,
2592 											 &right_nappinfos);
2593 
2594 	sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
2595 											   left_nappinfos, left_appinfos);
2596 	sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
2597 												right_nappinfos,
2598 												right_appinfos);
2599 	sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
2600 											   left_nappinfos, left_appinfos);
2601 	sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
2602 												right_nappinfos,
2603 												right_appinfos);
2604 	sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
2605 															 (Node *) sjinfo->semi_rhs_exprs,
2606 															 right_nappinfos,
2607 															 right_appinfos);
2608 
2609 	pfree(left_appinfos);
2610 	pfree(right_appinfos);
2611 
2612 	return sjinfo;
2613 }
2614 
2615 /*
2616  * find_appinfos_by_relids
2617  * 		Find AppendRelInfo structures for all relations specified by relids.
2618  *
2619  * The AppendRelInfos are returned in an array, which can be pfree'd by the
2620  * caller. *nappinfos is set to the number of entries in the array.
2621  */
2622 AppendRelInfo **
find_appinfos_by_relids(PlannerInfo * root,Relids relids,int * nappinfos)2623 find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
2624 {
2625 	AppendRelInfo **appinfos;
2626 	int			cnt = 0;
2627 	int			i;
2628 
2629 	*nappinfos = bms_num_members(relids);
2630 	appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos);
2631 
2632 	i = -1;
2633 	while ((i = bms_next_member(relids, i)) >= 0)
2634 	{
2635 		AppendRelInfo *appinfo = root->append_rel_array[i];
2636 
2637 		if (!appinfo)
2638 			elog(ERROR, "child rel %d not found in append_rel_array", i);
2639 
2640 		appinfos[cnt++] = appinfo;
2641 	}
2642 	return appinfos;
2643 }
2644