1 /*-------------------------------------------------------------------------
2  *
3  * prepunion.c
4  *	  Routines to plan set-operation queries.  The filename is a leftover
5  *	  from a time when only UNIONs were implemented.
6  *
7  * There are two code paths in the planner for set-operation queries.
8  * If a subquery consists entirely of simple UNION ALL operations, it
9  * is converted into an "append relation".  Otherwise, it is handled
10  * by the general code in this module (plan_set_operations and its
11  * subroutines).  There is some support code here for the append-relation
12  * case, but most of the heavy lifting for that is done elsewhere,
13  * notably in prepjointree.c and allpaths.c.
14  *
15  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
16  * Portions Copyright (c) 1994, Regents of the University of California
17  *
18  *
19  * IDENTIFICATION
20  *	  src/backend/optimizer/prep/prepunion.c
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "access/sysattr.h"
28 #include "catalog/partition.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "nodes/makefuncs.h"
33 #include "nodes/nodeFuncs.h"
34 #include "optimizer/cost.h"
35 #include "optimizer/pathnode.h"
36 #include "optimizer/paths.h"
37 #include "optimizer/planmain.h"
38 #include "optimizer/planner.h"
39 #include "optimizer/prep.h"
40 #include "optimizer/tlist.h"
41 #include "parser/parse_coerce.h"
42 #include "parser/parsetree.h"
43 #include "utils/lsyscache.h"
44 #include "utils/rel.h"
45 #include "utils/selfuncs.h"
46 #include "utils/syscache.h"
47 
48 
49 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
50 										  List *colTypes, List *colCollations,
51 										  bool junkOK,
52 										  int flag, List *refnames_tlist,
53 										  List **pTargetList,
54 										  double *pNumGroups);
55 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
56 										   PlannerInfo *root,
57 										   List *refnames_tlist,
58 										   List **pTargetList);
59 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
60 										List *refnames_tlist,
61 										List **pTargetList);
62 static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
63 										   List *refnames_tlist,
64 										   List **pTargetList);
65 static List *plan_union_children(PlannerInfo *root,
66 								 SetOperationStmt *top_union,
67 								 List *refnames_tlist,
68 								 List **tlist_list);
69 static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
70 							   PlannerInfo *root);
71 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
72 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
73 								Path *input_path,
74 								double dNumGroups, double dNumOutputRows,
75 								const char *construct);
76 static List *generate_setop_tlist(List *colTypes, List *colCollations,
77 								  int flag,
78 								  Index varno,
79 								  bool hack_constants,
80 								  List *input_tlist,
81 								  List *refnames_tlist);
82 static List *generate_append_tlist(List *colTypes, List *colCollations,
83 								   bool flag,
84 								   List *input_tlists,
85 								   List *refnames_tlist);
86 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
87 
88 
89 /*
90  * plan_set_operations
91  *
92  *	  Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
93  *
94  * This routine only deals with the setOperations tree of the given query.
95  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
96  * when we return to grouping_planner; likewise for LIMIT.
97  *
98  * What we return is an "upperrel" RelOptInfo containing at least one Path
99  * that implements the set-operation tree.  In addition, root->processed_tlist
100  * receives a targetlist representing the output of the topmost setop node.
101  */
102 RelOptInfo *
plan_set_operations(PlannerInfo * root)103 plan_set_operations(PlannerInfo *root)
104 {
105 	Query	   *parse = root->parse;
106 	SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
107 	Node	   *node;
108 	RangeTblEntry *leftmostRTE;
109 	Query	   *leftmostQuery;
110 	RelOptInfo *setop_rel;
111 	List	   *top_tlist;
112 
113 	Assert(topop);
114 
115 	/* check for unsupported stuff */
116 	Assert(parse->jointree->fromlist == NIL);
117 	Assert(parse->jointree->quals == NULL);
118 	Assert(parse->groupClause == NIL);
119 	Assert(parse->havingQual == NULL);
120 	Assert(parse->windowClause == NIL);
121 	Assert(parse->distinctClause == NIL);
122 
123 	/*
124 	 * In the outer query level, we won't have any true equivalences to deal
125 	 * with; but we do want to be able to make pathkeys, which will require
126 	 * single-member EquivalenceClasses.  Indicate that EC merging is complete
127 	 * so that pathkeys.c won't complain.
128 	 */
129 	Assert(root->eq_classes == NIL);
130 	root->ec_merging_done = true;
131 
132 	/*
133 	 * We'll need to build RelOptInfos for each of the leaf subqueries, which
134 	 * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
135 	 * arrays for those, and for AppendRelInfos in case they're needed.
136 	 */
137 	setup_simple_rel_arrays(root);
138 
139 	/*
140 	 * Find the leftmost component Query.  We need to use its column names for
141 	 * all generated tlists (else SELECT INTO won't work right).
142 	 */
143 	node = topop->larg;
144 	while (node && IsA(node, SetOperationStmt))
145 		node = ((SetOperationStmt *) node)->larg;
146 	Assert(node && IsA(node, RangeTblRef));
147 	leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
148 	leftmostQuery = leftmostRTE->subquery;
149 	Assert(leftmostQuery != NULL);
150 
151 	/*
152 	 * If the topmost node is a recursive union, it needs special processing.
153 	 */
154 	if (root->hasRecursion)
155 	{
156 		setop_rel = generate_recursion_path(topop, root,
157 											leftmostQuery->targetList,
158 											&top_tlist);
159 	}
160 	else
161 	{
162 		/*
163 		 * Recurse on setOperations tree to generate paths for set ops. The
164 		 * final output paths should have just the column types shown as the
165 		 * output from the top-level node, plus possibly resjunk working
166 		 * columns (we can rely on upper-level nodes to deal with that).
167 		 */
168 		setop_rel = recurse_set_operations((Node *) topop, root,
169 										   topop->colTypes, topop->colCollations,
170 										   true, -1,
171 										   leftmostQuery->targetList,
172 										   &top_tlist,
173 										   NULL);
174 	}
175 
176 	/* Must return the built tlist into root->processed_tlist. */
177 	root->processed_tlist = top_tlist;
178 
179 	return setop_rel;
180 }
181 
182 /*
183  * recurse_set_operations
184  *	  Recursively handle one step in a tree of set operations
185  *
186  * colTypes: OID list of set-op's result column datatypes
187  * colCollations: OID list of set-op's result column collations
188  * junkOK: if true, child resjunk columns may be left in the result
189  * flag: if >= 0, add a resjunk output column indicating value of flag
190  * refnames_tlist: targetlist to take column names from
191  *
192  * Returns a RelOptInfo for the subtree, as well as these output parameters:
193  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
194  * *pNumGroups: if not NULL, we estimate the number of distinct groups
195  *		in the result, and store it there
196  *
197  * The pTargetList output parameter is mostly redundant with the pathtarget
198  * of the returned RelOptInfo, but for the moment we need it because much of
199  * the logic in this file depends on flag columns being marked resjunk.
200  * Pending a redesign of how that works, this is the easy way out.
201  *
202  * We don't have to care about typmods here: the only allowed difference
203  * between set-op input and output typmods is input is a specific typmod
204  * and output is -1, and that does not require a coercion.
205  */
206 static RelOptInfo *
recurse_set_operations(Node * setOp,PlannerInfo * root,List * colTypes,List * colCollations,bool junkOK,int flag,List * refnames_tlist,List ** pTargetList,double * pNumGroups)207 recurse_set_operations(Node *setOp, PlannerInfo *root,
208 					   List *colTypes, List *colCollations,
209 					   bool junkOK,
210 					   int flag, List *refnames_tlist,
211 					   List **pTargetList,
212 					   double *pNumGroups)
213 {
214 	RelOptInfo *rel = NULL;		/* keep compiler quiet */
215 
216 	/* Guard against stack overflow due to overly complex setop nests */
217 	check_stack_depth();
218 
219 	if (IsA(setOp, RangeTblRef))
220 	{
221 		RangeTblRef *rtr = (RangeTblRef *) setOp;
222 		RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
223 		Query	   *subquery = rte->subquery;
224 		PlannerInfo *subroot;
225 		RelOptInfo *final_rel;
226 		Path	   *subpath;
227 		Path	   *path;
228 		List	   *tlist;
229 
230 		Assert(subquery != NULL);
231 
232 		/* Build a RelOptInfo for this leaf subquery. */
233 		rel = build_simple_rel(root, rtr->rtindex, NULL);
234 
235 		/* plan_params should not be in use in current query level */
236 		Assert(root->plan_params == NIL);
237 
238 		/* Generate a subroot and Paths for the subquery */
239 		subroot = rel->subroot = subquery_planner(root->glob, subquery,
240 												  root,
241 												  false,
242 												  root->tuple_fraction);
243 
244 		/*
245 		 * It should not be possible for the primitive query to contain any
246 		 * cross-references to other primitive queries in the setop tree.
247 		 */
248 		if (root->plan_params)
249 			elog(ERROR, "unexpected outer reference in set operation subquery");
250 
251 		/* Figure out the appropriate target list for this subquery. */
252 		tlist = generate_setop_tlist(colTypes, colCollations,
253 									 flag,
254 									 rtr->rtindex,
255 									 true,
256 									 subroot->processed_tlist,
257 									 refnames_tlist);
258 		rel->reltarget = create_pathtarget(root, tlist);
259 
260 		/* Return the fully-fledged tlist to caller, too */
261 		*pTargetList = tlist;
262 
263 		/*
264 		 * Mark rel with estimated output rows, width, etc.  Note that we have
265 		 * to do this before generating outer-query paths, else
266 		 * cost_subqueryscan is not happy.
267 		 */
268 		set_subquery_size_estimates(root, rel);
269 
270 		/*
271 		 * Since we may want to add a partial path to this relation, we must
272 		 * set its consider_parallel flag correctly.
273 		 */
274 		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
275 		rel->consider_parallel = final_rel->consider_parallel;
276 
277 		/*
278 		 * For the moment, we consider only a single Path for the subquery.
279 		 * This should change soon (make it look more like
280 		 * set_subquery_pathlist).
281 		 */
282 		subpath = get_cheapest_fractional_path(final_rel,
283 											   root->tuple_fraction);
284 
285 		/*
286 		 * Stick a SubqueryScanPath atop that.
287 		 *
288 		 * We don't bother to determine the subquery's output ordering since
289 		 * it won't be reflected in the set-op result anyhow; so just label
290 		 * the SubqueryScanPath with nil pathkeys.  (XXX that should change
291 		 * soon too, likely.)
292 		 */
293 		path = (Path *) create_subqueryscan_path(root, rel, subpath,
294 												 NIL, NULL);
295 
296 		add_path(rel, path);
297 
298 		/*
299 		 * If we have a partial path for the child relation, we can use that
300 		 * to build a partial path for this relation.  But there's no point in
301 		 * considering any path but the cheapest.
302 		 */
303 		if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
304 			final_rel->partial_pathlist != NIL)
305 		{
306 			Path	   *partial_subpath;
307 			Path	   *partial_path;
308 
309 			partial_subpath = linitial(final_rel->partial_pathlist);
310 			partial_path = (Path *)
311 				create_subqueryscan_path(root, rel, partial_subpath,
312 										 NIL, NULL);
313 			add_partial_path(rel, partial_path);
314 		}
315 
316 		/*
317 		 * Estimate number of groups if caller wants it.  If the subquery used
318 		 * grouping or aggregation, its output is probably mostly unique
319 		 * anyway; otherwise do statistical estimation.
320 		 *
321 		 * XXX you don't really want to know about this: we do the estimation
322 		 * using the subquery's original targetlist expressions, not the
323 		 * subroot->processed_tlist which might seem more appropriate.  The
324 		 * reason is that if the subquery is itself a setop, it may return a
325 		 * processed_tlist containing "varno 0" Vars generated by
326 		 * generate_append_tlist, and those would confuse estimate_num_groups
327 		 * mightily.  We ought to get rid of the "varno 0" hack, but that
328 		 * requires a redesign of the parsetree representation of setops, so
329 		 * that there can be an RTE corresponding to each setop's output.
330 		 */
331 		if (pNumGroups)
332 		{
333 			if (subquery->groupClause || subquery->groupingSets ||
334 				subquery->distinctClause ||
335 				subroot->hasHavingQual || subquery->hasAggs)
336 				*pNumGroups = subpath->rows;
337 			else
338 				*pNumGroups = estimate_num_groups(subroot,
339 												  get_tlist_exprs(subquery->targetList, false),
340 												  subpath->rows,
341 												  NULL,
342 												  NULL);
343 		}
344 	}
345 	else if (IsA(setOp, SetOperationStmt))
346 	{
347 		SetOperationStmt *op = (SetOperationStmt *) setOp;
348 
349 		/* UNIONs are much different from INTERSECT/EXCEPT */
350 		if (op->op == SETOP_UNION)
351 			rel = generate_union_paths(op, root,
352 									   refnames_tlist,
353 									   pTargetList);
354 		else
355 			rel = generate_nonunion_paths(op, root,
356 										  refnames_tlist,
357 										  pTargetList);
358 		if (pNumGroups)
359 			*pNumGroups = rel->rows;
360 
361 		/*
362 		 * If necessary, add a Result node to project the caller-requested
363 		 * output columns.
364 		 *
365 		 * XXX you don't really want to know about this: setrefs.c will apply
366 		 * fix_upper_expr() to the Result node's tlist. This would fail if the
367 		 * Vars generated by generate_setop_tlist() were not exactly equal()
368 		 * to the corresponding tlist entries of the subplan. However, since
369 		 * the subplan was generated by generate_union_paths() or
370 		 * generate_nonunion_paths(), and hence its tlist was generated by
371 		 * generate_append_tlist(), this will work.  We just tell
372 		 * generate_setop_tlist() to use varno 0.
373 		 */
374 		if (flag >= 0 ||
375 			!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
376 			!tlist_same_collations(*pTargetList, colCollations, junkOK))
377 		{
378 			PathTarget *target;
379 			ListCell   *lc;
380 
381 			*pTargetList = generate_setop_tlist(colTypes, colCollations,
382 												flag,
383 												0,
384 												false,
385 												*pTargetList,
386 												refnames_tlist);
387 			target = create_pathtarget(root, *pTargetList);
388 
389 			/* Apply projection to each path */
390 			foreach(lc, rel->pathlist)
391 			{
392 				Path	   *subpath = (Path *) lfirst(lc);
393 				Path	   *path;
394 
395 				Assert(subpath->param_info == NULL);
396 				path = apply_projection_to_path(root, subpath->parent,
397 												subpath, target);
398 				/* If we had to add a Result, path is different from subpath */
399 				if (path != subpath)
400 					lfirst(lc) = path;
401 			}
402 
403 			/* Apply projection to each partial path */
404 			foreach(lc, rel->partial_pathlist)
405 			{
406 				Path	   *subpath = (Path *) lfirst(lc);
407 				Path	   *path;
408 
409 				Assert(subpath->param_info == NULL);
410 
411 				/* avoid apply_projection_to_path, in case of multiple refs */
412 				path = (Path *) create_projection_path(root, subpath->parent,
413 													   subpath, target);
414 				lfirst(lc) = path;
415 			}
416 		}
417 	}
418 	else
419 	{
420 		elog(ERROR, "unrecognized node type: %d",
421 			 (int) nodeTag(setOp));
422 		*pTargetList = NIL;
423 	}
424 
425 	postprocess_setop_rel(root, rel);
426 
427 	return rel;
428 }
429 
430 /*
431  * Generate paths for a recursive UNION node
432  */
433 static RelOptInfo *
generate_recursion_path(SetOperationStmt * setOp,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)434 generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
435 						List *refnames_tlist,
436 						List **pTargetList)
437 {
438 	RelOptInfo *result_rel;
439 	Path	   *path;
440 	RelOptInfo *lrel,
441 			   *rrel;
442 	Path	   *lpath;
443 	Path	   *rpath;
444 	List	   *lpath_tlist;
445 	List	   *rpath_tlist;
446 	List	   *tlist;
447 	List	   *groupList;
448 	double		dNumGroups;
449 
450 	/* Parser should have rejected other cases */
451 	if (setOp->op != SETOP_UNION)
452 		elog(ERROR, "only UNION queries can be recursive");
453 	/* Worktable ID should be assigned */
454 	Assert(root->wt_param_id >= 0);
455 
456 	/*
457 	 * Unlike a regular UNION node, process the left and right inputs
458 	 * separately without any intention of combining them into one Append.
459 	 */
460 	lrel = recurse_set_operations(setOp->larg, root,
461 								  setOp->colTypes, setOp->colCollations,
462 								  false, -1,
463 								  refnames_tlist,
464 								  &lpath_tlist,
465 								  NULL);
466 	lpath = lrel->cheapest_total_path;
467 	/* The right path will want to look at the left one ... */
468 	root->non_recursive_path = lpath;
469 	rrel = recurse_set_operations(setOp->rarg, root,
470 								  setOp->colTypes, setOp->colCollations,
471 								  false, -1,
472 								  refnames_tlist,
473 								  &rpath_tlist,
474 								  NULL);
475 	rpath = rrel->cheapest_total_path;
476 	root->non_recursive_path = NULL;
477 
478 	/*
479 	 * Generate tlist for RecursiveUnion path node --- same as in Append cases
480 	 */
481 	tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
482 								  list_make2(lpath_tlist, rpath_tlist),
483 								  refnames_tlist);
484 
485 	*pTargetList = tlist;
486 
487 	/* Build result relation. */
488 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
489 								 bms_union(lrel->relids, rrel->relids));
490 	result_rel->reltarget = create_pathtarget(root, tlist);
491 
492 	/*
493 	 * If UNION, identify the grouping operators
494 	 */
495 	if (setOp->all)
496 	{
497 		groupList = NIL;
498 		dNumGroups = 0;
499 	}
500 	else
501 	{
502 		/* Identify the grouping semantics */
503 		groupList = generate_setop_grouplist(setOp, tlist);
504 
505 		/* We only support hashing here */
506 		if (!grouping_is_hashable(groupList))
507 			ereport(ERROR,
508 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
509 					 errmsg("could not implement recursive UNION"),
510 					 errdetail("All column datatypes must be hashable.")));
511 
512 		/*
513 		 * For the moment, take the number of distinct groups as equal to the
514 		 * total input size, ie, the worst case.
515 		 */
516 		dNumGroups = lpath->rows + rpath->rows * 10;
517 	}
518 
519 	/*
520 	 * And make the path node.
521 	 */
522 	path = (Path *) create_recursiveunion_path(root,
523 											   result_rel,
524 											   lpath,
525 											   rpath,
526 											   result_rel->reltarget,
527 											   groupList,
528 											   root->wt_param_id,
529 											   dNumGroups);
530 
531 	add_path(result_rel, path);
532 	postprocess_setop_rel(root, result_rel);
533 	return result_rel;
534 }
535 
536 /*
537  * Generate paths for a UNION or UNION ALL node
538  */
539 static RelOptInfo *
generate_union_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)540 generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
541 					 List *refnames_tlist,
542 					 List **pTargetList)
543 {
544 	Relids		relids = NULL;
545 	RelOptInfo *result_rel;
546 	double		save_fraction = root->tuple_fraction;
547 	ListCell   *lc;
548 	List	   *pathlist = NIL;
549 	List	   *partial_pathlist = NIL;
550 	bool		partial_paths_valid = true;
551 	bool		consider_parallel = true;
552 	List	   *rellist;
553 	List	   *tlist_list;
554 	List	   *tlist;
555 	Path	   *path;
556 
557 	/*
558 	 * If plain UNION, tell children to fetch all tuples.
559 	 *
560 	 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
561 	 * each arm of the UNION ALL.  One could make a case for reducing the
562 	 * tuple fraction for later arms (discounting by the expected size of the
563 	 * earlier arms' results) but it seems not worth the trouble. The normal
564 	 * case where tuple_fraction isn't already zero is a LIMIT at top level,
565 	 * and passing it down as-is is usually enough to get the desired result
566 	 * of preferring fast-start plans.
567 	 */
568 	if (!op->all)
569 		root->tuple_fraction = 0.0;
570 
571 	/*
572 	 * If any of my children are identical UNION nodes (same op, all-flag, and
573 	 * colTypes) then they can be merged into this node so that we generate
574 	 * only one Append and unique-ification for the lot.  Recurse to find such
575 	 * nodes and compute their children's paths.
576 	 */
577 	rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
578 
579 	/*
580 	 * Generate tlist for Append plan node.
581 	 *
582 	 * The tlist for an Append plan isn't important as far as the Append is
583 	 * concerned, but we must make it look real anyway for the benefit of the
584 	 * next plan level up.
585 	 */
586 	tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
587 								  tlist_list, refnames_tlist);
588 
589 	*pTargetList = tlist;
590 
591 	/* Build path lists and relid set. */
592 	foreach(lc, rellist)
593 	{
594 		RelOptInfo *rel = lfirst(lc);
595 
596 		pathlist = lappend(pathlist, rel->cheapest_total_path);
597 
598 		if (consider_parallel)
599 		{
600 			if (!rel->consider_parallel)
601 			{
602 				consider_parallel = false;
603 				partial_paths_valid = false;
604 			}
605 			else if (rel->partial_pathlist == NIL)
606 				partial_paths_valid = false;
607 			else
608 				partial_pathlist = lappend(partial_pathlist,
609 										   linitial(rel->partial_pathlist));
610 		}
611 
612 		relids = bms_union(relids, rel->relids);
613 	}
614 
615 	/* Build result relation. */
616 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
617 	result_rel->reltarget = create_pathtarget(root, tlist);
618 	result_rel->consider_parallel = consider_parallel;
619 
620 	/*
621 	 * Append the child results together.
622 	 */
623 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
624 									   NIL, NULL, 0, false, -1);
625 
626 	/*
627 	 * For UNION ALL, we just need the Append path.  For UNION, need to add
628 	 * node(s) to remove duplicates.
629 	 */
630 	if (!op->all)
631 		path = make_union_unique(op, path, tlist, root);
632 
633 	add_path(result_rel, path);
634 
635 	/*
636 	 * Estimate number of groups.  For now we just assume the output is unique
637 	 * --- this is certainly true for the UNION case, and we want worst-case
638 	 * estimates anyway.
639 	 */
640 	result_rel->rows = path->rows;
641 
642 	/*
643 	 * Now consider doing the same thing using the partial paths plus Append
644 	 * plus Gather.
645 	 */
646 	if (partial_paths_valid)
647 	{
648 		Path	   *ppath;
649 		ListCell   *lc;
650 		int			parallel_workers = 0;
651 
652 		/* Find the highest number of workers requested for any subpath. */
653 		foreach(lc, partial_pathlist)
654 		{
655 			Path	   *path = lfirst(lc);
656 
657 			parallel_workers = Max(parallel_workers, path->parallel_workers);
658 		}
659 		Assert(parallel_workers > 0);
660 
661 		/*
662 		 * If the use of parallel append is permitted, always request at least
663 		 * log2(# of children) paths.  We assume it can be useful to have
664 		 * extra workers in this case because they will be spread out across
665 		 * the children.  The precise formula is just a guess; see
666 		 * add_paths_to_append_rel.
667 		 */
668 		if (enable_parallel_append)
669 		{
670 			parallel_workers = Max(parallel_workers,
671 								   fls(list_length(partial_pathlist)));
672 			parallel_workers = Min(parallel_workers,
673 								   max_parallel_workers_per_gather);
674 		}
675 		Assert(parallel_workers > 0);
676 
677 		ppath = (Path *)
678 			create_append_path(root, result_rel, NIL, partial_pathlist,
679 							   NIL, NULL,
680 							   parallel_workers, enable_parallel_append,
681 							   -1);
682 		ppath = (Path *)
683 			create_gather_path(root, result_rel, ppath,
684 							   result_rel->reltarget, NULL, NULL);
685 		if (!op->all)
686 			ppath = make_union_unique(op, ppath, tlist, root);
687 		add_path(result_rel, ppath);
688 	}
689 
690 	/* Undo effects of possibly forcing tuple_fraction to 0 */
691 	root->tuple_fraction = save_fraction;
692 
693 	return result_rel;
694 }
695 
696 /*
697  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
698  */
699 static RelOptInfo *
generate_nonunion_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)700 generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
701 						List *refnames_tlist,
702 						List **pTargetList)
703 {
704 	RelOptInfo *result_rel;
705 	RelOptInfo *lrel,
706 			   *rrel;
707 	double		save_fraction = root->tuple_fraction;
708 	Path	   *lpath,
709 			   *rpath,
710 			   *path;
711 	List	   *lpath_tlist,
712 			   *rpath_tlist,
713 			   *tlist_list,
714 			   *tlist,
715 			   *groupList,
716 			   *pathlist;
717 	double		dLeftGroups,
718 				dRightGroups,
719 				dNumGroups,
720 				dNumOutputRows;
721 	bool		use_hash;
722 	SetOpCmd	cmd;
723 	int			firstFlag;
724 
725 	/*
726 	 * Tell children to fetch all tuples.
727 	 */
728 	root->tuple_fraction = 0.0;
729 
730 	/* Recurse on children, ensuring their outputs are marked */
731 	lrel = recurse_set_operations(op->larg, root,
732 								  op->colTypes, op->colCollations,
733 								  false, 0,
734 								  refnames_tlist,
735 								  &lpath_tlist,
736 								  &dLeftGroups);
737 	lpath = lrel->cheapest_total_path;
738 	rrel = recurse_set_operations(op->rarg, root,
739 								  op->colTypes, op->colCollations,
740 								  false, 1,
741 								  refnames_tlist,
742 								  &rpath_tlist,
743 								  &dRightGroups);
744 	rpath = rrel->cheapest_total_path;
745 
746 	/* Undo effects of forcing tuple_fraction to 0 */
747 	root->tuple_fraction = save_fraction;
748 
749 	/*
750 	 * For EXCEPT, we must put the left input first.  For INTERSECT, either
751 	 * order should give the same results, and we prefer to put the smaller
752 	 * input first in order to minimize the size of the hash table in the
753 	 * hashing case.  "Smaller" means the one with the fewer groups.
754 	 */
755 	if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
756 	{
757 		pathlist = list_make2(lpath, rpath);
758 		tlist_list = list_make2(lpath_tlist, rpath_tlist);
759 		firstFlag = 0;
760 	}
761 	else
762 	{
763 		pathlist = list_make2(rpath, lpath);
764 		tlist_list = list_make2(rpath_tlist, lpath_tlist);
765 		firstFlag = 1;
766 	}
767 
768 	/*
769 	 * Generate tlist for Append plan node.
770 	 *
771 	 * The tlist for an Append plan isn't important as far as the Append is
772 	 * concerned, but we must make it look real anyway for the benefit of the
773 	 * next plan level up.  In fact, it has to be real enough that the flag
774 	 * column is shown as a variable not a constant, else setrefs.c will get
775 	 * confused.
776 	 */
777 	tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
778 								  tlist_list, refnames_tlist);
779 
780 	*pTargetList = tlist;
781 
782 	/* Build result relation. */
783 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
784 								 bms_union(lrel->relids, rrel->relids));
785 	result_rel->reltarget = create_pathtarget(root, tlist);
786 
787 	/*
788 	 * Append the child results together.
789 	 */
790 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
791 									   NIL, NULL, 0, false, -1);
792 
793 	/* Identify the grouping semantics */
794 	groupList = generate_setop_grouplist(op, tlist);
795 
796 	/*
797 	 * Estimate number of distinct groups that we'll need hashtable entries
798 	 * for; this is the size of the left-hand input for EXCEPT, or the smaller
799 	 * input for INTERSECT.  Also estimate the number of eventual output rows.
800 	 * In non-ALL cases, we estimate each group produces one output row; in
801 	 * ALL cases use the relevant relation size.  These are worst-case
802 	 * estimates, of course, but we need to be conservative.
803 	 */
804 	if (op->op == SETOP_EXCEPT)
805 	{
806 		dNumGroups = dLeftGroups;
807 		dNumOutputRows = op->all ? lpath->rows : dNumGroups;
808 	}
809 	else
810 	{
811 		dNumGroups = Min(dLeftGroups, dRightGroups);
812 		dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
813 	}
814 
815 	/*
816 	 * Decide whether to hash or sort, and add a sort node if needed.
817 	 */
818 	use_hash = choose_hashed_setop(root, groupList, path,
819 								   dNumGroups, dNumOutputRows,
820 								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
821 
822 	if (groupList && !use_hash)
823 		path = (Path *) create_sort_path(root,
824 										 result_rel,
825 										 path,
826 										 make_pathkeys_for_sortclauses(root,
827 																	   groupList,
828 																	   tlist),
829 										 -1.0);
830 
831 	/*
832 	 * Finally, add a SetOp path node to generate the correct output.
833 	 */
834 	switch (op->op)
835 	{
836 		case SETOP_INTERSECT:
837 			cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
838 			break;
839 		case SETOP_EXCEPT:
840 			cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
841 			break;
842 		default:
843 			elog(ERROR, "unrecognized set op: %d", (int) op->op);
844 			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
845 			break;
846 	}
847 	path = (Path *) create_setop_path(root,
848 									  result_rel,
849 									  path,
850 									  cmd,
851 									  use_hash ? SETOP_HASHED : SETOP_SORTED,
852 									  groupList,
853 									  list_length(op->colTypes) + 1,
854 									  use_hash ? firstFlag : -1,
855 									  dNumGroups,
856 									  dNumOutputRows);
857 
858 	result_rel->rows = path->rows;
859 	add_path(result_rel, path);
860 	return result_rel;
861 }
862 
863 /*
864  * Pull up children of a UNION node that are identically-propertied UNIONs.
865  *
866  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
867  * output rows will be lost anyway.
868  *
869  * NOTE: currently, we ignore collations while determining if a child has
870  * the same properties.  This is semantically sound only so long as all
871  * collations have the same notion of equality.  It is valid from an
872  * implementation standpoint because we don't care about the ordering of
873  * a UNION child's result: UNION ALL results are always unordered, and
874  * generate_union_paths will force a fresh sort if the top level is a UNION.
875  */
876 static List *
plan_union_children(PlannerInfo * root,SetOperationStmt * top_union,List * refnames_tlist,List ** tlist_list)877 plan_union_children(PlannerInfo *root,
878 					SetOperationStmt *top_union,
879 					List *refnames_tlist,
880 					List **tlist_list)
881 {
882 	List	   *pending_rels = list_make1(top_union);
883 	List	   *result = NIL;
884 	List	   *child_tlist;
885 
886 	*tlist_list = NIL;
887 
888 	while (pending_rels != NIL)
889 	{
890 		Node	   *setOp = linitial(pending_rels);
891 
892 		pending_rels = list_delete_first(pending_rels);
893 
894 		if (IsA(setOp, SetOperationStmt))
895 		{
896 			SetOperationStmt *op = (SetOperationStmt *) setOp;
897 
898 			if (op->op == top_union->op &&
899 				(op->all == top_union->all || op->all) &&
900 				equal(op->colTypes, top_union->colTypes))
901 			{
902 				/* Same UNION, so fold children into parent */
903 				pending_rels = lcons(op->rarg, pending_rels);
904 				pending_rels = lcons(op->larg, pending_rels);
905 				continue;
906 			}
907 		}
908 
909 		/*
910 		 * Not same, so plan this child separately.
911 		 *
912 		 * Note we disallow any resjunk columns in child results.  This is
913 		 * necessary since the Append node that implements the union won't do
914 		 * any projection, and upper levels will get confused if some of our
915 		 * output tuples have junk and some don't.  This case only arises when
916 		 * we have an EXCEPT or INTERSECT as child, else there won't be
917 		 * resjunk anyway.
918 		 */
919 		result = lappend(result, recurse_set_operations(setOp, root,
920 														top_union->colTypes,
921 														top_union->colCollations,
922 														false, -1,
923 														refnames_tlist,
924 														&child_tlist,
925 														NULL));
926 		*tlist_list = lappend(*tlist_list, child_tlist);
927 	}
928 
929 	return result;
930 }
931 
932 /*
933  * Add nodes to the given path tree to unique-ify the result of a UNION.
934  */
935 static Path *
make_union_unique(SetOperationStmt * op,Path * path,List * tlist,PlannerInfo * root)936 make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
937 				  PlannerInfo *root)
938 {
939 	RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
940 	List	   *groupList;
941 	double		dNumGroups;
942 
943 	/* Identify the grouping semantics */
944 	groupList = generate_setop_grouplist(op, tlist);
945 
946 	/*
947 	 * XXX for the moment, take the number of distinct groups as equal to the
948 	 * total input size, ie, the worst case.  This is too conservative, but
949 	 * it's not clear how to get a decent estimate of the true size.  One
950 	 * should note as well the propensity of novices to write UNION rather
951 	 * than UNION ALL even when they don't expect any duplicates...
952 	 */
953 	dNumGroups = path->rows;
954 
955 	/* Decide whether to hash or sort */
956 	if (choose_hashed_setop(root, groupList, path,
957 							dNumGroups, dNumGroups,
958 							"UNION"))
959 	{
960 		/* Hashed aggregate plan --- no sort needed */
961 		path = (Path *) create_agg_path(root,
962 										result_rel,
963 										path,
964 										create_pathtarget(root, tlist),
965 										AGG_HASHED,
966 										AGGSPLIT_SIMPLE,
967 										groupList,
968 										NIL,
969 										NULL,
970 										dNumGroups);
971 	}
972 	else
973 	{
974 		/* Sort and Unique */
975 		if (groupList)
976 			path = (Path *)
977 				create_sort_path(root,
978 								 result_rel,
979 								 path,
980 								 make_pathkeys_for_sortclauses(root,
981 															   groupList,
982 															   tlist),
983 								 -1.0);
984 		path = (Path *) create_upper_unique_path(root,
985 												 result_rel,
986 												 path,
987 												 list_length(path->pathkeys),
988 												 dNumGroups);
989 	}
990 
991 	return path;
992 }
993 
994 /*
995  * postprocess_setop_rel - perform steps required after adding paths
996  */
997 static void
postprocess_setop_rel(PlannerInfo * root,RelOptInfo * rel)998 postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
999 {
1000 	/*
1001 	 * We don't currently worry about allowing FDWs to contribute paths to
1002 	 * this relation, but give extensions a chance.
1003 	 */
1004 	if (create_upper_paths_hook)
1005 		(*create_upper_paths_hook) (root, UPPERREL_SETOP,
1006 									NULL, rel, NULL);
1007 
1008 	/* Select cheapest path */
1009 	set_cheapest(rel);
1010 }
1011 
1012 /*
1013  * choose_hashed_setop - should we use hashing for a set operation?
1014  */
1015 static bool
choose_hashed_setop(PlannerInfo * root,List * groupClauses,Path * input_path,double dNumGroups,double dNumOutputRows,const char * construct)1016 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1017 					Path *input_path,
1018 					double dNumGroups, double dNumOutputRows,
1019 					const char *construct)
1020 {
1021 	int			numGroupCols = list_length(groupClauses);
1022 	Size		hash_mem_limit = get_hash_memory_limit();
1023 	bool		can_sort;
1024 	bool		can_hash;
1025 	Size		hashentrysize;
1026 	Path		hashed_p;
1027 	Path		sorted_p;
1028 	double		tuple_fraction;
1029 
1030 	/* Check whether the operators support sorting or hashing */
1031 	can_sort = grouping_is_sortable(groupClauses);
1032 	can_hash = grouping_is_hashable(groupClauses);
1033 	if (can_hash && can_sort)
1034 	{
1035 		/* we have a meaningful choice to make, continue ... */
1036 	}
1037 	else if (can_hash)
1038 		return true;
1039 	else if (can_sort)
1040 		return false;
1041 	else
1042 		ereport(ERROR,
1043 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1044 		/* translator: %s is UNION, INTERSECT, or EXCEPT */
1045 				 errmsg("could not implement %s", construct),
1046 				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1047 
1048 	/* Prefer sorting when enable_hashagg is off */
1049 	if (!enable_hashagg)
1050 		return false;
1051 
1052 	/*
1053 	 * Don't do it if it doesn't look like the hashtable will fit into
1054 	 * hash_mem.
1055 	 */
1056 	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1057 
1058 	if (hashentrysize * dNumGroups > hash_mem_limit)
1059 		return false;
1060 
1061 	/*
1062 	 * See if the estimated cost is no more than doing it the other way.
1063 	 *
1064 	 * We need to consider input_plan + hashagg versus input_plan + sort +
1065 	 * group.  Note that the actual result plan might involve a SetOp or
1066 	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1067 	 * should be close enough for our purposes here.
1068 	 *
1069 	 * These path variables are dummies that just hold cost fields; we don't
1070 	 * make actual Paths for these steps.
1071 	 */
1072 	cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1073 			 numGroupCols, dNumGroups,
1074 			 NIL,
1075 			 input_path->startup_cost, input_path->total_cost,
1076 			 input_path->rows, input_path->pathtarget->width);
1077 
1078 	/*
1079 	 * Now for the sorted case.  Note that the input is *always* unsorted,
1080 	 * since it was made by appending unrelated sub-relations together.
1081 	 */
1082 	sorted_p.startup_cost = input_path->startup_cost;
1083 	sorted_p.total_cost = input_path->total_cost;
1084 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1085 	cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1086 			  input_path->rows, input_path->pathtarget->width,
1087 			  0.0, work_mem, -1.0);
1088 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1089 			   NIL,
1090 			   sorted_p.startup_cost, sorted_p.total_cost,
1091 			   input_path->rows);
1092 
1093 	/*
1094 	 * Now make the decision using the top-level tuple fraction.  First we
1095 	 * have to convert an absolute count (LIMIT) into fractional form.
1096 	 */
1097 	tuple_fraction = root->tuple_fraction;
1098 	if (tuple_fraction >= 1.0)
1099 		tuple_fraction /= dNumOutputRows;
1100 
1101 	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1102 									  tuple_fraction) < 0)
1103 	{
1104 		/* Hashed is cheaper, so use it */
1105 		return true;
1106 	}
1107 	return false;
1108 }
1109 
1110 /*
1111  * Generate targetlist for a set-operation plan node
1112  *
1113  * colTypes: OID list of set-op's result column datatypes
1114  * colCollations: OID list of set-op's result column collations
1115  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1116  * varno: varno to use in generated Vars
1117  * hack_constants: true to copy up constants (see comments in code)
1118  * input_tlist: targetlist of this node's input node
1119  * refnames_tlist: targetlist to take column names from
1120  */
1121 static List *
generate_setop_tlist(List * colTypes,List * colCollations,int flag,Index varno,bool hack_constants,List * input_tlist,List * refnames_tlist)1122 generate_setop_tlist(List *colTypes, List *colCollations,
1123 					 int flag,
1124 					 Index varno,
1125 					 bool hack_constants,
1126 					 List *input_tlist,
1127 					 List *refnames_tlist)
1128 {
1129 	List	   *tlist = NIL;
1130 	int			resno = 1;
1131 	ListCell   *ctlc,
1132 			   *cclc,
1133 			   *itlc,
1134 			   *rtlc;
1135 	TargetEntry *tle;
1136 	Node	   *expr;
1137 
1138 	forfour(ctlc, colTypes, cclc, colCollations,
1139 			itlc, input_tlist, rtlc, refnames_tlist)
1140 	{
1141 		Oid			colType = lfirst_oid(ctlc);
1142 		Oid			colColl = lfirst_oid(cclc);
1143 		TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1144 		TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1145 
1146 		Assert(inputtle->resno == resno);
1147 		Assert(reftle->resno == resno);
1148 		Assert(!inputtle->resjunk);
1149 		Assert(!reftle->resjunk);
1150 
1151 		/*
1152 		 * Generate columns referencing input columns and having appropriate
1153 		 * data types and column names.  Insert datatype coercions where
1154 		 * necessary.
1155 		 *
1156 		 * HACK: constants in the input's targetlist are copied up as-is
1157 		 * rather than being referenced as subquery outputs.  This is mainly
1158 		 * to ensure that when we try to coerce them to the output column's
1159 		 * datatype, the right things happen for UNKNOWN constants.  But do
1160 		 * this only at the first level of subquery-scan plans; we don't want
1161 		 * phony constants appearing in the output tlists of upper-level
1162 		 * nodes!
1163 		 */
1164 		if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1165 			expr = (Node *) inputtle->expr;
1166 		else
1167 			expr = (Node *) makeVar(varno,
1168 									inputtle->resno,
1169 									exprType((Node *) inputtle->expr),
1170 									exprTypmod((Node *) inputtle->expr),
1171 									exprCollation((Node *) inputtle->expr),
1172 									0);
1173 
1174 		if (exprType(expr) != colType)
1175 		{
1176 			/*
1177 			 * Note: it's not really cool to be applying coerce_to_common_type
1178 			 * here; one notable point is that assign_expr_collations never
1179 			 * gets run on any generated nodes.  For the moment that's not a
1180 			 * problem because we force the correct exposed collation below.
1181 			 * It would likely be best to make the parser generate the correct
1182 			 * output tlist for every set-op to begin with, though.
1183 			 */
1184 			expr = coerce_to_common_type(NULL,	/* no UNKNOWNs here */
1185 										 expr,
1186 										 colType,
1187 										 "UNION/INTERSECT/EXCEPT");
1188 		}
1189 
1190 		/*
1191 		 * Ensure the tlist entry's exposed collation matches the set-op. This
1192 		 * is necessary because plan_set_operations() reports the result
1193 		 * ordering as a list of SortGroupClauses, which don't carry collation
1194 		 * themselves but just refer to tlist entries.  If we don't show the
1195 		 * right collation then planner.c might do the wrong thing in
1196 		 * higher-level queries.
1197 		 *
1198 		 * Note we use RelabelType, not CollateExpr, since this expression
1199 		 * will reach the executor without any further processing.
1200 		 */
1201 		if (exprCollation(expr) != colColl)
1202 			expr = applyRelabelType(expr,
1203 									exprType(expr), exprTypmod(expr), colColl,
1204 									COERCE_IMPLICIT_CAST, -1, false);
1205 
1206 		tle = makeTargetEntry((Expr *) expr,
1207 							  (AttrNumber) resno++,
1208 							  pstrdup(reftle->resname),
1209 							  false);
1210 
1211 		/*
1212 		 * By convention, all non-resjunk columns in a setop tree have
1213 		 * ressortgroupref equal to their resno.  In some cases the ref isn't
1214 		 * needed, but this is a cleaner way than modifying the tlist later.
1215 		 */
1216 		tle->ressortgroupref = tle->resno;
1217 
1218 		tlist = lappend(tlist, tle);
1219 	}
1220 
1221 	if (flag >= 0)
1222 	{
1223 		/* Add a resjunk flag column */
1224 		/* flag value is the given constant */
1225 		expr = (Node *) makeConst(INT4OID,
1226 								  -1,
1227 								  InvalidOid,
1228 								  sizeof(int32),
1229 								  Int32GetDatum(flag),
1230 								  false,
1231 								  true);
1232 		tle = makeTargetEntry((Expr *) expr,
1233 							  (AttrNumber) resno++,
1234 							  pstrdup("flag"),
1235 							  true);
1236 		tlist = lappend(tlist, tle);
1237 	}
1238 
1239 	return tlist;
1240 }
1241 
1242 /*
1243  * Generate targetlist for a set-operation Append node
1244  *
1245  * colTypes: OID list of set-op's result column datatypes
1246  * colCollations: OID list of set-op's result column collations
1247  * flag: true to create a flag column copied up from subplans
1248  * input_tlists: list of tlists for sub-plans of the Append
1249  * refnames_tlist: targetlist to take column names from
1250  *
1251  * The entries in the Append's targetlist should always be simple Vars;
1252  * we just have to make sure they have the right datatypes/typmods/collations.
1253  * The Vars are always generated with varno 0.
1254  *
1255  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1256  * cannot figure out a realistic width for the tlist we make here.  But we
1257  * ought to refactor this code to produce a PathTarget directly, anyway.
1258  */
1259 static List *
generate_append_tlist(List * colTypes,List * colCollations,bool flag,List * input_tlists,List * refnames_tlist)1260 generate_append_tlist(List *colTypes, List *colCollations,
1261 					  bool flag,
1262 					  List *input_tlists,
1263 					  List *refnames_tlist)
1264 {
1265 	List	   *tlist = NIL;
1266 	int			resno = 1;
1267 	ListCell   *curColType;
1268 	ListCell   *curColCollation;
1269 	ListCell   *ref_tl_item;
1270 	int			colindex;
1271 	TargetEntry *tle;
1272 	Node	   *expr;
1273 	ListCell   *tlistl;
1274 	int32	   *colTypmods;
1275 
1276 	/*
1277 	 * First extract typmods to use.
1278 	 *
1279 	 * If the inputs all agree on type and typmod of a particular column, use
1280 	 * that typmod; else use -1.
1281 	 */
1282 	colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1283 
1284 	foreach(tlistl, input_tlists)
1285 	{
1286 		List	   *subtlist = (List *) lfirst(tlistl);
1287 		ListCell   *subtlistl;
1288 
1289 		curColType = list_head(colTypes);
1290 		colindex = 0;
1291 		foreach(subtlistl, subtlist)
1292 		{
1293 			TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1294 
1295 			if (subtle->resjunk)
1296 				continue;
1297 			Assert(curColType != NULL);
1298 			if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1299 			{
1300 				/* If first subplan, copy the typmod; else compare */
1301 				int32		subtypmod = exprTypmod((Node *) subtle->expr);
1302 
1303 				if (tlistl == list_head(input_tlists))
1304 					colTypmods[colindex] = subtypmod;
1305 				else if (subtypmod != colTypmods[colindex])
1306 					colTypmods[colindex] = -1;
1307 			}
1308 			else
1309 			{
1310 				/* types disagree, so force typmod to -1 */
1311 				colTypmods[colindex] = -1;
1312 			}
1313 			curColType = lnext(colTypes, curColType);
1314 			colindex++;
1315 		}
1316 		Assert(curColType == NULL);
1317 	}
1318 
1319 	/*
1320 	 * Now we can build the tlist for the Append.
1321 	 */
1322 	colindex = 0;
1323 	forthree(curColType, colTypes, curColCollation, colCollations,
1324 			 ref_tl_item, refnames_tlist)
1325 	{
1326 		Oid			colType = lfirst_oid(curColType);
1327 		int32		colTypmod = colTypmods[colindex++];
1328 		Oid			colColl = lfirst_oid(curColCollation);
1329 		TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1330 
1331 		Assert(reftle->resno == resno);
1332 		Assert(!reftle->resjunk);
1333 		expr = (Node *) makeVar(0,
1334 								resno,
1335 								colType,
1336 								colTypmod,
1337 								colColl,
1338 								0);
1339 		tle = makeTargetEntry((Expr *) expr,
1340 							  (AttrNumber) resno++,
1341 							  pstrdup(reftle->resname),
1342 							  false);
1343 
1344 		/*
1345 		 * By convention, all non-resjunk columns in a setop tree have
1346 		 * ressortgroupref equal to their resno.  In some cases the ref isn't
1347 		 * needed, but this is a cleaner way than modifying the tlist later.
1348 		 */
1349 		tle->ressortgroupref = tle->resno;
1350 
1351 		tlist = lappend(tlist, tle);
1352 	}
1353 
1354 	if (flag)
1355 	{
1356 		/* Add a resjunk flag column */
1357 		/* flag value is shown as copied up from subplan */
1358 		expr = (Node *) makeVar(0,
1359 								resno,
1360 								INT4OID,
1361 								-1,
1362 								InvalidOid,
1363 								0);
1364 		tle = makeTargetEntry((Expr *) expr,
1365 							  (AttrNumber) resno++,
1366 							  pstrdup("flag"),
1367 							  true);
1368 		tlist = lappend(tlist, tle);
1369 	}
1370 
1371 	pfree(colTypmods);
1372 
1373 	return tlist;
1374 }
1375 
1376 /*
1377  * generate_setop_grouplist
1378  *		Build a SortGroupClause list defining the sort/grouping properties
1379  *		of the setop's output columns.
1380  *
1381  * Parse analysis already determined the properties and built a suitable
1382  * list, except that the entries do not have sortgrouprefs set because
1383  * the parser output representation doesn't include a tlist for each
1384  * setop.  So what we need to do here is copy that list and install
1385  * proper sortgrouprefs into it (copying those from the targetlist).
1386  */
1387 static List *
generate_setop_grouplist(SetOperationStmt * op,List * targetlist)1388 generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1389 {
1390 	List	   *grouplist = copyObject(op->groupClauses);
1391 	ListCell   *lg;
1392 	ListCell   *lt;
1393 
1394 	lg = list_head(grouplist);
1395 	foreach(lt, targetlist)
1396 	{
1397 		TargetEntry *tle = (TargetEntry *) lfirst(lt);
1398 		SortGroupClause *sgc;
1399 
1400 		if (tle->resjunk)
1401 		{
1402 			/* resjunk columns should not have sortgrouprefs */
1403 			Assert(tle->ressortgroupref == 0);
1404 			continue;			/* ignore resjunk columns */
1405 		}
1406 
1407 		/* non-resjunk columns should have sortgroupref = resno */
1408 		Assert(tle->ressortgroupref == tle->resno);
1409 
1410 		/* non-resjunk columns should have grouping clauses */
1411 		Assert(lg != NULL);
1412 		sgc = (SortGroupClause *) lfirst(lg);
1413 		lg = lnext(grouplist, lg);
1414 		Assert(sgc->tleSortGroupRef == 0);
1415 
1416 		sgc->tleSortGroupRef = tle->ressortgroupref;
1417 	}
1418 	Assert(lg == NULL);
1419 	return grouplist;
1420 }
1421