1 /*-------------------------------------------------------------------------
2 *
3 * prepunion.c
4 * Routines to plan set-operation queries. The filename is a leftover
5 * from a time when only UNIONs were implemented.
6 *
7 * There are two code paths in the planner for set-operation queries.
8 * If a subquery consists entirely of simple UNION ALL operations, it
9 * is converted into an "append relation". Otherwise, it is handled
10 * by the general code in this module (plan_set_operations and its
11 * subroutines). There is some support code here for the append-relation
12 * case, but most of the heavy lifting for that is done elsewhere,
13 * notably in prepjointree.c and allpaths.c.
14 *
15 * There is also some code here to support planning of queries that use
16 * inheritance (SELECT FROM foo*). Inheritance trees are converted into
17 * append relations, and thenceforth share code with the UNION ALL case.
18 *
19 *
20 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
21 * Portions Copyright (c) 1994, Regents of the University of California
22 *
23 *
24 * IDENTIFICATION
25 * src/backend/optimizer/prep/prepunion.c
26 *
27 *-------------------------------------------------------------------------
28 */
29 #include "postgres.h"
30
31 #include <limits.h>
32
33 #include "access/heapam.h"
34 #include "access/htup_details.h"
35 #include "access/sysattr.h"
36 #include "catalog/partition.h"
37 #include "catalog/pg_inherits.h"
38 #include "catalog/pg_type.h"
39 #include "miscadmin.h"
40 #include "nodes/makefuncs.h"
41 #include "nodes/nodeFuncs.h"
42 #include "optimizer/cost.h"
43 #include "optimizer/pathnode.h"
44 #include "optimizer/paths.h"
45 #include "optimizer/planmain.h"
46 #include "optimizer/planner.h"
47 #include "optimizer/prep.h"
48 #include "optimizer/tlist.h"
49 #include "parser/parse_coerce.h"
50 #include "parser/parsetree.h"
51 #include "utils/lsyscache.h"
52 #include "utils/rel.h"
53 #include "utils/selfuncs.h"
54
55
56 typedef struct
57 {
58 PlannerInfo *root;
59 int nappinfos;
60 AppendRelInfo **appinfos;
61 } adjust_appendrel_attrs_context;
62
63 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
64 List *colTypes, List *colCollations,
65 bool junkOK,
66 int flag, List *refnames_tlist,
67 List **pTargetList,
68 double *pNumGroups);
69 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
70 PlannerInfo *root,
71 List *refnames_tlist,
72 List **pTargetList);
73 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
74 List *refnames_tlist,
75 List **pTargetList);
76 static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
77 List *refnames_tlist,
78 List **pTargetList);
79 static List *plan_union_children(PlannerInfo *root,
80 SetOperationStmt *top_union,
81 List *refnames_tlist,
82 List **tlist_list);
83 static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
84 PlannerInfo *root);
85 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
86 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
87 Path *input_path,
88 double dNumGroups, double dNumOutputRows,
89 const char *construct);
90 static List *generate_setop_tlist(List *colTypes, List *colCollations,
91 int flag,
92 Index varno,
93 bool hack_constants,
94 List *input_tlist,
95 List *refnames_tlist);
96 static List *generate_append_tlist(List *colTypes, List *colCollations,
97 bool flag,
98 List *input_tlists,
99 List *refnames_tlist);
100 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
101 static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
102 Index rti);
103 static void expand_partitioned_rtentry(PlannerInfo *root,
104 RangeTblEntry *parentrte,
105 Index parentRTindex, Relation parentrel,
106 PlanRowMark *top_parentrc, LOCKMODE lockmode,
107 List **appinfos);
108 static void expand_single_inheritance_child(PlannerInfo *root,
109 RangeTblEntry *parentrte,
110 Index parentRTindex, Relation parentrel,
111 PlanRowMark *top_parentrc, Relation childrel,
112 List **appinfos, RangeTblEntry **childrte_p,
113 Index *childRTindex_p);
114 static void make_inh_translation_list(Relation oldrelation,
115 Relation newrelation,
116 Index newvarno,
117 List **translated_vars);
118 static Bitmapset *translate_col_privs(const Bitmapset *parent_privs,
119 List *translated_vars);
120 static Node *adjust_appendrel_attrs_mutator(Node *node,
121 adjust_appendrel_attrs_context *context);
122 static Relids adjust_child_relids(Relids relids, int nappinfos,
123 AppendRelInfo **appinfos);
124 static List *adjust_inherited_tlist(List *tlist,
125 AppendRelInfo *context);
126
127
128 /*
129 * plan_set_operations
130 *
131 * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
132 *
133 * This routine only deals with the setOperations tree of the given query.
134 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
135 * when we return to grouping_planner; likewise for LIMIT.
136 *
137 * What we return is an "upperrel" RelOptInfo containing at least one Path
138 * that implements the set-operation tree. In addition, root->processed_tlist
139 * receives a targetlist representing the output of the topmost setop node.
140 */
141 RelOptInfo *
plan_set_operations(PlannerInfo * root)142 plan_set_operations(PlannerInfo *root)
143 {
144 Query *parse = root->parse;
145 SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
146 Node *node;
147 RangeTblEntry *leftmostRTE;
148 Query *leftmostQuery;
149 RelOptInfo *setop_rel;
150 List *top_tlist;
151
152 Assert(topop);
153
154 /* check for unsupported stuff */
155 Assert(parse->jointree->fromlist == NIL);
156 Assert(parse->jointree->quals == NULL);
157 Assert(parse->groupClause == NIL);
158 Assert(parse->havingQual == NULL);
159 Assert(parse->windowClause == NIL);
160 Assert(parse->distinctClause == NIL);
161
162 /*
163 * We'll need to build RelOptInfos for each of the leaf subqueries, which
164 * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
165 * arrays for that.
166 */
167 setup_simple_rel_arrays(root);
168
169 /*
170 * Populate append_rel_array with each AppendRelInfo to allow direct
171 * lookups by child relid.
172 */
173 setup_append_rel_array(root);
174
175 /*
176 * Find the leftmost component Query. We need to use its column names for
177 * all generated tlists (else SELECT INTO won't work right).
178 */
179 node = topop->larg;
180 while (node && IsA(node, SetOperationStmt))
181 node = ((SetOperationStmt *) node)->larg;
182 Assert(node && IsA(node, RangeTblRef));
183 leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
184 leftmostQuery = leftmostRTE->subquery;
185 Assert(leftmostQuery != NULL);
186
187 /*
188 * If the topmost node is a recursive union, it needs special processing.
189 */
190 if (root->hasRecursion)
191 {
192 setop_rel = generate_recursion_path(topop, root,
193 leftmostQuery->targetList,
194 &top_tlist);
195 }
196 else
197 {
198 /*
199 * Recurse on setOperations tree to generate paths for set ops. The
200 * final output paths should have just the column types shown as the
201 * output from the top-level node, plus possibly resjunk working
202 * columns (we can rely on upper-level nodes to deal with that).
203 */
204 setop_rel = recurse_set_operations((Node *) topop, root,
205 topop->colTypes, topop->colCollations,
206 true, -1,
207 leftmostQuery->targetList,
208 &top_tlist,
209 NULL);
210 }
211
212 /* Must return the built tlist into root->processed_tlist. */
213 root->processed_tlist = top_tlist;
214
215 return setop_rel;
216 }
217
218 /*
219 * recurse_set_operations
220 * Recursively handle one step in a tree of set operations
221 *
222 * colTypes: OID list of set-op's result column datatypes
223 * colCollations: OID list of set-op's result column collations
224 * junkOK: if true, child resjunk columns may be left in the result
225 * flag: if >= 0, add a resjunk output column indicating value of flag
226 * refnames_tlist: targetlist to take column names from
227 *
228 * Returns a RelOptInfo for the subtree, as well as these output parameters:
229 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
230 * *pNumGroups: if not NULL, we estimate the number of distinct groups
231 * in the result, and store it there
232 *
233 * The pTargetList output parameter is mostly redundant with the pathtarget
234 * of the returned RelOptInfo, but for the moment we need it because much of
235 * the logic in this file depends on flag columns being marked resjunk.
236 * Pending a redesign of how that works, this is the easy way out.
237 *
238 * We don't have to care about typmods here: the only allowed difference
239 * between set-op input and output typmods is input is a specific typmod
240 * and output is -1, and that does not require a coercion.
241 */
242 static RelOptInfo *
recurse_set_operations(Node * setOp,PlannerInfo * root,List * colTypes,List * colCollations,bool junkOK,int flag,List * refnames_tlist,List ** pTargetList,double * pNumGroups)243 recurse_set_operations(Node *setOp, PlannerInfo *root,
244 List *colTypes, List *colCollations,
245 bool junkOK,
246 int flag, List *refnames_tlist,
247 List **pTargetList,
248 double *pNumGroups)
249 {
250 RelOptInfo *rel = NULL; /* keep compiler quiet */
251
252 /* Guard against stack overflow due to overly complex setop nests */
253 check_stack_depth();
254
255 if (IsA(setOp, RangeTblRef))
256 {
257 RangeTblRef *rtr = (RangeTblRef *) setOp;
258 RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
259 Query *subquery = rte->subquery;
260 PlannerInfo *subroot;
261 RelOptInfo *final_rel;
262 Path *subpath;
263 Path *path;
264 List *tlist;
265
266 Assert(subquery != NULL);
267
268 /* Build a RelOptInfo for this leaf subquery. */
269 rel = build_simple_rel(root, rtr->rtindex, NULL);
270
271 /* plan_params should not be in use in current query level */
272 Assert(root->plan_params == NIL);
273
274 /* Generate a subroot and Paths for the subquery */
275 subroot = rel->subroot = subquery_planner(root->glob, subquery,
276 root,
277 false,
278 root->tuple_fraction);
279
280 /*
281 * It should not be possible for the primitive query to contain any
282 * cross-references to other primitive queries in the setop tree.
283 */
284 if (root->plan_params)
285 elog(ERROR, "unexpected outer reference in set operation subquery");
286
287 /* Figure out the appropriate target list for this subquery. */
288 tlist = generate_setop_tlist(colTypes, colCollations,
289 flag,
290 rtr->rtindex,
291 true,
292 subroot->processed_tlist,
293 refnames_tlist);
294 rel->reltarget = create_pathtarget(root, tlist);
295
296 /* Return the fully-fledged tlist to caller, too */
297 *pTargetList = tlist;
298
299 /*
300 * Mark rel with estimated output rows, width, etc. Note that we have
301 * to do this before generating outer-query paths, else
302 * cost_subqueryscan is not happy.
303 */
304 set_subquery_size_estimates(root, rel);
305
306 /*
307 * Since we may want to add a partial path to this relation, we must
308 * set its consider_parallel flag correctly.
309 */
310 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
311 rel->consider_parallel = final_rel->consider_parallel;
312
313 /*
314 * For the moment, we consider only a single Path for the subquery.
315 * This should change soon (make it look more like
316 * set_subquery_pathlist).
317 */
318 subpath = get_cheapest_fractional_path(final_rel,
319 root->tuple_fraction);
320
321 /*
322 * Stick a SubqueryScanPath atop that.
323 *
324 * We don't bother to determine the subquery's output ordering since
325 * it won't be reflected in the set-op result anyhow; so just label
326 * the SubqueryScanPath with nil pathkeys. (XXX that should change
327 * soon too, likely.)
328 */
329 path = (Path *) create_subqueryscan_path(root, rel, subpath,
330 NIL, NULL);
331
332 add_path(rel, path);
333
334 /*
335 * If we have a partial path for the child relation, we can use that
336 * to build a partial path for this relation. But there's no point in
337 * considering any path but the cheapest.
338 */
339 if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
340 final_rel->partial_pathlist != NIL)
341 {
342 Path *partial_subpath;
343 Path *partial_path;
344
345 partial_subpath = linitial(final_rel->partial_pathlist);
346 partial_path = (Path *)
347 create_subqueryscan_path(root, rel, partial_subpath,
348 NIL, NULL);
349 add_partial_path(rel, partial_path);
350 }
351
352 /*
353 * Estimate number of groups if caller wants it. If the subquery used
354 * grouping or aggregation, its output is probably mostly unique
355 * anyway; otherwise do statistical estimation.
356 *
357 * XXX you don't really want to know about this: we do the estimation
358 * using the subquery's original targetlist expressions, not the
359 * subroot->processed_tlist which might seem more appropriate. The
360 * reason is that if the subquery is itself a setop, it may return a
361 * processed_tlist containing "varno 0" Vars generated by
362 * generate_append_tlist, and those would confuse estimate_num_groups
363 * mightily. We ought to get rid of the "varno 0" hack, but that
364 * requires a redesign of the parsetree representation of setops, so
365 * that there can be an RTE corresponding to each setop's output.
366 */
367 if (pNumGroups)
368 {
369 if (subquery->groupClause || subquery->groupingSets ||
370 subquery->distinctClause ||
371 subroot->hasHavingQual || subquery->hasAggs)
372 *pNumGroups = subpath->rows;
373 else
374 *pNumGroups = estimate_num_groups(subroot,
375 get_tlist_exprs(subquery->targetList, false),
376 subpath->rows,
377 NULL);
378 }
379 }
380 else if (IsA(setOp, SetOperationStmt))
381 {
382 SetOperationStmt *op = (SetOperationStmt *) setOp;
383
384 /* UNIONs are much different from INTERSECT/EXCEPT */
385 if (op->op == SETOP_UNION)
386 rel = generate_union_paths(op, root,
387 refnames_tlist,
388 pTargetList);
389 else
390 rel = generate_nonunion_paths(op, root,
391 refnames_tlist,
392 pTargetList);
393 if (pNumGroups)
394 *pNumGroups = rel->rows;
395
396 /*
397 * If necessary, add a Result node to project the caller-requested
398 * output columns.
399 *
400 * XXX you don't really want to know about this: setrefs.c will apply
401 * fix_upper_expr() to the Result node's tlist. This would fail if the
402 * Vars generated by generate_setop_tlist() were not exactly equal()
403 * to the corresponding tlist entries of the subplan. However, since
404 * the subplan was generated by generate_union_plan() or
405 * generate_nonunion_plan(), and hence its tlist was generated by
406 * generate_append_tlist(), this will work. We just tell
407 * generate_setop_tlist() to use varno 0.
408 */
409 if (flag >= 0 ||
410 !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
411 !tlist_same_collations(*pTargetList, colCollations, junkOK))
412 {
413 PathTarget *target;
414 ListCell *lc;
415
416 *pTargetList = generate_setop_tlist(colTypes, colCollations,
417 flag,
418 0,
419 false,
420 *pTargetList,
421 refnames_tlist);
422 target = create_pathtarget(root, *pTargetList);
423
424 /* Apply projection to each path */
425 foreach(lc, rel->pathlist)
426 {
427 Path *subpath = (Path *) lfirst(lc);
428 Path *path;
429
430 Assert(subpath->param_info == NULL);
431 path = apply_projection_to_path(root, subpath->parent,
432 subpath, target);
433 /* If we had to add a Result, path is different from subpath */
434 if (path != subpath)
435 lfirst(lc) = path;
436 }
437
438 /* Apply projection to each partial path */
439 foreach(lc, rel->partial_pathlist)
440 {
441 Path *subpath = (Path *) lfirst(lc);
442 Path *path;
443
444 Assert(subpath->param_info == NULL);
445
446 /* avoid apply_projection_to_path, in case of multiple refs */
447 path = (Path *) create_projection_path(root, subpath->parent,
448 subpath, target);
449 lfirst(lc) = path;
450 }
451 }
452 }
453 else
454 {
455 elog(ERROR, "unrecognized node type: %d",
456 (int) nodeTag(setOp));
457 *pTargetList = NIL;
458 }
459
460 postprocess_setop_rel(root, rel);
461
462 return rel;
463 }
464
465 /*
466 * Generate paths for a recursive UNION node
467 */
468 static RelOptInfo *
generate_recursion_path(SetOperationStmt * setOp,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)469 generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
470 List *refnames_tlist,
471 List **pTargetList)
472 {
473 RelOptInfo *result_rel;
474 Path *path;
475 RelOptInfo *lrel,
476 *rrel;
477 Path *lpath;
478 Path *rpath;
479 List *lpath_tlist;
480 List *rpath_tlist;
481 List *tlist;
482 List *groupList;
483 double dNumGroups;
484
485 /* Parser should have rejected other cases */
486 if (setOp->op != SETOP_UNION)
487 elog(ERROR, "only UNION queries can be recursive");
488 /* Worktable ID should be assigned */
489 Assert(root->wt_param_id >= 0);
490
491 /*
492 * Unlike a regular UNION node, process the left and right inputs
493 * separately without any intention of combining them into one Append.
494 */
495 lrel = recurse_set_operations(setOp->larg, root,
496 setOp->colTypes, setOp->colCollations,
497 false, -1,
498 refnames_tlist,
499 &lpath_tlist,
500 NULL);
501 lpath = lrel->cheapest_total_path;
502 /* The right path will want to look at the left one ... */
503 root->non_recursive_path = lpath;
504 rrel = recurse_set_operations(setOp->rarg, root,
505 setOp->colTypes, setOp->colCollations,
506 false, -1,
507 refnames_tlist,
508 &rpath_tlist,
509 NULL);
510 rpath = rrel->cheapest_total_path;
511 root->non_recursive_path = NULL;
512
513 /*
514 * Generate tlist for RecursiveUnion path node --- same as in Append cases
515 */
516 tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
517 list_make2(lpath_tlist, rpath_tlist),
518 refnames_tlist);
519
520 *pTargetList = tlist;
521
522 /* Build result relation. */
523 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
524 bms_union(lrel->relids, rrel->relids));
525 result_rel->reltarget = create_pathtarget(root, tlist);
526
527 /*
528 * If UNION, identify the grouping operators
529 */
530 if (setOp->all)
531 {
532 groupList = NIL;
533 dNumGroups = 0;
534 }
535 else
536 {
537 /* Identify the grouping semantics */
538 groupList = generate_setop_grouplist(setOp, tlist);
539
540 /* We only support hashing here */
541 if (!grouping_is_hashable(groupList))
542 ereport(ERROR,
543 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
544 errmsg("could not implement recursive UNION"),
545 errdetail("All column datatypes must be hashable.")));
546
547 /*
548 * For the moment, take the number of distinct groups as equal to the
549 * total input size, ie, the worst case.
550 */
551 dNumGroups = lpath->rows + rpath->rows * 10;
552 }
553
554 /*
555 * And make the path node.
556 */
557 path = (Path *) create_recursiveunion_path(root,
558 result_rel,
559 lpath,
560 rpath,
561 result_rel->reltarget,
562 groupList,
563 root->wt_param_id,
564 dNumGroups);
565
566 add_path(result_rel, path);
567 postprocess_setop_rel(root, result_rel);
568 return result_rel;
569 }
570
571 /*
572 * Generate paths for a UNION or UNION ALL node
573 */
574 static RelOptInfo *
generate_union_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)575 generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
576 List *refnames_tlist,
577 List **pTargetList)
578 {
579 Relids relids = NULL;
580 RelOptInfo *result_rel;
581 double save_fraction = root->tuple_fraction;
582 ListCell *lc;
583 List *pathlist = NIL;
584 List *partial_pathlist = NIL;
585 bool partial_paths_valid = true;
586 bool consider_parallel = true;
587 List *rellist;
588 List *tlist_list;
589 List *tlist;
590 Path *path;
591
592 /*
593 * If plain UNION, tell children to fetch all tuples.
594 *
595 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
596 * each arm of the UNION ALL. One could make a case for reducing the
597 * tuple fraction for later arms (discounting by the expected size of the
598 * earlier arms' results) but it seems not worth the trouble. The normal
599 * case where tuple_fraction isn't already zero is a LIMIT at top level,
600 * and passing it down as-is is usually enough to get the desired result
601 * of preferring fast-start plans.
602 */
603 if (!op->all)
604 root->tuple_fraction = 0.0;
605
606 /*
607 * If any of my children are identical UNION nodes (same op, all-flag, and
608 * colTypes) then they can be merged into this node so that we generate
609 * only one Append and unique-ification for the lot. Recurse to find such
610 * nodes and compute their children's paths.
611 */
612 rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
613
614 /*
615 * Generate tlist for Append plan node.
616 *
617 * The tlist for an Append plan isn't important as far as the Append is
618 * concerned, but we must make it look real anyway for the benefit of the
619 * next plan level up.
620 */
621 tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
622 tlist_list, refnames_tlist);
623
624 *pTargetList = tlist;
625
626 /* Build path lists and relid set. */
627 foreach(lc, rellist)
628 {
629 RelOptInfo *rel = lfirst(lc);
630
631 pathlist = lappend(pathlist, rel->cheapest_total_path);
632
633 if (consider_parallel)
634 {
635 if (!rel->consider_parallel)
636 {
637 consider_parallel = false;
638 partial_paths_valid = false;
639 }
640 else if (rel->partial_pathlist == NIL)
641 partial_paths_valid = false;
642 else
643 partial_pathlist = lappend(partial_pathlist,
644 linitial(rel->partial_pathlist));
645 }
646
647 relids = bms_union(relids, rel->relids);
648 }
649
650 /* Build result relation. */
651 result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
652 result_rel->reltarget = create_pathtarget(root, tlist);
653 result_rel->consider_parallel = consider_parallel;
654
655 /*
656 * Append the child results together.
657 */
658 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
659 NULL, 0, false, NIL, -1);
660
661 /*
662 * For UNION ALL, we just need the Append path. For UNION, need to add
663 * node(s) to remove duplicates.
664 */
665 if (!op->all)
666 path = make_union_unique(op, path, tlist, root);
667
668 add_path(result_rel, path);
669
670 /*
671 * Estimate number of groups. For now we just assume the output is unique
672 * --- this is certainly true for the UNION case, and we want worst-case
673 * estimates anyway.
674 */
675 result_rel->rows = path->rows;
676
677 /*
678 * Now consider doing the same thing using the partial paths plus Append
679 * plus Gather.
680 */
681 if (partial_paths_valid)
682 {
683 Path *ppath;
684 ListCell *lc;
685 int parallel_workers = 0;
686
687 /* Find the highest number of workers requested for any subpath. */
688 foreach(lc, partial_pathlist)
689 {
690 Path *path = lfirst(lc);
691
692 parallel_workers = Max(parallel_workers, path->parallel_workers);
693 }
694 Assert(parallel_workers > 0);
695
696 /*
697 * If the use of parallel append is permitted, always request at least
698 * log2(# of children) paths. We assume it can be useful to have
699 * extra workers in this case because they will be spread out across
700 * the children. The precise formula is just a guess; see
701 * add_paths_to_append_rel.
702 */
703 if (enable_parallel_append)
704 {
705 parallel_workers = Max(parallel_workers,
706 fls(list_length(partial_pathlist)));
707 parallel_workers = Min(parallel_workers,
708 max_parallel_workers_per_gather);
709 }
710 Assert(parallel_workers > 0);
711
712 ppath = (Path *)
713 create_append_path(root, result_rel, NIL, partial_pathlist,
714 NULL, parallel_workers, enable_parallel_append,
715 NIL, -1);
716 ppath = (Path *)
717 create_gather_path(root, result_rel, ppath,
718 result_rel->reltarget, NULL, NULL);
719 if (!op->all)
720 ppath = make_union_unique(op, ppath, tlist, root);
721 add_path(result_rel, ppath);
722 }
723
724 /* Undo effects of possibly forcing tuple_fraction to 0 */
725 root->tuple_fraction = save_fraction;
726
727 return result_rel;
728 }
729
730 /*
731 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
732 */
733 static RelOptInfo *
generate_nonunion_paths(SetOperationStmt * op,PlannerInfo * root,List * refnames_tlist,List ** pTargetList)734 generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
735 List *refnames_tlist,
736 List **pTargetList)
737 {
738 RelOptInfo *result_rel;
739 RelOptInfo *lrel,
740 *rrel;
741 double save_fraction = root->tuple_fraction;
742 Path *lpath,
743 *rpath,
744 *path;
745 List *lpath_tlist,
746 *rpath_tlist,
747 *tlist_list,
748 *tlist,
749 *groupList,
750 *pathlist;
751 double dLeftGroups,
752 dRightGroups,
753 dNumGroups,
754 dNumOutputRows;
755 bool use_hash;
756 SetOpCmd cmd;
757 int firstFlag;
758
759 /*
760 * Tell children to fetch all tuples.
761 */
762 root->tuple_fraction = 0.0;
763
764 /* Recurse on children, ensuring their outputs are marked */
765 lrel = recurse_set_operations(op->larg, root,
766 op->colTypes, op->colCollations,
767 false, 0,
768 refnames_tlist,
769 &lpath_tlist,
770 &dLeftGroups);
771 lpath = lrel->cheapest_total_path;
772 rrel = recurse_set_operations(op->rarg, root,
773 op->colTypes, op->colCollations,
774 false, 1,
775 refnames_tlist,
776 &rpath_tlist,
777 &dRightGroups);
778 rpath = rrel->cheapest_total_path;
779
780 /* Undo effects of forcing tuple_fraction to 0 */
781 root->tuple_fraction = save_fraction;
782
783 /*
784 * For EXCEPT, we must put the left input first. For INTERSECT, either
785 * order should give the same results, and we prefer to put the smaller
786 * input first in order to minimize the size of the hash table in the
787 * hashing case. "Smaller" means the one with the fewer groups.
788 */
789 if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
790 {
791 pathlist = list_make2(lpath, rpath);
792 tlist_list = list_make2(lpath_tlist, rpath_tlist);
793 firstFlag = 0;
794 }
795 else
796 {
797 pathlist = list_make2(rpath, lpath);
798 tlist_list = list_make2(rpath_tlist, lpath_tlist);
799 firstFlag = 1;
800 }
801
802 /*
803 * Generate tlist for Append plan node.
804 *
805 * The tlist for an Append plan isn't important as far as the Append is
806 * concerned, but we must make it look real anyway for the benefit of the
807 * next plan level up. In fact, it has to be real enough that the flag
808 * column is shown as a variable not a constant, else setrefs.c will get
809 * confused.
810 */
811 tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
812 tlist_list, refnames_tlist);
813
814 *pTargetList = tlist;
815
816 /* Build result relation. */
817 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
818 bms_union(lrel->relids, rrel->relids));
819 result_rel->reltarget = create_pathtarget(root, tlist);
820
821 /*
822 * Append the child results together.
823 */
824 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
825 NULL, 0, false, NIL, -1);
826
827 /* Identify the grouping semantics */
828 groupList = generate_setop_grouplist(op, tlist);
829
830 /*
831 * Estimate number of distinct groups that we'll need hashtable entries
832 * for; this is the size of the left-hand input for EXCEPT, or the smaller
833 * input for INTERSECT. Also estimate the number of eventual output rows.
834 * In non-ALL cases, we estimate each group produces one output row; in
835 * ALL cases use the relevant relation size. These are worst-case
836 * estimates, of course, but we need to be conservative.
837 */
838 if (op->op == SETOP_EXCEPT)
839 {
840 dNumGroups = dLeftGroups;
841 dNumOutputRows = op->all ? lpath->rows : dNumGroups;
842 }
843 else
844 {
845 dNumGroups = Min(dLeftGroups, dRightGroups);
846 dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
847 }
848
849 /*
850 * Decide whether to hash or sort, and add a sort node if needed.
851 */
852 use_hash = choose_hashed_setop(root, groupList, path,
853 dNumGroups, dNumOutputRows,
854 (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
855
856 if (groupList && !use_hash)
857 path = (Path *) create_sort_path(root,
858 result_rel,
859 path,
860 make_pathkeys_for_sortclauses(root,
861 groupList,
862 tlist),
863 -1.0);
864
865 /*
866 * Finally, add a SetOp path node to generate the correct output.
867 */
868 switch (op->op)
869 {
870 case SETOP_INTERSECT:
871 cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
872 break;
873 case SETOP_EXCEPT:
874 cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
875 break;
876 default:
877 elog(ERROR, "unrecognized set op: %d", (int) op->op);
878 cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
879 break;
880 }
881 path = (Path *) create_setop_path(root,
882 result_rel,
883 path,
884 cmd,
885 use_hash ? SETOP_HASHED : SETOP_SORTED,
886 groupList,
887 list_length(op->colTypes) + 1,
888 use_hash ? firstFlag : -1,
889 dNumGroups,
890 dNumOutputRows);
891
892 result_rel->rows = path->rows;
893 add_path(result_rel, path);
894 return result_rel;
895 }
896
897 /*
898 * Pull up children of a UNION node that are identically-propertied UNIONs.
899 *
900 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
901 * output rows will be lost anyway.
902 *
903 * NOTE: currently, we ignore collations while determining if a child has
904 * the same properties. This is semantically sound only so long as all
905 * collations have the same notion of equality. It is valid from an
906 * implementation standpoint because we don't care about the ordering of
907 * a UNION child's result: UNION ALL results are always unordered, and
908 * generate_union_paths will force a fresh sort if the top level is a UNION.
909 */
910 static List *
plan_union_children(PlannerInfo * root,SetOperationStmt * top_union,List * refnames_tlist,List ** tlist_list)911 plan_union_children(PlannerInfo *root,
912 SetOperationStmt *top_union,
913 List *refnames_tlist,
914 List **tlist_list)
915 {
916 List *pending_rels = list_make1(top_union);
917 List *result = NIL;
918 List *child_tlist;
919
920 *tlist_list = NIL;
921
922 while (pending_rels != NIL)
923 {
924 Node *setOp = linitial(pending_rels);
925
926 pending_rels = list_delete_first(pending_rels);
927
928 if (IsA(setOp, SetOperationStmt))
929 {
930 SetOperationStmt *op = (SetOperationStmt *) setOp;
931
932 if (op->op == top_union->op &&
933 (op->all == top_union->all || op->all) &&
934 equal(op->colTypes, top_union->colTypes))
935 {
936 /* Same UNION, so fold children into parent */
937 pending_rels = lcons(op->rarg, pending_rels);
938 pending_rels = lcons(op->larg, pending_rels);
939 continue;
940 }
941 }
942
943 /*
944 * Not same, so plan this child separately.
945 *
946 * Note we disallow any resjunk columns in child results. This is
947 * necessary since the Append node that implements the union won't do
948 * any projection, and upper levels will get confused if some of our
949 * output tuples have junk and some don't. This case only arises when
950 * we have an EXCEPT or INTERSECT as child, else there won't be
951 * resjunk anyway.
952 */
953 result = lappend(result, recurse_set_operations(setOp, root,
954 top_union->colTypes,
955 top_union->colCollations,
956 false, -1,
957 refnames_tlist,
958 &child_tlist,
959 NULL));
960 *tlist_list = lappend(*tlist_list, child_tlist);
961 }
962
963 return result;
964 }
965
966 /*
967 * Add nodes to the given path tree to unique-ify the result of a UNION.
968 */
969 static Path *
make_union_unique(SetOperationStmt * op,Path * path,List * tlist,PlannerInfo * root)970 make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
971 PlannerInfo *root)
972 {
973 RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
974 List *groupList;
975 double dNumGroups;
976
977 /* Identify the grouping semantics */
978 groupList = generate_setop_grouplist(op, tlist);
979
980 /*
981 * XXX for the moment, take the number of distinct groups as equal to the
982 * total input size, ie, the worst case. This is too conservative, but we
983 * don't want to risk having the hashtable overrun memory; also, it's not
984 * clear how to get a decent estimate of the true size. One should note
985 * as well the propensity of novices to write UNION rather than UNION ALL
986 * even when they don't expect any duplicates...
987 */
988 dNumGroups = path->rows;
989
990 /* Decide whether to hash or sort */
991 if (choose_hashed_setop(root, groupList, path,
992 dNumGroups, dNumGroups,
993 "UNION"))
994 {
995 /* Hashed aggregate plan --- no sort needed */
996 path = (Path *) create_agg_path(root,
997 result_rel,
998 path,
999 create_pathtarget(root, tlist),
1000 AGG_HASHED,
1001 AGGSPLIT_SIMPLE,
1002 groupList,
1003 NIL,
1004 NULL,
1005 dNumGroups);
1006 }
1007 else
1008 {
1009 /* Sort and Unique */
1010 if (groupList)
1011 path = (Path *)
1012 create_sort_path(root,
1013 result_rel,
1014 path,
1015 make_pathkeys_for_sortclauses(root,
1016 groupList,
1017 tlist),
1018 -1.0);
1019 path = (Path *) create_upper_unique_path(root,
1020 result_rel,
1021 path,
1022 list_length(path->pathkeys),
1023 dNumGroups);
1024 }
1025
1026 return path;
1027 }
1028
1029 /*
1030 * postprocess_setop_rel - perform steps required after adding paths
1031 */
1032 static void
postprocess_setop_rel(PlannerInfo * root,RelOptInfo * rel)1033 postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1034 {
1035 /*
1036 * We don't currently worry about allowing FDWs to contribute paths to
1037 * this relation, but give extensions a chance.
1038 */
1039 if (create_upper_paths_hook)
1040 (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1041 NULL, rel, NULL);
1042
1043 /* Select cheapest path */
1044 set_cheapest(rel);
1045 }
1046
1047 /*
1048 * choose_hashed_setop - should we use hashing for a set operation?
1049 */
1050 static bool
choose_hashed_setop(PlannerInfo * root,List * groupClauses,Path * input_path,double dNumGroups,double dNumOutputRows,const char * construct)1051 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1052 Path *input_path,
1053 double dNumGroups, double dNumOutputRows,
1054 const char *construct)
1055 {
1056 int numGroupCols = list_length(groupClauses);
1057 bool can_sort;
1058 bool can_hash;
1059 Size hashentrysize;
1060 Path hashed_p;
1061 Path sorted_p;
1062 double tuple_fraction;
1063
1064 /* Check whether the operators support sorting or hashing */
1065 can_sort = grouping_is_sortable(groupClauses);
1066 can_hash = grouping_is_hashable(groupClauses);
1067 if (can_hash && can_sort)
1068 {
1069 /* we have a meaningful choice to make, continue ... */
1070 }
1071 else if (can_hash)
1072 return true;
1073 else if (can_sort)
1074 return false;
1075 else
1076 ereport(ERROR,
1077 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1078 /* translator: %s is UNION, INTERSECT, or EXCEPT */
1079 errmsg("could not implement %s", construct),
1080 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1081
1082 /* Prefer sorting when enable_hashagg is off */
1083 if (!enable_hashagg)
1084 return false;
1085
1086 /*
1087 * Don't do it if it doesn't look like the hashtable will fit into
1088 * work_mem.
1089 */
1090 hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1091
1092 if (hashentrysize * dNumGroups > work_mem * 1024L)
1093 return false;
1094
1095 /*
1096 * See if the estimated cost is no more than doing it the other way.
1097 *
1098 * We need to consider input_plan + hashagg versus input_plan + sort +
1099 * group. Note that the actual result plan might involve a SetOp or
1100 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1101 * should be close enough for our purposes here.
1102 *
1103 * These path variables are dummies that just hold cost fields; we don't
1104 * make actual Paths for these steps.
1105 */
1106 cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1107 numGroupCols, dNumGroups,
1108 NIL,
1109 input_path->startup_cost, input_path->total_cost,
1110 input_path->rows);
1111
1112 /*
1113 * Now for the sorted case. Note that the input is *always* unsorted,
1114 * since it was made by appending unrelated sub-relations together.
1115 */
1116 sorted_p.startup_cost = input_path->startup_cost;
1117 sorted_p.total_cost = input_path->total_cost;
1118 /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1119 cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1120 input_path->rows, input_path->pathtarget->width,
1121 0.0, work_mem, -1.0);
1122 cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1123 NIL,
1124 sorted_p.startup_cost, sorted_p.total_cost,
1125 input_path->rows);
1126
1127 /*
1128 * Now make the decision using the top-level tuple fraction. First we
1129 * have to convert an absolute count (LIMIT) into fractional form.
1130 */
1131 tuple_fraction = root->tuple_fraction;
1132 if (tuple_fraction >= 1.0)
1133 tuple_fraction /= dNumOutputRows;
1134
1135 if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1136 tuple_fraction) < 0)
1137 {
1138 /* Hashed is cheaper, so use it */
1139 return true;
1140 }
1141 return false;
1142 }
1143
1144 /*
1145 * Generate targetlist for a set-operation plan node
1146 *
1147 * colTypes: OID list of set-op's result column datatypes
1148 * colCollations: OID list of set-op's result column collations
1149 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1150 * varno: varno to use in generated Vars
1151 * hack_constants: true to copy up constants (see comments in code)
1152 * input_tlist: targetlist of this node's input node
1153 * refnames_tlist: targetlist to take column names from
1154 */
1155 static List *
generate_setop_tlist(List * colTypes,List * colCollations,int flag,Index varno,bool hack_constants,List * input_tlist,List * refnames_tlist)1156 generate_setop_tlist(List *colTypes, List *colCollations,
1157 int flag,
1158 Index varno,
1159 bool hack_constants,
1160 List *input_tlist,
1161 List *refnames_tlist)
1162 {
1163 List *tlist = NIL;
1164 int resno = 1;
1165 ListCell *ctlc,
1166 *cclc,
1167 *itlc,
1168 *rtlc;
1169 TargetEntry *tle;
1170 Node *expr;
1171
1172 /* there's no forfour() so we must chase one list manually */
1173 rtlc = list_head(refnames_tlist);
1174 forthree(ctlc, colTypes, cclc, colCollations, itlc, input_tlist)
1175 {
1176 Oid colType = lfirst_oid(ctlc);
1177 Oid colColl = lfirst_oid(cclc);
1178 TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1179 TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1180
1181 rtlc = lnext(rtlc);
1182
1183 Assert(inputtle->resno == resno);
1184 Assert(reftle->resno == resno);
1185 Assert(!inputtle->resjunk);
1186 Assert(!reftle->resjunk);
1187
1188 /*
1189 * Generate columns referencing input columns and having appropriate
1190 * data types and column names. Insert datatype coercions where
1191 * necessary.
1192 *
1193 * HACK: constants in the input's targetlist are copied up as-is
1194 * rather than being referenced as subquery outputs. This is mainly
1195 * to ensure that when we try to coerce them to the output column's
1196 * datatype, the right things happen for UNKNOWN constants. But do
1197 * this only at the first level of subquery-scan plans; we don't want
1198 * phony constants appearing in the output tlists of upper-level
1199 * nodes!
1200 */
1201 if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1202 expr = (Node *) inputtle->expr;
1203 else
1204 expr = (Node *) makeVar(varno,
1205 inputtle->resno,
1206 exprType((Node *) inputtle->expr),
1207 exprTypmod((Node *) inputtle->expr),
1208 exprCollation((Node *) inputtle->expr),
1209 0);
1210
1211 if (exprType(expr) != colType)
1212 {
1213 /*
1214 * Note: it's not really cool to be applying coerce_to_common_type
1215 * here; one notable point is that assign_expr_collations never
1216 * gets run on any generated nodes. For the moment that's not a
1217 * problem because we force the correct exposed collation below.
1218 * It would likely be best to make the parser generate the correct
1219 * output tlist for every set-op to begin with, though.
1220 */
1221 expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1222 expr,
1223 colType,
1224 "UNION/INTERSECT/EXCEPT");
1225 }
1226
1227 /*
1228 * Ensure the tlist entry's exposed collation matches the set-op. This
1229 * is necessary because plan_set_operations() reports the result
1230 * ordering as a list of SortGroupClauses, which don't carry collation
1231 * themselves but just refer to tlist entries. If we don't show the
1232 * right collation then planner.c might do the wrong thing in
1233 * higher-level queries.
1234 *
1235 * Note we use RelabelType, not CollateExpr, since this expression
1236 * will reach the executor without any further processing.
1237 */
1238 if (exprCollation(expr) != colColl)
1239 {
1240 expr = (Node *) makeRelabelType((Expr *) expr,
1241 exprType(expr),
1242 exprTypmod(expr),
1243 colColl,
1244 COERCE_IMPLICIT_CAST);
1245 }
1246
1247 tle = makeTargetEntry((Expr *) expr,
1248 (AttrNumber) resno++,
1249 pstrdup(reftle->resname),
1250 false);
1251
1252 /*
1253 * By convention, all non-resjunk columns in a setop tree have
1254 * ressortgroupref equal to their resno. In some cases the ref isn't
1255 * needed, but this is a cleaner way than modifying the tlist later.
1256 */
1257 tle->ressortgroupref = tle->resno;
1258
1259 tlist = lappend(tlist, tle);
1260 }
1261
1262 if (flag >= 0)
1263 {
1264 /* Add a resjunk flag column */
1265 /* flag value is the given constant */
1266 expr = (Node *) makeConst(INT4OID,
1267 -1,
1268 InvalidOid,
1269 sizeof(int32),
1270 Int32GetDatum(flag),
1271 false,
1272 true);
1273 tle = makeTargetEntry((Expr *) expr,
1274 (AttrNumber) resno++,
1275 pstrdup("flag"),
1276 true);
1277 tlist = lappend(tlist, tle);
1278 }
1279
1280 return tlist;
1281 }
1282
1283 /*
1284 * Generate targetlist for a set-operation Append node
1285 *
1286 * colTypes: OID list of set-op's result column datatypes
1287 * colCollations: OID list of set-op's result column collations
1288 * flag: true to create a flag column copied up from subplans
1289 * input_tlists: list of tlists for sub-plans of the Append
1290 * refnames_tlist: targetlist to take column names from
1291 *
1292 * The entries in the Append's targetlist should always be simple Vars;
1293 * we just have to make sure they have the right datatypes/typmods/collations.
1294 * The Vars are always generated with varno 0.
1295 *
1296 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1297 * cannot figure out a realistic width for the tlist we make here. But we
1298 * ought to refactor this code to produce a PathTarget directly, anyway.
1299 */
1300 static List *
generate_append_tlist(List * colTypes,List * colCollations,bool flag,List * input_tlists,List * refnames_tlist)1301 generate_append_tlist(List *colTypes, List *colCollations,
1302 bool flag,
1303 List *input_tlists,
1304 List *refnames_tlist)
1305 {
1306 List *tlist = NIL;
1307 int resno = 1;
1308 ListCell *curColType;
1309 ListCell *curColCollation;
1310 ListCell *ref_tl_item;
1311 int colindex;
1312 TargetEntry *tle;
1313 Node *expr;
1314 ListCell *tlistl;
1315 int32 *colTypmods;
1316
1317 /*
1318 * First extract typmods to use.
1319 *
1320 * If the inputs all agree on type and typmod of a particular column, use
1321 * that typmod; else use -1.
1322 */
1323 colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1324
1325 foreach(tlistl, input_tlists)
1326 {
1327 List *subtlist = (List *) lfirst(tlistl);
1328 ListCell *subtlistl;
1329
1330 curColType = list_head(colTypes);
1331 colindex = 0;
1332 foreach(subtlistl, subtlist)
1333 {
1334 TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1335
1336 if (subtle->resjunk)
1337 continue;
1338 Assert(curColType != NULL);
1339 if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1340 {
1341 /* If first subplan, copy the typmod; else compare */
1342 int32 subtypmod = exprTypmod((Node *) subtle->expr);
1343
1344 if (tlistl == list_head(input_tlists))
1345 colTypmods[colindex] = subtypmod;
1346 else if (subtypmod != colTypmods[colindex])
1347 colTypmods[colindex] = -1;
1348 }
1349 else
1350 {
1351 /* types disagree, so force typmod to -1 */
1352 colTypmods[colindex] = -1;
1353 }
1354 curColType = lnext(curColType);
1355 colindex++;
1356 }
1357 Assert(curColType == NULL);
1358 }
1359
1360 /*
1361 * Now we can build the tlist for the Append.
1362 */
1363 colindex = 0;
1364 forthree(curColType, colTypes, curColCollation, colCollations,
1365 ref_tl_item, refnames_tlist)
1366 {
1367 Oid colType = lfirst_oid(curColType);
1368 int32 colTypmod = colTypmods[colindex++];
1369 Oid colColl = lfirst_oid(curColCollation);
1370 TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1371
1372 Assert(reftle->resno == resno);
1373 Assert(!reftle->resjunk);
1374 expr = (Node *) makeVar(0,
1375 resno,
1376 colType,
1377 colTypmod,
1378 colColl,
1379 0);
1380 tle = makeTargetEntry((Expr *) expr,
1381 (AttrNumber) resno++,
1382 pstrdup(reftle->resname),
1383 false);
1384
1385 /*
1386 * By convention, all non-resjunk columns in a setop tree have
1387 * ressortgroupref equal to their resno. In some cases the ref isn't
1388 * needed, but this is a cleaner way than modifying the tlist later.
1389 */
1390 tle->ressortgroupref = tle->resno;
1391
1392 tlist = lappend(tlist, tle);
1393 }
1394
1395 if (flag)
1396 {
1397 /* Add a resjunk flag column */
1398 /* flag value is shown as copied up from subplan */
1399 expr = (Node *) makeVar(0,
1400 resno,
1401 INT4OID,
1402 -1,
1403 InvalidOid,
1404 0);
1405 tle = makeTargetEntry((Expr *) expr,
1406 (AttrNumber) resno++,
1407 pstrdup("flag"),
1408 true);
1409 tlist = lappend(tlist, tle);
1410 }
1411
1412 pfree(colTypmods);
1413
1414 return tlist;
1415 }
1416
1417 /*
1418 * generate_setop_grouplist
1419 * Build a SortGroupClause list defining the sort/grouping properties
1420 * of the setop's output columns.
1421 *
1422 * Parse analysis already determined the properties and built a suitable
1423 * list, except that the entries do not have sortgrouprefs set because
1424 * the parser output representation doesn't include a tlist for each
1425 * setop. So what we need to do here is copy that list and install
1426 * proper sortgrouprefs into it (copying those from the targetlist).
1427 */
1428 static List *
generate_setop_grouplist(SetOperationStmt * op,List * targetlist)1429 generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1430 {
1431 List *grouplist = copyObject(op->groupClauses);
1432 ListCell *lg;
1433 ListCell *lt;
1434
1435 lg = list_head(grouplist);
1436 foreach(lt, targetlist)
1437 {
1438 TargetEntry *tle = (TargetEntry *) lfirst(lt);
1439 SortGroupClause *sgc;
1440
1441 if (tle->resjunk)
1442 {
1443 /* resjunk columns should not have sortgrouprefs */
1444 Assert(tle->ressortgroupref == 0);
1445 continue; /* ignore resjunk columns */
1446 }
1447
1448 /* non-resjunk columns should have sortgroupref = resno */
1449 Assert(tle->ressortgroupref == tle->resno);
1450
1451 /* non-resjunk columns should have grouping clauses */
1452 Assert(lg != NULL);
1453 sgc = (SortGroupClause *) lfirst(lg);
1454 lg = lnext(lg);
1455 Assert(sgc->tleSortGroupRef == 0);
1456
1457 sgc->tleSortGroupRef = tle->ressortgroupref;
1458 }
1459 Assert(lg == NULL);
1460 return grouplist;
1461 }
1462
1463
1464 /*
1465 * expand_inherited_tables
1466 * Expand each rangetable entry that represents an inheritance set
1467 * into an "append relation". At the conclusion of this process,
1468 * the "inh" flag is set in all and only those RTEs that are append
1469 * relation parents.
1470 */
1471 void
expand_inherited_tables(PlannerInfo * root)1472 expand_inherited_tables(PlannerInfo *root)
1473 {
1474 Index nrtes;
1475 Index rti;
1476 ListCell *rl;
1477
1478 /*
1479 * expand_inherited_rtentry may add RTEs to parse->rtable. The function is
1480 * expected to recursively handle any RTEs that it creates with inh=true.
1481 * So just scan as far as the original end of the rtable list.
1482 */
1483 nrtes = list_length(root->parse->rtable);
1484 rl = list_head(root->parse->rtable);
1485 for (rti = 1; rti <= nrtes; rti++)
1486 {
1487 RangeTblEntry *rte = (RangeTblEntry *) lfirst(rl);
1488
1489 expand_inherited_rtentry(root, rte, rti);
1490 rl = lnext(rl);
1491 }
1492 }
1493
1494 /*
1495 * expand_inherited_rtentry
1496 * Check whether a rangetable entry represents an inheritance set.
1497 * If so, add entries for all the child tables to the query's
1498 * rangetable, and build AppendRelInfo nodes for all the child tables
1499 * and add them to root->append_rel_list. If not, clear the entry's
1500 * "inh" flag to prevent later code from looking for AppendRelInfos.
1501 *
1502 * Note that the original RTE is considered to represent the whole
1503 * inheritance set. The first of the generated RTEs is an RTE for the same
1504 * table, but with inh = false, to represent the parent table in its role
1505 * as a simple member of the inheritance set.
1506 *
1507 * A childless table is never considered to be an inheritance set. For
1508 * regular inheritance, a parent RTE must always have at least two associated
1509 * AppendRelInfos: one corresponding to the parent table as a simple member of
1510 * inheritance set and one or more corresponding to the actual children.
1511 * Since a partitioned table is not scanned, it might have only one associated
1512 * AppendRelInfo.
1513 */
1514 static void
expand_inherited_rtentry(PlannerInfo * root,RangeTblEntry * rte,Index rti)1515 expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
1516 {
1517 Query *parse = root->parse;
1518 Oid parentOID;
1519 PlanRowMark *oldrc;
1520 Relation oldrelation;
1521 LOCKMODE lockmode;
1522 List *inhOIDs;
1523 ListCell *l;
1524
1525 /* Does RT entry allow inheritance? */
1526 if (!rte->inh)
1527 return;
1528 /* Ignore any already-expanded UNION ALL nodes */
1529 if (rte->rtekind != RTE_RELATION)
1530 {
1531 Assert(rte->rtekind == RTE_SUBQUERY);
1532 return;
1533 }
1534 /* Fast path for common case of childless table */
1535 parentOID = rte->relid;
1536 if (!has_subclass(parentOID))
1537 {
1538 /* Clear flag before returning */
1539 rte->inh = false;
1540 return;
1541 }
1542
1543 /*
1544 * The rewriter should already have obtained an appropriate lock on each
1545 * relation named in the query. However, for each child relation we add
1546 * to the query, we must obtain an appropriate lock, because this will be
1547 * the first use of those relations in the parse/rewrite/plan pipeline.
1548 *
1549 * If the parent relation is the query's result relation, then we need
1550 * RowExclusiveLock. Otherwise, if it's accessed FOR UPDATE/SHARE, we
1551 * need RowShareLock; otherwise AccessShareLock. We can't just grab
1552 * AccessShareLock because then the executor would be trying to upgrade
1553 * the lock, leading to possible deadlocks. (This code should match the
1554 * parser and rewriter.)
1555 */
1556 oldrc = get_plan_rowmark(root->rowMarks, rti);
1557 if (rti == parse->resultRelation)
1558 lockmode = RowExclusiveLock;
1559 else if (oldrc && RowMarkRequiresRowShareLock(oldrc->markType))
1560 lockmode = RowShareLock;
1561 else
1562 lockmode = AccessShareLock;
1563
1564 /* Scan for all members of inheritance set, acquire needed locks */
1565 inhOIDs = find_all_inheritors(parentOID, lockmode, NULL);
1566
1567 /*
1568 * Check that there's at least one descendant, else treat as no-child
1569 * case. This could happen despite above has_subclass() check, if table
1570 * once had a child but no longer does.
1571 */
1572 if (list_length(inhOIDs) < 2)
1573 {
1574 /* Clear flag before returning */
1575 rte->inh = false;
1576 return;
1577 }
1578
1579 /*
1580 * If parent relation is selected FOR UPDATE/SHARE, we need to mark its
1581 * PlanRowMark as isParent = true, and generate a new PlanRowMark for each
1582 * child.
1583 */
1584 if (oldrc)
1585 oldrc->isParent = true;
1586
1587 /*
1588 * Must open the parent relation to examine its tupdesc. We need not lock
1589 * it; we assume the rewriter already did.
1590 */
1591 oldrelation = heap_open(parentOID, NoLock);
1592
1593 /* Scan the inheritance set and expand it */
1594 if (RelationGetPartitionDesc(oldrelation) != NULL)
1595 {
1596 Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
1597
1598 /*
1599 * If this table has partitions, recursively expand them in the order
1600 * in which they appear in the PartitionDesc. While at it, also
1601 * extract the partition key columns of all the partitioned tables.
1602 */
1603 expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
1604 lockmode, &root->append_rel_list);
1605 }
1606 else
1607 {
1608 List *appinfos = NIL;
1609 RangeTblEntry *childrte;
1610 Index childRTindex;
1611
1612 /*
1613 * This table has no partitions. Expand any plain inheritance
1614 * children in the order the OIDs were returned by
1615 * find_all_inheritors.
1616 */
1617 foreach(l, inhOIDs)
1618 {
1619 Oid childOID = lfirst_oid(l);
1620 Relation newrelation;
1621
1622 /* Open rel if needed; we already have required locks */
1623 if (childOID != parentOID)
1624 newrelation = heap_open(childOID, NoLock);
1625 else
1626 newrelation = oldrelation;
1627
1628 /*
1629 * It is possible that the parent table has children that are temp
1630 * tables of other backends. We cannot safely access such tables
1631 * (because of buffering issues), and the best thing to do seems
1632 * to be to silently ignore them.
1633 */
1634 if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
1635 {
1636 heap_close(newrelation, lockmode);
1637 continue;
1638 }
1639
1640 expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1641 newrelation,
1642 &appinfos, &childrte,
1643 &childRTindex);
1644
1645 /* Close child relations, but keep locks */
1646 if (childOID != parentOID)
1647 heap_close(newrelation, NoLock);
1648 }
1649
1650 /*
1651 * If all the children were temp tables, pretend it's a
1652 * non-inheritance situation; we don't need Append node in that case.
1653 * The duplicate RTE we added for the parent table is harmless, so we
1654 * don't bother to get rid of it; ditto for the useless PlanRowMark
1655 * node.
1656 */
1657 if (list_length(appinfos) < 2)
1658 rte->inh = false;
1659 else
1660 root->append_rel_list = list_concat(root->append_rel_list,
1661 appinfos);
1662
1663 }
1664
1665 heap_close(oldrelation, NoLock);
1666 }
1667
1668 /*
1669 * expand_partitioned_rtentry
1670 * Recursively expand an RTE for a partitioned table.
1671 *
1672 * Note that RelationGetPartitionDispatchInfo will expand partitions in the
1673 * same order as this code.
1674 */
1675 static void
expand_partitioned_rtentry(PlannerInfo * root,RangeTblEntry * parentrte,Index parentRTindex,Relation parentrel,PlanRowMark * top_parentrc,LOCKMODE lockmode,List ** appinfos)1676 expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
1677 Index parentRTindex, Relation parentrel,
1678 PlanRowMark *top_parentrc, LOCKMODE lockmode,
1679 List **appinfos)
1680 {
1681 int i;
1682 RangeTblEntry *childrte;
1683 Index childRTindex;
1684 PartitionDesc partdesc = RelationGetPartitionDesc(parentrel);
1685
1686 check_stack_depth();
1687
1688 /* A partitioned table should always have a partition descriptor. */
1689 Assert(partdesc);
1690
1691 Assert(parentrte->inh);
1692
1693 /*
1694 * Note down whether any partition key cols are being updated. Though it's
1695 * the root partitioned table's updatedCols we are interested in, we
1696 * instead use parentrte to get the updatedCols. This is convenient
1697 * because parentrte already has the root partrel's updatedCols translated
1698 * to match the attribute ordering of parentrel.
1699 */
1700 if (!root->partColsUpdated)
1701 root->partColsUpdated =
1702 has_partition_attrs(parentrel, parentrte->updatedCols, NULL);
1703
1704 /* First expand the partitioned table itself. */
1705 expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel,
1706 top_parentrc, parentrel,
1707 appinfos, &childrte, &childRTindex);
1708
1709 /*
1710 * If the partitioned table has no partitions, treat this as the
1711 * non-inheritance case.
1712 */
1713 if (partdesc->nparts == 0)
1714 {
1715 parentrte->inh = false;
1716 return;
1717 }
1718
1719 for (i = 0; i < partdesc->nparts; i++)
1720 {
1721 Oid childOID = partdesc->oids[i];
1722 Relation childrel;
1723
1724 /* Open rel; we already have required locks */
1725 childrel = heap_open(childOID, NoLock);
1726
1727 /*
1728 * Temporary partitions belonging to other sessions should have been
1729 * disallowed at definition, but for paranoia's sake, let's double
1730 * check.
1731 */
1732 if (RELATION_IS_OTHER_TEMP(childrel))
1733 elog(ERROR, "temporary relation from another session found as partition");
1734
1735 expand_single_inheritance_child(root, parentrte, parentRTindex,
1736 parentrel, top_parentrc, childrel,
1737 appinfos, &childrte, &childRTindex);
1738
1739 /* If this child is itself partitioned, recurse */
1740 if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1741 expand_partitioned_rtentry(root, childrte, childRTindex,
1742 childrel, top_parentrc, lockmode,
1743 appinfos);
1744
1745 /* Close child relation, but keep locks */
1746 heap_close(childrel, NoLock);
1747 }
1748 }
1749
1750 /*
1751 * expand_single_inheritance_child
1752 * Build a RangeTblEntry and an AppendRelInfo, if appropriate, plus
1753 * maybe a PlanRowMark.
1754 *
1755 * We now expand the partition hierarchy level by level, creating a
1756 * corresponding hierarchy of AppendRelInfos and RelOptInfos, where each
1757 * partitioned descendant acts as a parent of its immediate partitions.
1758 * (This is a difference from what older versions of PostgreSQL did and what
1759 * is still done in the case of table inheritance for unpartitioned tables,
1760 * where the hierarchy is flattened during RTE expansion.)
1761 *
1762 * PlanRowMarks still carry the top-parent's RTI, and the top-parent's
1763 * allMarkTypes field still accumulates values from all descendents.
1764 *
1765 * "parentrte" and "parentRTindex" are immediate parent's RTE and
1766 * RTI. "top_parentrc" is top parent's PlanRowMark.
1767 *
1768 * The child RangeTblEntry and its RTI are returned in "childrte_p" and
1769 * "childRTindex_p" resp.
1770 */
1771 static void
expand_single_inheritance_child(PlannerInfo * root,RangeTblEntry * parentrte,Index parentRTindex,Relation parentrel,PlanRowMark * top_parentrc,Relation childrel,List ** appinfos,RangeTblEntry ** childrte_p,Index * childRTindex_p)1772 expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
1773 Index parentRTindex, Relation parentrel,
1774 PlanRowMark *top_parentrc, Relation childrel,
1775 List **appinfos, RangeTblEntry **childrte_p,
1776 Index *childRTindex_p)
1777 {
1778 Query *parse = root->parse;
1779 Oid parentOID = RelationGetRelid(parentrel);
1780 Oid childOID = RelationGetRelid(childrel);
1781 RangeTblEntry *childrte;
1782 Index childRTindex;
1783 AppendRelInfo *appinfo;
1784
1785 /*
1786 * Build an RTE for the child, and attach to query's rangetable list. We
1787 * copy most fields of the parent's RTE, but replace relation OID and
1788 * relkind, and set inh = false. Also, set requiredPerms to zero since
1789 * all required permissions checks are done on the original RTE. Likewise,
1790 * set the child's securityQuals to empty, because we only want to apply
1791 * the parent's RLS conditions regardless of what RLS properties
1792 * individual children may have. (This is an intentional choice to make
1793 * inherited RLS work like regular permissions checks.) The parent
1794 * securityQuals will be propagated to children along with other base
1795 * restriction clauses, so we don't need to do it here.
1796 */
1797 childrte = copyObject(parentrte);
1798 *childrte_p = childrte;
1799 childrte->relid = childOID;
1800 childrte->relkind = childrel->rd_rel->relkind;
1801 /* A partitioned child will need to be expanded further. */
1802 if (childOID != parentOID &&
1803 childrte->relkind == RELKIND_PARTITIONED_TABLE)
1804 childrte->inh = true;
1805 else
1806 childrte->inh = false;
1807 childrte->requiredPerms = 0;
1808 childrte->securityQuals = NIL;
1809 parse->rtable = lappend(parse->rtable, childrte);
1810 childRTindex = list_length(parse->rtable);
1811 *childRTindex_p = childRTindex;
1812
1813 /*
1814 * We need an AppendRelInfo if paths will be built for the child RTE. If
1815 * childrte->inh is true, then we'll always need to generate append paths
1816 * for it. If childrte->inh is false, we must scan it if it's not a
1817 * partitioned table; but if it is a partitioned table, then it never has
1818 * any data of its own and need not be scanned.
1819 */
1820 if (childrte->relkind != RELKIND_PARTITIONED_TABLE || childrte->inh)
1821 {
1822 appinfo = makeNode(AppendRelInfo);
1823 appinfo->parent_relid = parentRTindex;
1824 appinfo->child_relid = childRTindex;
1825 appinfo->parent_reltype = parentrel->rd_rel->reltype;
1826 appinfo->child_reltype = childrel->rd_rel->reltype;
1827 make_inh_translation_list(parentrel, childrel, childRTindex,
1828 &appinfo->translated_vars);
1829 appinfo->parent_reloid = parentOID;
1830 *appinfos = lappend(*appinfos, appinfo);
1831
1832 /*
1833 * Translate the column permissions bitmaps to the child's attnums (we
1834 * have to build the translated_vars list before we can do this). But
1835 * if this is the parent table, leave copyObject's result alone.
1836 *
1837 * Note: we need to do this even though the executor won't run any
1838 * permissions checks on the child RTE. The insertedCols/updatedCols
1839 * bitmaps may be examined for trigger-firing purposes.
1840 */
1841 if (childOID != parentOID)
1842 {
1843 childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
1844 appinfo->translated_vars);
1845 childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
1846 appinfo->translated_vars);
1847 childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
1848 appinfo->translated_vars);
1849 }
1850 }
1851
1852 /*
1853 * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1854 */
1855 if (top_parentrc)
1856 {
1857 PlanRowMark *childrc = makeNode(PlanRowMark);
1858
1859 childrc->rti = childRTindex;
1860 childrc->prti = top_parentrc->rti;
1861 childrc->rowmarkId = top_parentrc->rowmarkId;
1862 /* Reselect rowmark type, because relkind might not match parent */
1863 childrc->markType = select_rowmark_type(childrte,
1864 top_parentrc->strength);
1865 childrc->allMarkTypes = (1 << childrc->markType);
1866 childrc->strength = top_parentrc->strength;
1867 childrc->waitPolicy = top_parentrc->waitPolicy;
1868
1869 /*
1870 * We mark RowMarks for partitioned child tables as parent RowMarks so
1871 * that the executor ignores them (except their existence means that
1872 * the child tables be locked using appropriate mode).
1873 */
1874 childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1875
1876 /* Include child's rowmark type in top parent's allMarkTypes */
1877 top_parentrc->allMarkTypes |= childrc->allMarkTypes;
1878
1879 root->rowMarks = lappend(root->rowMarks, childrc);
1880 }
1881 }
1882
1883 /*
1884 * make_inh_translation_list
1885 * Build the list of translations from parent Vars to child Vars for
1886 * an inheritance child.
1887 *
1888 * For paranoia's sake, we match type/collation as well as attribute name.
1889 */
1890 static void
make_inh_translation_list(Relation oldrelation,Relation newrelation,Index newvarno,List ** translated_vars)1891 make_inh_translation_list(Relation oldrelation, Relation newrelation,
1892 Index newvarno,
1893 List **translated_vars)
1894 {
1895 List *vars = NIL;
1896 TupleDesc old_tupdesc = RelationGetDescr(oldrelation);
1897 TupleDesc new_tupdesc = RelationGetDescr(newrelation);
1898 int oldnatts = old_tupdesc->natts;
1899 int newnatts = new_tupdesc->natts;
1900 int old_attno;
1901
1902 for (old_attno = 0; old_attno < oldnatts; old_attno++)
1903 {
1904 Form_pg_attribute att;
1905 char *attname;
1906 Oid atttypid;
1907 int32 atttypmod;
1908 Oid attcollation;
1909 int new_attno;
1910
1911 att = TupleDescAttr(old_tupdesc, old_attno);
1912 if (att->attisdropped)
1913 {
1914 /* Just put NULL into this list entry */
1915 vars = lappend(vars, NULL);
1916 continue;
1917 }
1918 attname = NameStr(att->attname);
1919 atttypid = att->atttypid;
1920 atttypmod = att->atttypmod;
1921 attcollation = att->attcollation;
1922
1923 /*
1924 * When we are generating the "translation list" for the parent table
1925 * of an inheritance set, no need to search for matches.
1926 */
1927 if (oldrelation == newrelation)
1928 {
1929 vars = lappend(vars, makeVar(newvarno,
1930 (AttrNumber) (old_attno + 1),
1931 atttypid,
1932 atttypmod,
1933 attcollation,
1934 0));
1935 continue;
1936 }
1937
1938 /*
1939 * Otherwise we have to search for the matching column by name.
1940 * There's no guarantee it'll have the same column position, because
1941 * of cases like ALTER TABLE ADD COLUMN and multiple inheritance.
1942 * However, in simple cases it will be the same column number, so try
1943 * that before we go groveling through all the columns.
1944 *
1945 * Note: the test for (att = ...) != NULL cannot fail, it's just a
1946 * notational device to include the assignment into the if-clause.
1947 */
1948 if (old_attno < newnatts &&
1949 (att = TupleDescAttr(new_tupdesc, old_attno)) != NULL &&
1950 !att->attisdropped &&
1951 strcmp(attname, NameStr(att->attname)) == 0)
1952 new_attno = old_attno;
1953 else
1954 {
1955 for (new_attno = 0; new_attno < newnatts; new_attno++)
1956 {
1957 att = TupleDescAttr(new_tupdesc, new_attno);
1958 if (!att->attisdropped &&
1959 strcmp(attname, NameStr(att->attname)) == 0)
1960 break;
1961 }
1962 if (new_attno >= newnatts)
1963 elog(ERROR, "could not find inherited attribute \"%s\" of relation \"%s\"",
1964 attname, RelationGetRelationName(newrelation));
1965 }
1966
1967 /* Found it, check type and collation match */
1968 if (atttypid != att->atttypid || atttypmod != att->atttypmod)
1969 elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type",
1970 attname, RelationGetRelationName(newrelation));
1971 if (attcollation != att->attcollation)
1972 elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation",
1973 attname, RelationGetRelationName(newrelation));
1974
1975 vars = lappend(vars, makeVar(newvarno,
1976 (AttrNumber) (new_attno + 1),
1977 atttypid,
1978 atttypmod,
1979 attcollation,
1980 0));
1981 }
1982
1983 *translated_vars = vars;
1984 }
1985
1986 /*
1987 * translate_col_privs
1988 * Translate a bitmapset representing per-column privileges from the
1989 * parent rel's attribute numbering to the child's.
1990 *
1991 * The only surprise here is that we don't translate a parent whole-row
1992 * reference into a child whole-row reference. That would mean requiring
1993 * permissions on all child columns, which is overly strict, since the
1994 * query is really only going to reference the inherited columns. Instead
1995 * we set the per-column bits for all inherited columns.
1996 */
1997 static Bitmapset *
translate_col_privs(const Bitmapset * parent_privs,List * translated_vars)1998 translate_col_privs(const Bitmapset *parent_privs,
1999 List *translated_vars)
2000 {
2001 Bitmapset *child_privs = NULL;
2002 bool whole_row;
2003 int attno;
2004 ListCell *lc;
2005
2006 /* System attributes have the same numbers in all tables */
2007 for (attno = FirstLowInvalidHeapAttributeNumber + 1; attno < 0; attno++)
2008 {
2009 if (bms_is_member(attno - FirstLowInvalidHeapAttributeNumber,
2010 parent_privs))
2011 child_privs = bms_add_member(child_privs,
2012 attno - FirstLowInvalidHeapAttributeNumber);
2013 }
2014
2015 /* Check if parent has whole-row reference */
2016 whole_row = bms_is_member(InvalidAttrNumber - FirstLowInvalidHeapAttributeNumber,
2017 parent_privs);
2018
2019 /* And now translate the regular user attributes, using the vars list */
2020 attno = InvalidAttrNumber;
2021 foreach(lc, translated_vars)
2022 {
2023 Var *var = lfirst_node(Var, lc);
2024
2025 attno++;
2026 if (var == NULL) /* ignore dropped columns */
2027 continue;
2028 if (whole_row ||
2029 bms_is_member(attno - FirstLowInvalidHeapAttributeNumber,
2030 parent_privs))
2031 child_privs = bms_add_member(child_privs,
2032 var->varattno - FirstLowInvalidHeapAttributeNumber);
2033 }
2034
2035 return child_privs;
2036 }
2037
2038 /*
2039 * adjust_appendrel_attrs
2040 * Copy the specified query or expression and translate Vars referring to a
2041 * parent rel to refer to the corresponding child rel instead. We also
2042 * update rtindexes appearing outside Vars, such as resultRelation and
2043 * jointree relids.
2044 *
2045 * Note: this is only applied after conversion of sublinks to subplans,
2046 * so we don't need to cope with recursion into sub-queries.
2047 *
2048 * Note: this is not hugely different from what pullup_replace_vars() does;
2049 * maybe we should try to fold the two routines together.
2050 */
2051 Node *
adjust_appendrel_attrs(PlannerInfo * root,Node * node,int nappinfos,AppendRelInfo ** appinfos)2052 adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos,
2053 AppendRelInfo **appinfos)
2054 {
2055 Node *result;
2056 adjust_appendrel_attrs_context context;
2057
2058 context.root = root;
2059 context.nappinfos = nappinfos;
2060 context.appinfos = appinfos;
2061
2062 /* If there's nothing to adjust, don't call this function. */
2063 Assert(nappinfos >= 1 && appinfos != NULL);
2064
2065 /*
2066 * Must be prepared to start with a Query or a bare expression tree.
2067 */
2068 if (node && IsA(node, Query))
2069 {
2070 Query *newnode;
2071 int cnt;
2072
2073 newnode = query_tree_mutator((Query *) node,
2074 adjust_appendrel_attrs_mutator,
2075 (void *) &context,
2076 QTW_IGNORE_RC_SUBQUERIES);
2077 for (cnt = 0; cnt < nappinfos; cnt++)
2078 {
2079 AppendRelInfo *appinfo = appinfos[cnt];
2080
2081 if (newnode->resultRelation == appinfo->parent_relid)
2082 {
2083 newnode->resultRelation = appinfo->child_relid;
2084 /* Fix tlist resnos too, if it's inherited UPDATE */
2085 if (newnode->commandType == CMD_UPDATE)
2086 newnode->targetList =
2087 adjust_inherited_tlist(newnode->targetList,
2088 appinfo);
2089 break;
2090 }
2091 }
2092
2093 result = (Node *) newnode;
2094 }
2095 else
2096 result = adjust_appendrel_attrs_mutator(node, &context);
2097
2098 return result;
2099 }
2100
2101 static Node *
adjust_appendrel_attrs_mutator(Node * node,adjust_appendrel_attrs_context * context)2102 adjust_appendrel_attrs_mutator(Node *node,
2103 adjust_appendrel_attrs_context *context)
2104 {
2105 AppendRelInfo **appinfos = context->appinfos;
2106 int nappinfos = context->nappinfos;
2107 int cnt;
2108
2109 if (node == NULL)
2110 return NULL;
2111 if (IsA(node, Var))
2112 {
2113 Var *var = (Var *) copyObject(node);
2114 AppendRelInfo *appinfo = NULL;
2115
2116 for (cnt = 0; cnt < nappinfos; cnt++)
2117 {
2118 if (var->varno == appinfos[cnt]->parent_relid)
2119 {
2120 appinfo = appinfos[cnt];
2121 break;
2122 }
2123 }
2124
2125 if (var->varlevelsup == 0 && appinfo)
2126 {
2127 var->varno = appinfo->child_relid;
2128 var->varnoold = appinfo->child_relid;
2129 if (var->varattno > 0)
2130 {
2131 Node *newnode;
2132
2133 if (var->varattno > list_length(appinfo->translated_vars))
2134 elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2135 var->varattno, get_rel_name(appinfo->parent_reloid));
2136 newnode = copyObject(list_nth(appinfo->translated_vars,
2137 var->varattno - 1));
2138 if (newnode == NULL)
2139 elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2140 var->varattno, get_rel_name(appinfo->parent_reloid));
2141 return newnode;
2142 }
2143 else if (var->varattno == 0)
2144 {
2145 /*
2146 * Whole-row Var: if we are dealing with named rowtypes, we
2147 * can use a whole-row Var for the child table plus a coercion
2148 * step to convert the tuple layout to the parent's rowtype.
2149 * Otherwise we have to generate a RowExpr.
2150 */
2151 if (OidIsValid(appinfo->child_reltype))
2152 {
2153 Assert(var->vartype == appinfo->parent_reltype);
2154 if (appinfo->parent_reltype != appinfo->child_reltype)
2155 {
2156 ConvertRowtypeExpr *r = makeNode(ConvertRowtypeExpr);
2157
2158 r->arg = (Expr *) var;
2159 r->resulttype = appinfo->parent_reltype;
2160 r->convertformat = COERCE_IMPLICIT_CAST;
2161 r->location = -1;
2162 /* Make sure the Var node has the right type ID, too */
2163 var->vartype = appinfo->child_reltype;
2164 return (Node *) r;
2165 }
2166 }
2167 else
2168 {
2169 /*
2170 * Build a RowExpr containing the translated variables.
2171 *
2172 * In practice var->vartype will always be RECORDOID here,
2173 * so we need to come up with some suitable column names.
2174 * We use the parent RTE's column names.
2175 *
2176 * Note: we can't get here for inheritance cases, so there
2177 * is no need to worry that translated_vars might contain
2178 * some dummy NULLs.
2179 */
2180 RowExpr *rowexpr;
2181 List *fields;
2182 RangeTblEntry *rte;
2183
2184 rte = rt_fetch(appinfo->parent_relid,
2185 context->root->parse->rtable);
2186 fields = copyObject(appinfo->translated_vars);
2187 rowexpr = makeNode(RowExpr);
2188 rowexpr->args = fields;
2189 rowexpr->row_typeid = var->vartype;
2190 rowexpr->row_format = COERCE_IMPLICIT_CAST;
2191 rowexpr->colnames = copyObject(rte->eref->colnames);
2192 rowexpr->location = -1;
2193
2194 return (Node *) rowexpr;
2195 }
2196 }
2197 /* system attributes don't need any other translation */
2198 }
2199 return (Node *) var;
2200 }
2201 if (IsA(node, CurrentOfExpr))
2202 {
2203 CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node);
2204
2205 for (cnt = 0; cnt < nappinfos; cnt++)
2206 {
2207 AppendRelInfo *appinfo = appinfos[cnt];
2208
2209 if (cexpr->cvarno == appinfo->parent_relid)
2210 {
2211 cexpr->cvarno = appinfo->child_relid;
2212 break;
2213 }
2214 }
2215 return (Node *) cexpr;
2216 }
2217 if (IsA(node, RangeTblRef))
2218 {
2219 RangeTblRef *rtr = (RangeTblRef *) copyObject(node);
2220
2221 for (cnt = 0; cnt < nappinfos; cnt++)
2222 {
2223 AppendRelInfo *appinfo = appinfos[cnt];
2224
2225 if (rtr->rtindex == appinfo->parent_relid)
2226 {
2227 rtr->rtindex = appinfo->child_relid;
2228 break;
2229 }
2230 }
2231 return (Node *) rtr;
2232 }
2233 if (IsA(node, JoinExpr))
2234 {
2235 /* Copy the JoinExpr node with correct mutation of subnodes */
2236 JoinExpr *j;
2237 AppendRelInfo *appinfo;
2238
2239 j = (JoinExpr *) expression_tree_mutator(node,
2240 adjust_appendrel_attrs_mutator,
2241 (void *) context);
2242 /* now fix JoinExpr's rtindex (probably never happens) */
2243 for (cnt = 0; cnt < nappinfos; cnt++)
2244 {
2245 appinfo = appinfos[cnt];
2246
2247 if (j->rtindex == appinfo->parent_relid)
2248 {
2249 j->rtindex = appinfo->child_relid;
2250 break;
2251 }
2252 }
2253 return (Node *) j;
2254 }
2255 if (IsA(node, PlaceHolderVar))
2256 {
2257 /* Copy the PlaceHolderVar node with correct mutation of subnodes */
2258 PlaceHolderVar *phv;
2259
2260 phv = (PlaceHolderVar *) expression_tree_mutator(node,
2261 adjust_appendrel_attrs_mutator,
2262 (void *) context);
2263 /* now fix PlaceHolderVar's relid sets */
2264 if (phv->phlevelsup == 0)
2265 phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos,
2266 context->appinfos);
2267 return (Node *) phv;
2268 }
2269 /* Shouldn't need to handle planner auxiliary nodes here */
2270 Assert(!IsA(node, SpecialJoinInfo));
2271 Assert(!IsA(node, AppendRelInfo));
2272 Assert(!IsA(node, PlaceHolderInfo));
2273 Assert(!IsA(node, MinMaxAggInfo));
2274
2275 /*
2276 * We have to process RestrictInfo nodes specially. (Note: although
2277 * set_append_rel_pathlist will hide RestrictInfos in the parent's
2278 * baserestrictinfo list from us, it doesn't hide those in joininfo.)
2279 */
2280 if (IsA(node, RestrictInfo))
2281 {
2282 RestrictInfo *oldinfo = (RestrictInfo *) node;
2283 RestrictInfo *newinfo = makeNode(RestrictInfo);
2284
2285 /* Copy all flat-copiable fields */
2286 memcpy(newinfo, oldinfo, sizeof(RestrictInfo));
2287
2288 /* Recursively fix the clause itself */
2289 newinfo->clause = (Expr *)
2290 adjust_appendrel_attrs_mutator((Node *) oldinfo->clause, context);
2291
2292 /* and the modified version, if an OR clause */
2293 newinfo->orclause = (Expr *)
2294 adjust_appendrel_attrs_mutator((Node *) oldinfo->orclause, context);
2295
2296 /* adjust relid sets too */
2297 newinfo->clause_relids = adjust_child_relids(oldinfo->clause_relids,
2298 context->nappinfos,
2299 context->appinfos);
2300 newinfo->required_relids = adjust_child_relids(oldinfo->required_relids,
2301 context->nappinfos,
2302 context->appinfos);
2303 newinfo->outer_relids = adjust_child_relids(oldinfo->outer_relids,
2304 context->nappinfos,
2305 context->appinfos);
2306 newinfo->nullable_relids = adjust_child_relids(oldinfo->nullable_relids,
2307 context->nappinfos,
2308 context->appinfos);
2309 newinfo->left_relids = adjust_child_relids(oldinfo->left_relids,
2310 context->nappinfos,
2311 context->appinfos);
2312 newinfo->right_relids = adjust_child_relids(oldinfo->right_relids,
2313 context->nappinfos,
2314 context->appinfos);
2315
2316 /*
2317 * Reset cached derivative fields, since these might need to have
2318 * different values when considering the child relation. Note we
2319 * don't reset left_ec/right_ec: each child variable is implicitly
2320 * equivalent to its parent, so still a member of the same EC if any.
2321 */
2322 newinfo->eval_cost.startup = -1;
2323 newinfo->norm_selec = -1;
2324 newinfo->outer_selec = -1;
2325 newinfo->left_em = NULL;
2326 newinfo->right_em = NULL;
2327 newinfo->scansel_cache = NIL;
2328 newinfo->left_bucketsize = -1;
2329 newinfo->right_bucketsize = -1;
2330 newinfo->left_mcvfreq = -1;
2331 newinfo->right_mcvfreq = -1;
2332
2333 return (Node *) newinfo;
2334 }
2335
2336 /*
2337 * NOTE: we do not need to recurse into sublinks, because they should
2338 * already have been converted to subplans before we see them.
2339 */
2340 Assert(!IsA(node, SubLink));
2341 Assert(!IsA(node, Query));
2342
2343 return expression_tree_mutator(node, adjust_appendrel_attrs_mutator,
2344 (void *) context);
2345 }
2346
2347 /*
2348 * Substitute child relids for parent relids in a Relid set. The array of
2349 * appinfos specifies the substitutions to be performed.
2350 */
2351 static Relids
adjust_child_relids(Relids relids,int nappinfos,AppendRelInfo ** appinfos)2352 adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
2353 {
2354 Bitmapset *result = NULL;
2355 int cnt;
2356
2357 for (cnt = 0; cnt < nappinfos; cnt++)
2358 {
2359 AppendRelInfo *appinfo = appinfos[cnt];
2360
2361 /* Remove parent, add child */
2362 if (bms_is_member(appinfo->parent_relid, relids))
2363 {
2364 /* Make a copy if we are changing the set. */
2365 if (!result)
2366 result = bms_copy(relids);
2367
2368 result = bms_del_member(result, appinfo->parent_relid);
2369 result = bms_add_member(result, appinfo->child_relid);
2370 }
2371 }
2372
2373 /* If we made any changes, return the modified copy. */
2374 if (result)
2375 return result;
2376
2377 /* Otherwise, return the original set without modification. */
2378 return relids;
2379 }
2380
2381 /*
2382 * Replace any relid present in top_parent_relids with its child in
2383 * child_relids. Members of child_relids can be multiple levels below top
2384 * parent in the partition hierarchy.
2385 */
2386 Relids
adjust_child_relids_multilevel(PlannerInfo * root,Relids relids,Relids child_relids,Relids top_parent_relids)2387 adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
2388 Relids child_relids, Relids top_parent_relids)
2389 {
2390 AppendRelInfo **appinfos;
2391 int nappinfos;
2392 Relids parent_relids = NULL;
2393 Relids result;
2394 Relids tmp_result = NULL;
2395 int cnt;
2396
2397 /*
2398 * If the given relids set doesn't contain any of the top parent relids,
2399 * it will remain unchanged.
2400 */
2401 if (!bms_overlap(relids, top_parent_relids))
2402 return relids;
2403
2404 appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
2405
2406 /* Construct relids set for the immediate parent of the given child. */
2407 for (cnt = 0; cnt < nappinfos; cnt++)
2408 {
2409 AppendRelInfo *appinfo = appinfos[cnt];
2410
2411 parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
2412 }
2413
2414 /* Recurse if immediate parent is not the top parent. */
2415 if (!bms_equal(parent_relids, top_parent_relids))
2416 {
2417 tmp_result = adjust_child_relids_multilevel(root, relids,
2418 parent_relids,
2419 top_parent_relids);
2420 relids = tmp_result;
2421 }
2422
2423 result = adjust_child_relids(relids, nappinfos, appinfos);
2424
2425 /* Free memory consumed by any intermediate result. */
2426 if (tmp_result)
2427 bms_free(tmp_result);
2428 bms_free(parent_relids);
2429 pfree(appinfos);
2430
2431 return result;
2432 }
2433
2434 /*
2435 * Adjust the targetlist entries of an inherited UPDATE operation
2436 *
2437 * The expressions have already been fixed, but we have to make sure that
2438 * the target resnos match the child table (they may not, in the case of
2439 * a column that was added after-the-fact by ALTER TABLE). In some cases
2440 * this can force us to re-order the tlist to preserve resno ordering.
2441 * (We do all this work in special cases so that preptlist.c is fast for
2442 * the typical case.)
2443 *
2444 * The given tlist has already been through expression_tree_mutator;
2445 * therefore the TargetEntry nodes are fresh copies that it's okay to
2446 * scribble on.
2447 *
2448 * Note that this is not needed for INSERT because INSERT isn't inheritable.
2449 */
2450 static List *
adjust_inherited_tlist(List * tlist,AppendRelInfo * context)2451 adjust_inherited_tlist(List *tlist, AppendRelInfo *context)
2452 {
2453 bool changed_it = false;
2454 ListCell *tl;
2455 List *new_tlist;
2456 bool more;
2457 int attrno;
2458
2459 /* This should only happen for an inheritance case, not UNION ALL */
2460 Assert(OidIsValid(context->parent_reloid));
2461
2462 /* Scan tlist and update resnos to match attnums of child rel */
2463 foreach(tl, tlist)
2464 {
2465 TargetEntry *tle = (TargetEntry *) lfirst(tl);
2466 Var *childvar;
2467
2468 if (tle->resjunk)
2469 continue; /* ignore junk items */
2470
2471 /* Look up the translation of this column: it must be a Var */
2472 if (tle->resno <= 0 ||
2473 tle->resno > list_length(context->translated_vars))
2474 elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2475 tle->resno, get_rel_name(context->parent_reloid));
2476 childvar = (Var *) list_nth(context->translated_vars, tle->resno - 1);
2477 if (childvar == NULL || !IsA(childvar, Var))
2478 elog(ERROR, "attribute %d of relation \"%s\" does not exist",
2479 tle->resno, get_rel_name(context->parent_reloid));
2480
2481 if (tle->resno != childvar->varattno)
2482 {
2483 tle->resno = childvar->varattno;
2484 changed_it = true;
2485 }
2486 }
2487
2488 /*
2489 * If we changed anything, re-sort the tlist by resno, and make sure
2490 * resjunk entries have resnos above the last real resno. The sort
2491 * algorithm is a bit stupid, but for such a seldom-taken path, small is
2492 * probably better than fast.
2493 */
2494 if (!changed_it)
2495 return tlist;
2496
2497 new_tlist = NIL;
2498 more = true;
2499 for (attrno = 1; more; attrno++)
2500 {
2501 more = false;
2502 foreach(tl, tlist)
2503 {
2504 TargetEntry *tle = (TargetEntry *) lfirst(tl);
2505
2506 if (tle->resjunk)
2507 continue; /* ignore junk items */
2508
2509 if (tle->resno == attrno)
2510 new_tlist = lappend(new_tlist, tle);
2511 else if (tle->resno > attrno)
2512 more = true;
2513 }
2514 }
2515
2516 foreach(tl, tlist)
2517 {
2518 TargetEntry *tle = (TargetEntry *) lfirst(tl);
2519
2520 if (!tle->resjunk)
2521 continue; /* here, ignore non-junk items */
2522
2523 tle->resno = attrno;
2524 new_tlist = lappend(new_tlist, tle);
2525 attrno++;
2526 }
2527
2528 return new_tlist;
2529 }
2530
2531 /*
2532 * adjust_appendrel_attrs_multilevel
2533 * Apply Var translations from a toplevel appendrel parent down to a child.
2534 *
2535 * In some cases we need to translate expressions referencing a parent relation
2536 * to reference an appendrel child that's multiple levels removed from it.
2537 */
2538 Node *
adjust_appendrel_attrs_multilevel(PlannerInfo * root,Node * node,Relids child_relids,Relids top_parent_relids)2539 adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
2540 Relids child_relids,
2541 Relids top_parent_relids)
2542 {
2543 AppendRelInfo **appinfos;
2544 Bitmapset *parent_relids = NULL;
2545 int nappinfos;
2546 int cnt;
2547
2548 Assert(bms_num_members(child_relids) == bms_num_members(top_parent_relids));
2549
2550 appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
2551
2552 /* Construct relids set for the immediate parent of given child. */
2553 for (cnt = 0; cnt < nappinfos; cnt++)
2554 {
2555 AppendRelInfo *appinfo = appinfos[cnt];
2556
2557 parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
2558 }
2559
2560 /* Recurse if immediate parent is not the top parent. */
2561 if (!bms_equal(parent_relids, top_parent_relids))
2562 node = adjust_appendrel_attrs_multilevel(root, node, parent_relids,
2563 top_parent_relids);
2564
2565 /* Now translate for this child */
2566 node = adjust_appendrel_attrs(root, node, nappinfos, appinfos);
2567
2568 pfree(appinfos);
2569
2570 return node;
2571 }
2572
2573 /*
2574 * Construct the SpecialJoinInfo for a child-join by translating
2575 * SpecialJoinInfo for the join between parents. left_relids and right_relids
2576 * are the relids of left and right side of the join respectively.
2577 */
2578 SpecialJoinInfo *
build_child_join_sjinfo(PlannerInfo * root,SpecialJoinInfo * parent_sjinfo,Relids left_relids,Relids right_relids)2579 build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
2580 Relids left_relids, Relids right_relids)
2581 {
2582 SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
2583 AppendRelInfo **left_appinfos;
2584 int left_nappinfos;
2585 AppendRelInfo **right_appinfos;
2586 int right_nappinfos;
2587
2588 memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
2589 left_appinfos = find_appinfos_by_relids(root, left_relids,
2590 &left_nappinfos);
2591 right_appinfos = find_appinfos_by_relids(root, right_relids,
2592 &right_nappinfos);
2593
2594 sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
2595 left_nappinfos, left_appinfos);
2596 sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
2597 right_nappinfos,
2598 right_appinfos);
2599 sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
2600 left_nappinfos, left_appinfos);
2601 sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
2602 right_nappinfos,
2603 right_appinfos);
2604 sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
2605 (Node *) sjinfo->semi_rhs_exprs,
2606 right_nappinfos,
2607 right_appinfos);
2608
2609 pfree(left_appinfos);
2610 pfree(right_appinfos);
2611
2612 return sjinfo;
2613 }
2614
2615 /*
2616 * find_appinfos_by_relids
2617 * Find AppendRelInfo structures for all relations specified by relids.
2618 *
2619 * The AppendRelInfos are returned in an array, which can be pfree'd by the
2620 * caller. *nappinfos is set to the number of entries in the array.
2621 */
2622 AppendRelInfo **
find_appinfos_by_relids(PlannerInfo * root,Relids relids,int * nappinfos)2623 find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
2624 {
2625 AppendRelInfo **appinfos;
2626 int cnt = 0;
2627 int i;
2628
2629 *nappinfos = bms_num_members(relids);
2630 appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos);
2631
2632 i = -1;
2633 while ((i = bms_next_member(relids, i)) >= 0)
2634 {
2635 AppendRelInfo *appinfo = root->append_rel_array[i];
2636
2637 if (!appinfo)
2638 elog(ERROR, "child rel %d not found in append_rel_array", i);
2639
2640 appinfos[cnt++] = appinfo;
2641 }
2642 return appinfos;
2643 }
2644