/*-------------------------------------------------------------------------
 *
 * prepunion.c
 *	  Routines to plan set-operation queries.  The filename is a leftover
 *	  from a time when only UNIONs were implemented.
 *
 * There are two code paths in the planner for set-operation queries.
 * If a subquery consists entirely of simple UNION ALL operations, it
 * is converted into an "append relation".  Otherwise, it is handled
 * by the general code in this module (plan_set_operations and its
 * subroutines).  There is some support code here for the append-relation
 * case, but most of the heavy lifting for that is done elsewhere,
 * notably in prepjointree.c and allpaths.c.
 *
 * There is also some code here to support planning of queries that use
 * inheritance (SELECT FROM foo*).  Inheritance trees are converted into
 * append relations, and thenceforth share code with the UNION ALL case.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/prep/prepunion.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <limits.h>

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/tlist.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"


/* Context passed down through adjust_appendrel_attrs_mutator */
typedef struct
{
	PlannerInfo *root;
	int			nappinfos;		/* number of entries in appinfos[] */
	AppendRelInfo **appinfos;	/* parent-to-child translation info */
} adjust_appendrel_attrs_context;

static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
					   List *colTypes, List *colCollations,
					   bool junkOK,
					   int flag, List *refnames_tlist,
					   List **pTargetList,
					   double *pNumGroups);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
						PlannerInfo *root,
						List *refnames_tlist,
						List **pTargetList);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
					 List *refnames_tlist,
					 List **pTargetList);
static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
						List *refnames_tlist,
						List **pTargetList);
static List *plan_union_children(PlannerInfo *root,
					SetOperationStmt *top_union,
					List *refnames_tlist,
					List **tlist_list);
static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
				  PlannerInfo *root);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
					Path *input_path,
					double dNumGroups, double dNumOutputRows,
					const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
					 int flag,
					 Index varno,
					 bool hack_constants,
					 List *input_tlist,
					 List *refnames_tlist);
static List *generate_append_tlist(List *colTypes, List *colCollations,
					  bool flag,
					  List *input_tlists,
					  List *refnames_tlist);
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
						 Index rti);
static void expand_partitioned_rtentry(PlannerInfo *root,
						   RangeTblEntry *parentrte,
						   Index parentRTindex, Relation parentrel,
						   PlanRowMark *top_parentrc, LOCKMODE lockmode,
						   List **appinfos);
static void expand_single_inheritance_child(PlannerInfo *root,
								RangeTblEntry *parentrte,
								Index parentRTindex, Relation parentrel,
								PlanRowMark *top_parentrc, Relation childrel,
								List **appinfos, RangeTblEntry **childrte_p,
								Index *childRTindex_p);
static void make_inh_translation_list(Relation oldrelation,
						  Relation newrelation,
						  Index newvarno,
						  List **translated_vars);
static Bitmapset *translate_col_privs(const Bitmapset *parent_privs,
					List *translated_vars);
static Node *adjust_appendrel_attrs_mutator(Node *node,
							   adjust_appendrel_attrs_context *context);
static Relids adjust_child_relids(Relids relids, int nappinfos,
					AppendRelInfo **appinfos);
static List *adjust_inherited_tlist(List *tlist,
					   AppendRelInfo *context);


/*
 * plan_set_operations
 *
 * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
 *
 * This routine only deals with the setOperations tree of the given query.
 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
 * when we return to grouping_planner; likewise for LIMIT.
 *
 * What we return is an "upperrel" RelOptInfo containing at least one Path
 * that implements the set-operation tree.  In addition, root->processed_tlist
 * receives a targetlist representing the output of the topmost setop node.
 */
RelOptInfo *
plan_set_operations(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
	Node	   *node;
	RangeTblEntry *leftmostRTE;
	Query	   *leftmostQuery;
	RelOptInfo *setop_rel;
	List	   *top_tlist;

	Assert(topop);

	/* check for unsupported stuff */
	Assert(parse->jointree->fromlist == NIL);
	Assert(parse->jointree->quals == NULL);
	Assert(parse->groupClause == NIL);
	Assert(parse->havingQual == NULL);
	Assert(parse->windowClause == NIL);
	Assert(parse->distinctClause == NIL);

	/*
	 * We'll need to build RelOptInfos for each of the leaf subqueries, which
	 * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
	 * arrays for that.
	 */
	setup_simple_rel_arrays(root);

	/*
	 * Populate append_rel_array with each AppendRelInfo to allow direct
	 * lookups by child relid.
	 */
	setup_append_rel_array(root);

	/*
	 * Find the leftmost component Query.  We need to use its column names for
	 * all generated tlists (else SELECT INTO won't work right).
	 */
	node = topop->larg;
	while (node && IsA(node, SetOperationStmt))
		node = ((SetOperationStmt *) node)->larg;
	Assert(node && IsA(node, RangeTblRef));
	leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
	leftmostQuery = leftmostRTE->subquery;
	Assert(leftmostQuery != NULL);

	/*
	 * If the topmost node is a recursive union, it needs special processing.
	 */
	if (root->hasRecursion)
	{
		setop_rel = generate_recursion_path(topop, root,
											leftmostQuery->targetList,
											&top_tlist);
	}
	else
	{
		/*
		 * Recurse on setOperations tree to generate paths for set ops. The
		 * final output paths should have just the column types shown as the
		 * output from the top-level node, plus possibly resjunk working
		 * columns (we can rely on upper-level nodes to deal with that).
		 */
		setop_rel = recurse_set_operations((Node *) topop, root,
										   topop->colTypes, topop->colCollations,
										   true, -1,
										   leftmostQuery->targetList,
										   &top_tlist,
										   NULL);
	}

	/* Must return the built tlist into root->processed_tlist. */
	root->processed_tlist = top_tlist;

	return setop_rel;
}

/*
 * recurse_set_operations
 *	  Recursively handle one step in a tree of set operations
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * junkOK: if true, child resjunk columns may be left in the result
 * flag: if >= 0, add a resjunk output column indicating value of flag
 * refnames_tlist: targetlist to take column names from
 *
 * Returns a RelOptInfo for the subtree, as well as these output parameters:
 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
 * *pNumGroups: if not NULL, we estimate the number of distinct groups
 *		in the result, and store it there
 *
 * The pTargetList output parameter is mostly redundant with the pathtarget
 * of the returned RelOptInfo, but for the moment we need it because much of
 * the logic in this file depends on flag columns being marked resjunk.
 * Pending a redesign of how that works, this is the easy way out.
 *
 * We don't have to care about typmods here: the only allowed difference
 * between set-op input and output typmods is input is a specific typmod
 * and output is -1, and that does not require a coercion.
 */
static RelOptInfo *
recurse_set_operations(Node *setOp, PlannerInfo *root,
					   List *colTypes, List *colCollations,
					   bool junkOK,
					   int flag, List *refnames_tlist,
					   List **pTargetList,
					   double *pNumGroups)
{
	RelOptInfo *rel = NULL;		/* keep compiler quiet */

	/* Guard against stack overflow due to overly complex setop nests */
	check_stack_depth();

	if (IsA(setOp, RangeTblRef))
	{
		/* Leaf case: a simple subquery in the range table */
		RangeTblRef *rtr = (RangeTblRef *) setOp;
		RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
		Query	   *subquery = rte->subquery;
		PlannerInfo *subroot;
		RelOptInfo *final_rel;
		Path	   *subpath;
		Path	   *path;
		List	   *tlist;

		Assert(subquery != NULL);

		/* Build a RelOptInfo for this leaf subquery. */
		rel = build_simple_rel(root, rtr->rtindex, NULL);

		/* plan_params should not be in use in current query level */
		Assert(root->plan_params == NIL);

		/* Generate a subroot and Paths for the subquery */
		subroot = rel->subroot = subquery_planner(root->glob, subquery,
												  root,
												  false,
												  root->tuple_fraction);

		/*
		 * It should not be possible for the primitive query to contain any
		 * cross-references to other primitive queries in the setop tree.
		 */
		if (root->plan_params)
			elog(ERROR, "unexpected outer reference in set operation subquery");

		/* Figure out the appropriate target list for this subquery. */
		tlist = generate_setop_tlist(colTypes, colCollations,
									 flag,
									 rtr->rtindex,
									 true,
									 subroot->processed_tlist,
									 refnames_tlist);
		rel->reltarget = create_pathtarget(root, tlist);

		/* Return the fully-fledged tlist to caller, too */
		*pTargetList = tlist;

		/*
		 * Mark rel with estimated output rows, width, etc.  Note that we have
		 * to do this before generating outer-query paths, else
		 * cost_subqueryscan is not happy.
		 */
		set_subquery_size_estimates(root, rel);

		/*
		 * Since we may want to add a partial path to this relation, we must
		 * set its consider_parallel flag correctly.
		 */
		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
		rel->consider_parallel = final_rel->consider_parallel;

		/*
		 * For the moment, we consider only a single Path for the subquery.
		 * This should change soon (make it look more like
		 * set_subquery_pathlist).
		 */
		subpath = get_cheapest_fractional_path(final_rel,
											   root->tuple_fraction);

		/*
		 * Stick a SubqueryScanPath atop that.
		 *
		 * We don't bother to determine the subquery's output ordering since
		 * it won't be reflected in the set-op result anyhow; so just label
		 * the SubqueryScanPath with nil pathkeys.  (XXX that should change
		 * soon too, likely.)
		 */
		path = (Path *) create_subqueryscan_path(root, rel, subpath,
												 NIL, NULL);

		add_path(rel, path);

		/*
		 * If we have a partial path for the child relation, we can use that
		 * to build a partial path for this relation.  But there's no point in
		 * considering any path but the cheapest.
		 */
		if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
			final_rel->partial_pathlist != NIL)
		{
			Path	   *partial_subpath;
			Path	   *partial_path;

			partial_subpath = linitial(final_rel->partial_pathlist);
			partial_path = (Path *)
				create_subqueryscan_path(root, rel, partial_subpath,
										 NIL, NULL);
			add_partial_path(rel, partial_path);
		}

		/*
		 * Estimate number of groups if caller wants it.  If the subquery used
		 * grouping or aggregation, its output is probably mostly unique
		 * anyway; otherwise do statistical estimation.
		 *
		 * XXX you don't really want to know about this: we do the estimation
		 * using the subquery's original targetlist expressions, not the
		 * subroot->processed_tlist which might seem more appropriate.  The
		 * reason is that if the subquery is itself a setop, it may return a
		 * processed_tlist containing "varno 0" Vars generated by
		 * generate_append_tlist, and those would confuse estimate_num_groups
		 * mightily.  We ought to get rid of the "varno 0" hack, but that
		 * requires a redesign of the parsetree representation of setops, so
		 * that there can be an RTE corresponding to each setop's output.
		 */
		if (pNumGroups)
		{
			if (subquery->groupClause || subquery->groupingSets ||
				subquery->distinctClause ||
				subroot->hasHavingQual || subquery->hasAggs)
				*pNumGroups = subpath->rows;
			else
				*pNumGroups = estimate_num_groups(subroot,
												  get_tlist_exprs(subquery->targetList, false),
												  subpath->rows,
												  NULL);
		}
	}
	else if (IsA(setOp, SetOperationStmt))
	{
		/* Internal case: a set-operation node; recurse via the generators */
		SetOperationStmt *op = (SetOperationStmt *) setOp;

		/* UNIONs are much different from INTERSECT/EXCEPT */
		if (op->op == SETOP_UNION)
			rel = generate_union_paths(op, root,
									   refnames_tlist,
									   pTargetList);
		else
			rel = generate_nonunion_paths(op, root,
										  refnames_tlist,
										  pTargetList);
		if (pNumGroups)
			*pNumGroups = rel->rows;

		/*
		 * If necessary, add a Result node to project the caller-requested
		 * output columns.
		 *
		 * XXX you don't really want to know about this: setrefs.c will apply
		 * fix_upper_expr() to the Result node's tlist. This would fail if the
		 * Vars generated by generate_setop_tlist() were not exactly equal()
		 * to the corresponding tlist entries of the subplan. However, since
		 * the subplan was generated by generate_union_plan() or
		 * generate_nonunion_plan(), and hence its tlist was generated by
		 * generate_append_tlist(), this will work.  We just tell
		 * generate_setop_tlist() to use varno 0.
		 */
		if (flag >= 0 ||
			!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
			!tlist_same_collations(*pTargetList, colCollations, junkOK))
		{
			PathTarget *target;
			ListCell   *lc;

			*pTargetList = generate_setop_tlist(colTypes, colCollations,
												flag,
												0,
												false,
												*pTargetList,
												refnames_tlist);
			target = create_pathtarget(root, *pTargetList);

			/* Apply projection to each path */
			foreach(lc, rel->pathlist)
			{
				Path	   *subpath = (Path *) lfirst(lc);
				Path	   *path;

				Assert(subpath->param_info == NULL);
				path = apply_projection_to_path(root, subpath->parent,
												subpath, target);
				/* If we had to add a Result, path is different from subpath */
				if (path != subpath)
					lfirst(lc) = path;
			}

			/* Apply projection to each partial path */
			foreach(lc, rel->partial_pathlist)
			{
				Path	   *subpath = (Path *) lfirst(lc);
				Path	   *path;

				Assert(subpath->param_info == NULL);

				/* avoid apply_projection_to_path, in case of multiple refs */
				path = (Path *) create_projection_path(root, subpath->parent,
													   subpath, target);
				lfirst(lc) = path;
			}
		}
	}
	else
	{
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(setOp));
		*pTargetList = NIL;		/* not reached; keeps compiler quiet */
	}

	postprocess_setop_rel(root, rel);

	return rel;
}

/*
 * Generate paths for a recursive UNION node
 */
static RelOptInfo *
generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
						List *refnames_tlist,
						List **pTargetList)
{
	RelOptInfo *result_rel;
	Path	   *path;
	RelOptInfo *lrel,
			   *rrel;
	Path	   *lpath;
	Path	   *rpath;
	List	   *lpath_tlist;
	List	   *rpath_tlist;
	List	   *tlist;
	List	   *groupList;
	double		dNumGroups;

	/* Parser should have rejected other cases */
	if (setOp->op != SETOP_UNION)
		elog(ERROR, "only UNION queries can be recursive");
	/* Worktable ID should be assigned */
	Assert(root->wt_param_id >= 0);

	/*
	 * Unlike a regular UNION node, process the left and right inputs
	 * separately without any intention of combining them into one Append.
	 */
	lrel = recurse_set_operations(setOp->larg, root,
								  setOp->colTypes, setOp->colCollations,
								  false, -1,
								  refnames_tlist,
								  &lpath_tlist,
								  NULL);
	lpath = lrel->cheapest_total_path;
	/* The right path will want to look at the left one ... */
	root->non_recursive_path = lpath;
	rrel = recurse_set_operations(setOp->rarg, root,
								  setOp->colTypes, setOp->colCollations,
								  false, -1,
								  refnames_tlist,
								  &rpath_tlist,
								  NULL);
	rpath = rrel->cheapest_total_path;
	root->non_recursive_path = NULL;

	/*
	 * Generate tlist for RecursiveUnion path node --- same as in Append cases
	 */
	tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
								  list_make2(lpath_tlist, rpath_tlist),
								  refnames_tlist);

	*pTargetList = tlist;

	/* Build result relation. */
	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
								 bms_union(lrel->relids, rrel->relids));
	result_rel->reltarget = create_pathtarget(root, tlist);

	/*
	 * If UNION, identify the grouping operators
	 */
	if (setOp->all)
	{
		/* UNION ALL: no duplicate elimination needed */
		groupList = NIL;
		dNumGroups = 0;
	}
	else
	{
		/* Identify the grouping semantics */
		groupList = generate_setop_grouplist(setOp, tlist);

		/* We only support hashing here */
		if (!grouping_is_hashable(groupList))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("could not implement recursive UNION"),
					 errdetail("All column datatypes must be hashable.")));

		/*
		 * For the moment, take the number of distinct groups as equal to the
		 * total input size, ie, the worst case.
		 */
		dNumGroups = lpath->rows + rpath->rows * 10;
	}

	/*
	 * And make the path node.
	 */
	path = (Path *) create_recursiveunion_path(root,
											   result_rel,
											   lpath,
											   rpath,
											   result_rel->reltarget,
											   groupList,
											   root->wt_param_id,
											   dNumGroups);

	add_path(result_rel, path);
	postprocess_setop_rel(root, result_rel);
	return result_rel;
}

/*
 * Generate paths for a UNION or UNION ALL node
 */
static RelOptInfo *
generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
					 List *refnames_tlist,
					 List **pTargetList)
{
	Relids		relids = NULL;
	RelOptInfo *result_rel;
	double		save_fraction = root->tuple_fraction;
	ListCell   *lc;
	List	   *pathlist = NIL;
	List	   *partial_pathlist = NIL;
	bool		partial_paths_valid = true;
	bool		consider_parallel = true;
	List	   *rellist;
	List	   *tlist_list;
	List	   *tlist;
	Path	   *path;

	/*
	 * If plain UNION, tell children to fetch all tuples.
	 *
	 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
	 * each arm of the UNION ALL.  One could make a case for reducing the
	 * tuple fraction for later arms (discounting by the expected size of the
	 * earlier arms' results) but it seems not worth the trouble. The normal
	 * case where tuple_fraction isn't already zero is a LIMIT at top level,
	 * and passing it down as-is is usually enough to get the desired result
	 * of preferring fast-start plans.
	 */
	if (!op->all)
		root->tuple_fraction = 0.0;

	/*
	 * If any of my children are identical UNION nodes (same op, all-flag, and
	 * colTypes) then they can be merged into this node so that we generate
	 * only one Append and unique-ification for the lot.  Recurse to find such
	 * nodes and compute their children's paths.
	 */
	rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);

	/*
	 * Generate tlist for Append plan node.
	 *
	 * The tlist for an Append plan isn't important as far as the Append is
	 * concerned, but we must make it look real anyway for the benefit of the
	 * next plan level up.
	 */
	tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
								  tlist_list, refnames_tlist);

	*pTargetList = tlist;

	/* Build path lists and relid set. */
	foreach(lc, rellist)
	{
		RelOptInfo *rel = lfirst(lc);

		pathlist = lappend(pathlist, rel->cheapest_total_path);

		if (consider_parallel)
		{
			if (!rel->consider_parallel)
			{
				/* One non-parallel-safe child disables parallelism overall */
				consider_parallel = false;
				partial_paths_valid = false;
			}
			else if (rel->partial_pathlist == NIL)
				partial_paths_valid = false;
			else
				partial_pathlist = lappend(partial_pathlist,
										   linitial(rel->partial_pathlist));
		}

		relids = bms_union(relids, rel->relids);
	}

	/* Build result relation. */
	result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
	result_rel->reltarget = create_pathtarget(root, tlist);
	result_rel->consider_parallel = consider_parallel;

	/*
	 * Append the child results together.
	 */
	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
									   NULL, 0, false, NIL, -1);

	/*
	 * For UNION ALL, we just need the Append path.  For UNION, need to add
	 * node(s) to remove duplicates.
	 */
	if (!op->all)
		path = make_union_unique(op, path, tlist, root);

	add_path(result_rel, path);

	/*
	 * Estimate number of groups.  For now we just assume the output is unique
	 * --- this is certainly true for the UNION case, and we want worst-case
	 * estimates anyway.
	 */
	result_rel->rows = path->rows;

	/*
	 * Now consider doing the same thing using the partial paths plus Append
	 * plus Gather.
	 */
	if (partial_paths_valid)
	{
		Path	   *ppath;
		ListCell   *lc;
		int			parallel_workers = 0;

		/* Find the highest number of workers requested for any subpath. */
		foreach(lc, partial_pathlist)
		{
			Path	   *path = lfirst(lc);

			parallel_workers = Max(parallel_workers, path->parallel_workers);
		}
		Assert(parallel_workers > 0);

		/*
		 * If the use of parallel append is permitted, always request at least
		 * log2(# of children) paths.  We assume it can be useful to have
		 * extra workers in this case because they will be spread out across
		 * the children.  The precise formula is just a guess; see
		 * add_paths_to_append_rel.
		 */
		if (enable_parallel_append)
		{
			parallel_workers = Max(parallel_workers,
								   fls(list_length(partial_pathlist)));
			parallel_workers = Min(parallel_workers,
								   max_parallel_workers_per_gather);
		}
		Assert(parallel_workers > 0);

		ppath = (Path *)
			create_append_path(root, result_rel, NIL, partial_pathlist,
							   NULL, parallel_workers, enable_parallel_append,
							   NIL, -1);
		ppath = (Path *)
			create_gather_path(root, result_rel, ppath,
							   result_rel->reltarget, NULL, NULL);
		if (!op->all)
			ppath = make_union_unique(op, ppath, tlist, root);
		add_path(result_rel, ppath);
	}

	/* Undo effects of possibly forcing tuple_fraction to 0 */
	root->tuple_fraction = save_fraction;

	return result_rel;
}

/*
 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
 */
static RelOptInfo *
generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
						List *refnames_tlist,
						List **pTargetList)
{
	RelOptInfo *result_rel;
	RelOptInfo *lrel,
			   *rrel;
	double		save_fraction = root->tuple_fraction;
	Path	   *lpath,
			   *rpath,
			   *path;
	List	   *lpath_tlist,
			   *rpath_tlist,
			   *tlist_list,
			   *tlist,
			   *groupList,
			   *pathlist;
	double		dLeftGroups,
				dRightGroups,
				dNumGroups,
				dNumOutputRows;
	bool		use_hash;
	SetOpCmd	cmd;
	int			firstFlag;

	/*
	 * Tell children to fetch all tuples.
	 */
	root->tuple_fraction = 0.0;

	/* Recurse on children, ensuring their outputs are marked */
	lrel = recurse_set_operations(op->larg, root,
								  op->colTypes, op->colCollations,
								  false, 0,
								  refnames_tlist,
								  &lpath_tlist,
								  &dLeftGroups);
	lpath = lrel->cheapest_total_path;
	rrel = recurse_set_operations(op->rarg, root,
								  op->colTypes, op->colCollations,
								  false, 1,
								  refnames_tlist,
								  &rpath_tlist,
								  &dRightGroups);
	rpath = rrel->cheapest_total_path;

	/* Undo effects of forcing tuple_fraction to 0 */
	root->tuple_fraction = save_fraction;

	/*
	 * For EXCEPT, we must put the left input first.  For INTERSECT, either
	 * order should give the same results, and we prefer to put the smaller
	 * input first in order to minimize the size of the hash table in the
	 * hashing case.  "Smaller" means the one with the fewer groups.
	 */
	if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
	{
		pathlist = list_make2(lpath, rpath);
		tlist_list = list_make2(lpath_tlist, rpath_tlist);
		firstFlag = 0;
	}
	else
	{
		pathlist = list_make2(rpath, lpath);
		tlist_list = list_make2(rpath_tlist, lpath_tlist);
		firstFlag = 1;
	}

	/*
	 * Generate tlist for Append plan node.
	 *
	 * The tlist for an Append plan isn't important as far as the Append is
	 * concerned, but we must make it look real anyway for the benefit of the
	 * next plan level up.  In fact, it has to be real enough that the flag
	 * column is shown as a variable not a constant, else setrefs.c will get
	 * confused.
	 */
	tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
								  tlist_list, refnames_tlist);

	*pTargetList = tlist;

	/* Build result relation. */
	result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
								 bms_union(lrel->relids, rrel->relids));
	result_rel->reltarget = create_pathtarget(root, tlist);

	/*
	 * Append the child results together.
	 */
	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
									   NULL, 0, false, NIL, -1);

	/* Identify the grouping semantics */
	groupList = generate_setop_grouplist(op, tlist);

	/*
	 * Estimate number of distinct groups that we'll need hashtable entries
	 * for; this is the size of the left-hand input for EXCEPT, or the smaller
	 * input for INTERSECT.  Also estimate the number of eventual output rows.
	 * In non-ALL cases, we estimate each group produces one output row; in
	 * ALL cases use the relevant relation size.  These are worst-case
	 * estimates, of course, but we need to be conservative.
	 */
	if (op->op == SETOP_EXCEPT)
	{
		dNumGroups = dLeftGroups;
		dNumOutputRows = op->all ? lpath->rows : dNumGroups;
	}
	else
	{
		dNumGroups = Min(dLeftGroups, dRightGroups);
		dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
	}

	/*
	 * Decide whether to hash or sort, and add a sort node if needed.
	 */
	use_hash = choose_hashed_setop(root, groupList, path,
								   dNumGroups, dNumOutputRows,
								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");

	if (groupList && !use_hash)
		path = (Path *) create_sort_path(root,
										 result_rel,
										 path,
										 make_pathkeys_for_sortclauses(root,
																	   groupList,
																	   tlist),
										 -1.0);

	/*
	 * Finally, add a SetOp path node to generate the correct output.
	 */
	switch (op->op)
	{
		case SETOP_INTERSECT:
			cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
			break;
		case SETOP_EXCEPT:
			cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) op->op);
			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
			break;
	}
	path = (Path *) create_setop_path(root,
									  result_rel,
									  path,
									  cmd,
									  use_hash ? SETOP_HASHED : SETOP_SORTED,
									  groupList,
									  list_length(op->colTypes) + 1,
									  use_hash ? firstFlag : -1,
									  dNumGroups,
									  dNumOutputRows);

	result_rel->rows = path->rows;
	add_path(result_rel, path);
	return result_rel;
}

/*
 * Pull up children of a UNION node that are identically-propertied UNIONs.
 *
 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
 * output rows will be lost anyway.
 *
 * NOTE: currently, we ignore collations while determining if a child has
 * the same properties.  This is semantically sound only so long as all
 * collations have the same notion of equality.  It is valid from an
 * implementation standpoint because we don't care about the ordering of
 * a UNION child's result: UNION ALL results are always unordered, and
 * generate_union_paths will force a fresh sort if the top level is a UNION.
 */
static List *
plan_union_children(PlannerInfo *root,
					SetOperationStmt *top_union,
					List *refnames_tlist,
					List **tlist_list)
{
	List	   *pending_rels = list_make1(top_union);
	List	   *result = NIL;
	List	   *child_tlist;

	*tlist_list = NIL;

	/* Iteratively flatten the setop tree, depth-first from the left */
	while (pending_rels != NIL)
	{
		Node	   *setOp = linitial(pending_rels);

		pending_rels = list_delete_first(pending_rels);

		if (IsA(setOp, SetOperationStmt))
		{
			SetOperationStmt *op = (SetOperationStmt *) setOp;

			if (op->op == top_union->op &&
				(op->all == top_union->all || op->all) &&
				equal(op->colTypes, top_union->colTypes))
			{
				/* Same UNION, so fold children into parent */
				pending_rels = lcons(op->rarg, pending_rels);
				pending_rels = lcons(op->larg, pending_rels);
				continue;
			}
		}

		/*
		 * Not same, so plan this child separately.
		 *
		 * Note we disallow any resjunk columns in child results.  This is
		 * necessary since the Append node that implements the union won't do
		 * any projection, and upper levels will get confused if some of our
		 * output tuples have junk and some don't.  This case only arises when
		 * we have an EXCEPT or INTERSECT as child, else there won't be
		 * resjunk anyway.
		 */
		result = lappend(result, recurse_set_operations(setOp, root,
														top_union->colTypes,
														top_union->colCollations,
														false, -1,
														refnames_tlist,
														&child_tlist,
														NULL));
		*tlist_list = lappend(*tlist_list, child_tlist);
	}

	return result;
}

/*
 * Add nodes to the given path tree to unique-ify the result of a UNION.
 */
static Path *
make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
				  PlannerInfo *root)
{
	RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
	List	   *groupList;
	double		dNumGroups;

	/* Identify the grouping semantics */
	groupList = generate_setop_grouplist(op, tlist);

	/*
	 * XXX for the moment, take the number of distinct groups as equal to the
	 * total input size, ie, the worst case.  This is too conservative, but we
	 * don't want to risk having the hashtable overrun memory; also, it's not
	 * clear how to get a decent estimate of the true size.  One should note
	 * as well the propensity of novices to write UNION rather than UNION ALL
	 * even when they don't expect any duplicates...
	 */
	dNumGroups = path->rows;

	/* Decide whether to hash or sort */
	if (choose_hashed_setop(root, groupList, path,
							dNumGroups, dNumGroups,
							"UNION"))
	{
		/* Hashed aggregate plan --- no sort needed */
		path = (Path *) create_agg_path(root,
										result_rel,
										path,
										create_pathtarget(root, tlist),
										AGG_HASHED,
										AGGSPLIT_SIMPLE,
										groupList,
										NIL,
										NULL,
										dNumGroups);
	}
	else
	{
		/* Sort and Unique */
		if (groupList)
			path = (Path *)
				create_sort_path(root,
								 result_rel,
								 path,
								 make_pathkeys_for_sortclauses(root,
															   groupList,
															   tlist),
								 -1.0);
		path = (Path *) create_upper_unique_path(root,
												 result_rel,
												 path,
												 list_length(path->pathkeys),
												 dNumGroups);
	}

	return path;
}

/*
 * postprocess_setop_rel - perform steps required after adding paths
 */
static void
postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
{
	/*
	 * We don't currently worry about allowing FDWs to contribute paths to
	 * this relation, but give extensions a chance.
	 */
	if (create_upper_paths_hook)
		(*create_upper_paths_hook) (root, UPPERREL_SETOP,
									NULL, rel, NULL);

	/* Select cheapest path */
	set_cheapest(rel);
}

/*
 * choose_hashed_setop - should we use hashing for a set operation?
 */
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
					Path *input_path,
					double dNumGroups, double dNumOutputRows,
					const char *construct)
{
	int			numGroupCols = list_length(groupClauses);
	bool		can_sort;
	bool		can_hash;
	Size		hashentrysize;
	Path		hashed_p;
	Path		sorted_p;
	double		tuple_fraction;

	/* Check whether the operators support sorting or hashing */
	can_sort = grouping_is_sortable(groupClauses);
	can_hash = grouping_is_hashable(groupClauses);
	if (can_hash && can_sort)
	{
		/* we have a meaningful choice to make, continue ... */
	}
	else if (can_hash)
		return true;
	else if (can_sort)
		return false;
	else
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
		/* translator: %s is UNION, INTERSECT, or EXCEPT */
				 errmsg("could not implement %s", construct),
				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));

	/* Prefer sorting when enable_hashagg is off */
	if (!enable_hashagg)
		return false;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 */
	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);

	if (hashentrysize * dNumGroups > work_mem * 1024L)
		return false;

	/*
	 * See if the estimated cost is no more than doing it the other way.
	 *
	 * We need to consider input_plan + hashagg versus input_plan + sort +
	 * group.
Note that the actual result plan might involve a SetOp or 1100 * Unique node, not Agg or Group, but the cost estimates for Agg and Group 1101 * should be close enough for our purposes here. 1102 * 1103 * These path variables are dummies that just hold cost fields; we don't 1104 * make actual Paths for these steps. 1105 */ 1106 cost_agg(&hashed_p, root, AGG_HASHED, NULL, 1107 numGroupCols, dNumGroups, 1108 NIL, 1109 input_path->startup_cost, input_path->total_cost, 1110 input_path->rows); 1111 1112 /* 1113 * Now for the sorted case. Note that the input is *always* unsorted, 1114 * since it was made by appending unrelated sub-relations together. 1115 */ 1116 sorted_p.startup_cost = input_path->startup_cost; 1117 sorted_p.total_cost = input_path->total_cost; 1118 /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */ 1119 cost_sort(&sorted_p, root, NIL, sorted_p.total_cost, 1120 input_path->rows, input_path->pathtarget->width, 1121 0.0, work_mem, -1.0); 1122 cost_group(&sorted_p, root, numGroupCols, dNumGroups, 1123 NIL, 1124 sorted_p.startup_cost, sorted_p.total_cost, 1125 input_path->rows); 1126 1127 /* 1128 * Now make the decision using the top-level tuple fraction. First we 1129 * have to convert an absolute count (LIMIT) into fractional form. 
1130 */ 1131 tuple_fraction = root->tuple_fraction; 1132 if (tuple_fraction >= 1.0) 1133 tuple_fraction /= dNumOutputRows; 1134 1135 if (compare_fractional_path_costs(&hashed_p, &sorted_p, 1136 tuple_fraction) < 0) 1137 { 1138 /* Hashed is cheaper, so use it */ 1139 return true; 1140 } 1141 return false; 1142 } 1143 1144 /* 1145 * Generate targetlist for a set-operation plan node 1146 * 1147 * colTypes: OID list of set-op's result column datatypes 1148 * colCollations: OID list of set-op's result column collations 1149 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column 1150 * varno: varno to use in generated Vars 1151 * hack_constants: true to copy up constants (see comments in code) 1152 * input_tlist: targetlist of this node's input node 1153 * refnames_tlist: targetlist to take column names from 1154 */ 1155 static List * 1156 generate_setop_tlist(List *colTypes, List *colCollations, 1157 int flag, 1158 Index varno, 1159 bool hack_constants, 1160 List *input_tlist, 1161 List *refnames_tlist) 1162 { 1163 List *tlist = NIL; 1164 int resno = 1; 1165 ListCell *ctlc, 1166 *cclc, 1167 *itlc, 1168 *rtlc; 1169 TargetEntry *tle; 1170 Node *expr; 1171 1172 /* there's no forfour() so we must chase one list manually */ 1173 rtlc = list_head(refnames_tlist); 1174 forthree(ctlc, colTypes, cclc, colCollations, itlc, input_tlist) 1175 { 1176 Oid colType = lfirst_oid(ctlc); 1177 Oid colColl = lfirst_oid(cclc); 1178 TargetEntry *inputtle = (TargetEntry *) lfirst(itlc); 1179 TargetEntry *reftle = (TargetEntry *) lfirst(rtlc); 1180 1181 rtlc = lnext(rtlc); 1182 1183 Assert(inputtle->resno == resno); 1184 Assert(reftle->resno == resno); 1185 Assert(!inputtle->resjunk); 1186 Assert(!reftle->resjunk); 1187 1188 /* 1189 * Generate columns referencing input columns and having appropriate 1190 * data types and column names. Insert datatype coercions where 1191 * necessary. 
1192 * 1193 * HACK: constants in the input's targetlist are copied up as-is 1194 * rather than being referenced as subquery outputs. This is mainly 1195 * to ensure that when we try to coerce them to the output column's 1196 * datatype, the right things happen for UNKNOWN constants. But do 1197 * this only at the first level of subquery-scan plans; we don't want 1198 * phony constants appearing in the output tlists of upper-level 1199 * nodes! 1200 */ 1201 if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const)) 1202 expr = (Node *) inputtle->expr; 1203 else 1204 expr = (Node *) makeVar(varno, 1205 inputtle->resno, 1206 exprType((Node *) inputtle->expr), 1207 exprTypmod((Node *) inputtle->expr), 1208 exprCollation((Node *) inputtle->expr), 1209 0); 1210 1211 if (exprType(expr) != colType) 1212 { 1213 /* 1214 * Note: it's not really cool to be applying coerce_to_common_type 1215 * here; one notable point is that assign_expr_collations never 1216 * gets run on any generated nodes. For the moment that's not a 1217 * problem because we force the correct exposed collation below. 1218 * It would likely be best to make the parser generate the correct 1219 * output tlist for every set-op to begin with, though. 1220 */ 1221 expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */ 1222 expr, 1223 colType, 1224 "UNION/INTERSECT/EXCEPT"); 1225 } 1226 1227 /* 1228 * Ensure the tlist entry's exposed collation matches the set-op. This 1229 * is necessary because plan_set_operations() reports the result 1230 * ordering as a list of SortGroupClauses, which don't carry collation 1231 * themselves but just refer to tlist entries. If we don't show the 1232 * right collation then planner.c might do the wrong thing in 1233 * higher-level queries. 1234 * 1235 * Note we use RelabelType, not CollateExpr, since this expression 1236 * will reach the executor without any further processing. 
1237 */ 1238 if (exprCollation(expr) != colColl) 1239 { 1240 expr = (Node *) makeRelabelType((Expr *) expr, 1241 exprType(expr), 1242 exprTypmod(expr), 1243 colColl, 1244 COERCE_IMPLICIT_CAST); 1245 } 1246 1247 tle = makeTargetEntry((Expr *) expr, 1248 (AttrNumber) resno++, 1249 pstrdup(reftle->resname), 1250 false); 1251 1252 /* 1253 * By convention, all non-resjunk columns in a setop tree have 1254 * ressortgroupref equal to their resno. In some cases the ref isn't 1255 * needed, but this is a cleaner way than modifying the tlist later. 1256 */ 1257 tle->ressortgroupref = tle->resno; 1258 1259 tlist = lappend(tlist, tle); 1260 } 1261 1262 if (flag >= 0) 1263 { 1264 /* Add a resjunk flag column */ 1265 /* flag value is the given constant */ 1266 expr = (Node *) makeConst(INT4OID, 1267 -1, 1268 InvalidOid, 1269 sizeof(int32), 1270 Int32GetDatum(flag), 1271 false, 1272 true); 1273 tle = makeTargetEntry((Expr *) expr, 1274 (AttrNumber) resno++, 1275 pstrdup("flag"), 1276 true); 1277 tlist = lappend(tlist, tle); 1278 } 1279 1280 return tlist; 1281 } 1282 1283 /* 1284 * Generate targetlist for a set-operation Append node 1285 * 1286 * colTypes: OID list of set-op's result column datatypes 1287 * colCollations: OID list of set-op's result column collations 1288 * flag: true to create a flag column copied up from subplans 1289 * input_tlists: list of tlists for sub-plans of the Append 1290 * refnames_tlist: targetlist to take column names from 1291 * 1292 * The entries in the Append's targetlist should always be simple Vars; 1293 * we just have to make sure they have the right datatypes/typmods/collations. 1294 * The Vars are always generated with varno 0. 1295 * 1296 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width 1297 * cannot figure out a realistic width for the tlist we make here. But we 1298 * ought to refactor this code to produce a PathTarget directly, anyway. 
 */
static List *
generate_append_tlist(List *colTypes, List *colCollations,
					  bool flag,
					  List *input_tlists,
					  List *refnames_tlist)
{
	List	   *tlist = NIL;
	int			resno = 1;
	ListCell   *curColType;
	ListCell   *curColCollation;
	ListCell   *ref_tl_item;
	int			colindex;
	TargetEntry *tle;
	Node	   *expr;
	ListCell   *tlistl;
	int32	   *colTypmods;

	/*
	 * First extract typmods to use.
	 *
	 * If the inputs all agree on type and typmod of a particular column, use
	 * that typmod; else use -1.
	 */
	colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));

	foreach(tlistl, input_tlists)
	{
		List	   *subtlist = (List *) lfirst(tlistl);
		ListCell   *subtlistl;

		curColType = list_head(colTypes);
		colindex = 0;
		foreach(subtlistl, subtlist)
		{
			TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);

			/* resjunk columns (e.g. setop flag columns) have no output slot */
			if (subtle->resjunk)
				continue;
			Assert(curColType != NULL);
			if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
			{
				/* If first subplan, copy the typmod; else compare */
				int32		subtypmod = exprTypmod((Node *) subtle->expr);

				if (tlistl == list_head(input_tlists))
					colTypmods[colindex] = subtypmod;
				else if (subtypmod != colTypmods[colindex])
					colTypmods[colindex] = -1;
			}
			else
			{
				/* types disagree, so force typmod to -1 */
				colTypmods[colindex] = -1;
			}
			curColType = lnext(curColType);
			colindex++;
		}
		Assert(curColType == NULL);
	}

	/*
	 * Now we can build the tlist for the Append.
	 */
	colindex = 0;
	forthree(curColType, colTypes, curColCollation, colCollations,
			 ref_tl_item, refnames_tlist)
	{
		Oid			colType = lfirst_oid(curColType);
		int32		colTypmod = colTypmods[colindex++];
		Oid			colColl = lfirst_oid(curColCollation);
		TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);

		Assert(reftle->resno == resno);
		Assert(!reftle->resjunk);
		expr = (Node *) makeVar(0,
								resno,
								colType,
								colTypmod,
								colColl,
								0);
		tle = makeTargetEntry((Expr *) expr,
							  (AttrNumber) resno++,
							  pstrdup(reftle->resname),
							  false);

		/*
		 * By convention, all non-resjunk columns in a setop tree have
		 * ressortgroupref equal to their resno.  In some cases the ref isn't
		 * needed, but this is a cleaner way than modifying the tlist later.
		 */
		tle->ressortgroupref = tle->resno;

		tlist = lappend(tlist, tle);
	}

	if (flag)
	{
		/* Add a resjunk flag column */
		/* flag value is shown as copied up from subplan */
		expr = (Node *) makeVar(0,
								resno,
								INT4OID,
								-1,
								InvalidOid,
								0);
		tle = makeTargetEntry((Expr *) expr,
							  (AttrNumber) resno++,
							  pstrdup("flag"),
							  true);
		tlist = lappend(tlist, tle);
	}

	pfree(colTypmods);

	return tlist;
}

/*
 * generate_setop_grouplist
 *	  Build a SortGroupClause list defining the sort/grouping properties
 *	  of the setop's output columns.
 *
 * Parse analysis already determined the properties and built a suitable
 * list, except that the entries do not have sortgrouprefs set because
 * the parser output representation doesn't include a tlist for each
 * setop.  So what we need to do here is copy that list and install
 * proper sortgrouprefs into it (copying those from the targetlist).
 */
static List *
generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
{
	List	   *grouplist = copyObject(op->groupClauses);
	ListCell   *lg;
	ListCell   *lt;

	/* walk grouplist in parallel with the non-resjunk targetlist entries */
	lg = list_head(grouplist);
	foreach(lt, targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lt);
		SortGroupClause *sgc;

		if (tle->resjunk)
		{
			/* resjunk columns should not have sortgrouprefs */
			Assert(tle->ressortgroupref == 0);
			continue;			/* ignore resjunk columns */
		}

		/* non-resjunk columns should have sortgroupref = resno */
		Assert(tle->ressortgroupref == tle->resno);

		/* non-resjunk columns should have grouping clauses */
		Assert(lg != NULL);
		sgc = (SortGroupClause *) lfirst(lg);
		lg = lnext(lg);
		Assert(sgc->tleSortGroupRef == 0);

		sgc->tleSortGroupRef = tle->ressortgroupref;
	}
	Assert(lg == NULL);
	return grouplist;
}


/*
 * expand_inherited_tables
 *		Expand each rangetable entry that represents an inheritance set
 *		into an "append relation".  At the conclusion of this process,
 *		the "inh" flag is set in all and only those RTEs that are append
 *		relation parents.
 */
void
expand_inherited_tables(PlannerInfo *root)
{
	Index		nrtes;
	Index		rti;
	ListCell   *rl;

	/*
	 * expand_inherited_rtentry may add RTEs to parse->rtable. The function is
	 * expected to recursively handle any RTEs that it creates with inh=true.
	 * So just scan as far as the original end of the rtable list.
	 */
	nrtes = list_length(root->parse->rtable);
	rl = list_head(root->parse->rtable);
	for (rti = 1; rti <= nrtes; rti++)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rl);

		/* rti and rl advance in lockstep: rl is the cell for RT index rti */
		expand_inherited_rtentry(root, rte, rti);
		rl = lnext(rl);
	}
}

/*
 * expand_inherited_rtentry
 *		Check whether a rangetable entry represents an inheritance set.
 *		If so, add entries for all the child tables to the query's
 *		rangetable, and build AppendRelInfo nodes for all the child tables
 *		and add them to root->append_rel_list.  If not, clear the entry's
 *		"inh" flag to prevent later code from looking for AppendRelInfos.
 *
 * Note that the original RTE is considered to represent the whole
 * inheritance set.  The first of the generated RTEs is an RTE for the same
 * table, but with inh = false, to represent the parent table in its role
 * as a simple member of the inheritance set.
 *
 * A childless table is never considered to be an inheritance set.  For
 * regular inheritance, a parent RTE must always have at least two associated
 * AppendRelInfos: one corresponding to the parent table as a simple member of
 * inheritance set and one or more corresponding to the actual children.
 * Since a partitioned table is not scanned, it might have only one associated
 * AppendRelInfo.
 */
static void
expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
{
	Query	   *parse = root->parse;
	Oid			parentOID;
	PlanRowMark *oldrc;
	Relation	oldrelation;
	LOCKMODE	lockmode;
	List	   *inhOIDs;
	ListCell   *l;

	/* Does RT entry allow inheritance? */
	if (!rte->inh)
		return;
	/* Ignore any already-expanded UNION ALL nodes */
	if (rte->rtekind != RTE_RELATION)
	{
		Assert(rte->rtekind == RTE_SUBQUERY);
		return;
	}
	/* Fast path for common case of childless table */
	parentOID = rte->relid;
	if (!has_subclass(parentOID))
	{
		/* Clear flag before returning */
		rte->inh = false;
		return;
	}

	/*
	 * The rewriter should already have obtained an appropriate lock on each
	 * relation named in the query.  However, for each child relation we add
	 * to the query, we must obtain an appropriate lock, because this will be
	 * the first use of those relations in the parse/rewrite/plan pipeline.
	 *
	 * If the parent relation is the query's result relation, then we need
	 * RowExclusiveLock.  Otherwise, if it's accessed FOR UPDATE/SHARE, we
	 * need RowShareLock; otherwise AccessShareLock.  We can't just grab
	 * AccessShareLock because then the executor would be trying to upgrade
	 * the lock, leading to possible deadlocks.  (This code should match the
	 * parser and rewriter.)
	 */
	oldrc = get_plan_rowmark(root->rowMarks, rti);
	if (rti == parse->resultRelation)
		lockmode = RowExclusiveLock;
	else if (oldrc && RowMarkRequiresRowShareLock(oldrc->markType))
		lockmode = RowShareLock;
	else
		lockmode = AccessShareLock;

	/* Scan for all members of inheritance set, acquire needed locks */
	inhOIDs = find_all_inheritors(parentOID, lockmode, NULL);

	/*
	 * Check that there's at least one descendant, else treat as no-child
	 * case.  This could happen despite above has_subclass() check, if table
	 * once had a child but no longer does.
	 */
	if (list_length(inhOIDs) < 2)
	{
		/* Clear flag before returning */
		rte->inh = false;
		return;
	}

	/*
	 * If parent relation is selected FOR UPDATE/SHARE, we need to mark its
	 * PlanRowMark as isParent = true, and generate a new PlanRowMark for each
	 * child.
	 */
	if (oldrc)
		oldrc->isParent = true;

	/*
	 * Must open the parent relation to examine its tupdesc.  We need not lock
	 * it; we assume the rewriter already did.
	 */
	oldrelation = heap_open(parentOID, NoLock);

	/* Scan the inheritance set and expand it */
	if (RelationGetPartitionDesc(oldrelation) != NULL)
	{
		Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);

		/*
		 * If this table has partitions, recursively expand them in the order
		 * in which they appear in the PartitionDesc.  While at it, also
		 * extract the partition key columns of all the partitioned tables.
		 */
		expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
								   lockmode, &root->append_rel_list);
	}
	else
	{
		List	   *appinfos = NIL;
		RangeTblEntry *childrte;
		Index		childRTindex;

		/*
		 * This table has no partitions.  Expand any plain inheritance
		 * children in the order the OIDs were returned by
		 * find_all_inheritors.
		 */
		foreach(l, inhOIDs)
		{
			Oid			childOID = lfirst_oid(l);
			Relation	newrelation;

			/* Open rel if needed; we already have required locks */
			if (childOID != parentOID)
				newrelation = heap_open(childOID, NoLock);
			else
				newrelation = oldrelation;

			/*
			 * It is possible that the parent table has children that are temp
			 * tables of other backends.  We cannot safely access such tables
			 * (because of buffering issues), and the best thing to do seems
			 * to be to silently ignore them.
			 */
			if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
			{
				/* release the child's lock too, since we won't scan it */
				heap_close(newrelation, lockmode);
				continue;
			}

			expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
											newrelation,
											&appinfos, &childrte,
											&childRTindex);

			/* Close child relations, but keep locks */
			if (childOID != parentOID)
				heap_close(newrelation, NoLock);
		}

		/*
		 * If all the children were temp tables, pretend it's a
		 * non-inheritance situation; we don't need Append node in that case.
		 * The duplicate RTE we added for the parent table is harmless, so we
		 * don't bother to get rid of it; ditto for the useless PlanRowMark
		 * node.
		 */
		if (list_length(appinfos) < 2)
			rte->inh = false;
		else
			root->append_rel_list = list_concat(root->append_rel_list,
												appinfos);

	}

	heap_close(oldrelation, NoLock);
}

/*
 * expand_partitioned_rtentry
 *		Recursively expand an RTE for a partitioned table.
 *
 * Note that RelationGetPartitionDispatchInfo will expand partitions in the
 * same order as this code.
 */
static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
						   Index parentRTindex, Relation parentrel,
						   PlanRowMark *top_parentrc, LOCKMODE lockmode,
						   List **appinfos)
{
	int			i;
	RangeTblEntry *childrte;
	Index		childRTindex;
	PartitionDesc partdesc = RelationGetPartitionDesc(parentrel);

	/* recursion depth is bounded only by partition-tree depth */
	check_stack_depth();

	/* A partitioned table should always have a partition descriptor. */
	Assert(partdesc);

	Assert(parentrte->inh);

	/*
	 * Note down whether any partition key cols are being updated. Though it's
	 * the root partitioned table's updatedCols we are interested in, we
	 * instead use parentrte to get the updatedCols. This is convenient
	 * because parentrte already has the root partrel's updatedCols translated
	 * to match the attribute ordering of parentrel.
	 */
	if (!root->partColsUpdated)
		root->partColsUpdated =
			has_partition_attrs(parentrel, parentrte->updatedCols, NULL);

	/* First expand the partitioned table itself. */
	expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel,
									top_parentrc, parentrel,
									appinfos, &childrte, &childRTindex);

	/*
	 * If the partitioned table has no partitions, treat this as the
	 * non-inheritance case.
	 */
	if (partdesc->nparts == 0)
	{
		parentrte->inh = false;
		return;
	}

	for (i = 0; i < partdesc->nparts; i++)
	{
		Oid			childOID = partdesc->oids[i];
		Relation	childrel;

		/* Open rel; we already have required locks */
		childrel = heap_open(childOID, NoLock);

		/*
		 * Temporary partitions belonging to other sessions should have been
		 * disallowed at definition, but for paranoia's sake, let's double
		 * check.
		 */
		if (RELATION_IS_OTHER_TEMP(childrel))
			elog(ERROR, "temporary relation from another session found as partition");

		expand_single_inheritance_child(root, parentrte, parentRTindex,
										parentrel, top_parentrc, childrel,
										appinfos, &childrte, &childRTindex);

		/* If this child is itself partitioned, recurse */
		if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
			expand_partitioned_rtentry(root, childrte, childRTindex,
									   childrel, top_parentrc, lockmode,
									   appinfos);

		/* Close child relation, but keep locks */
		heap_close(childrel, NoLock);
	}
}

/*
 * expand_single_inheritance_child
 *		Build a RangeTblEntry and an AppendRelInfo, if appropriate, plus
 *		maybe a PlanRowMark.
 *
 * We now expand the partition hierarchy level by level, creating a
 * corresponding hierarchy of AppendRelInfos and RelOptInfos, where each
 * partitioned descendant acts as a parent of its immediate partitions.
 * (This is a difference from what older versions of PostgreSQL did and what
 * is still done in the case of table inheritance for unpartitioned tables,
 * where the hierarchy is flattened during RTE expansion.)
 *
 * PlanRowMarks still carry the top-parent's RTI, and the top-parent's
 * allMarkTypes field still accumulates values from all descendents.
 *
 * "parentrte" and "parentRTindex" are immediate parent's RTE and
 * RTI. "top_parentrc" is top parent's PlanRowMark.
 *
 * The child RangeTblEntry and its RTI are returned in "childrte_p" and
 * "childRTindex_p" resp.
 */
static void
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
								Index parentRTindex, Relation parentrel,
								PlanRowMark *top_parentrc, Relation childrel,
								List **appinfos, RangeTblEntry **childrte_p,
								Index *childRTindex_p)
{
	Query	   *parse = root->parse;
	Oid			parentOID = RelationGetRelid(parentrel);
	Oid			childOID = RelationGetRelid(childrel);
	RangeTblEntry *childrte;
	Index		childRTindex;
	AppendRelInfo *appinfo;

	/*
	 * Build an RTE for the child, and attach to query's rangetable list. We
	 * copy most fields of the parent's RTE, but replace relation OID and
	 * relkind, and set inh = false.  Also, set requiredPerms to zero since
	 * all required permissions checks are done on the original RTE. Likewise,
	 * set the child's securityQuals to empty, because we only want to apply
	 * the parent's RLS conditions regardless of what RLS properties
	 * individual children may have.  (This is an intentional choice to make
	 * inherited RLS work like regular permissions checks.)  The parent
	 * securityQuals will be propagated to children along with other base
	 * restriction clauses, so we don't need to do it here.
	 */
	childrte = copyObject(parentrte);
	*childrte_p = childrte;
	childrte->relid = childOID;
	childrte->relkind = childrel->rd_rel->relkind;
	/* A partitioned child will need to be expanded further. */
	if (childOID != parentOID &&
		childrte->relkind == RELKIND_PARTITIONED_TABLE)
		childrte->inh = true;
	else
		childrte->inh = false;
	childrte->requiredPerms = 0;
	childrte->securityQuals = NIL;
	parse->rtable = lappend(parse->rtable, childrte);
	childRTindex = list_length(parse->rtable);
	*childRTindex_p = childRTindex;

	/*
	 * We need an AppendRelInfo if paths will be built for the child RTE. If
	 * childrte->inh is true, then we'll always need to generate append paths
	 * for it.  If childrte->inh is false, we must scan it if it's not a
	 * partitioned table; but if it is a partitioned table, then it never has
	 * any data of its own and need not be scanned.
	 */
	if (childrte->relkind != RELKIND_PARTITIONED_TABLE || childrte->inh)
	{
		appinfo = makeNode(AppendRelInfo);
		appinfo->parent_relid = parentRTindex;
		appinfo->child_relid = childRTindex;
		appinfo->parent_reltype = parentrel->rd_rel->reltype;
		appinfo->child_reltype = childrel->rd_rel->reltype;
		make_inh_translation_list(parentrel, childrel, childRTindex,
								  &appinfo->translated_vars);
		appinfo->parent_reloid = parentOID;
		*appinfos = lappend(*appinfos, appinfo);

		/*
		 * Translate the column permissions bitmaps to the child's attnums (we
		 * have to build the translated_vars list before we can do this). But
		 * if this is the parent table, leave copyObject's result alone.
		 *
		 * Note: we need to do this even though the executor won't run any
		 * permissions checks on the child RTE.  The insertedCols/updatedCols
		 * bitmaps may be examined for trigger-firing purposes.
		 */
		if (childOID != parentOID)
		{
			childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
														 appinfo->translated_vars);
			childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
														 appinfo->translated_vars);
			childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
														appinfo->translated_vars);
		}
	}

	/*
	 * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
	 */
	if (top_parentrc)
	{
		PlanRowMark *childrc = makeNode(PlanRowMark);

		childrc->rti = childRTindex;
		childrc->prti = top_parentrc->rti;
		childrc->rowmarkId = top_parentrc->rowmarkId;
		/* Reselect rowmark type, because relkind might not match parent */
		childrc->markType = select_rowmark_type(childrte,
												top_parentrc->strength);
		childrc->allMarkTypes = (1 << childrc->markType);
		childrc->strength = top_parentrc->strength;
		childrc->waitPolicy = top_parentrc->waitPolicy;

		/*
		 * We mark RowMarks for partitioned child tables as parent RowMarks so
		 * that the executor ignores them (except their existence means that
		 * the child tables be locked using appropriate mode).
		 */
		childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);

		/* Include child's rowmark type in top parent's allMarkTypes */
		top_parentrc->allMarkTypes |= childrc->allMarkTypes;

		root->rowMarks = lappend(root->rowMarks, childrc);
	}
}

/*
 * make_inh_translation_list
 *	  Build the list of translations from parent Vars to child Vars for
 *	  an inheritance child.
 *
 * For paranoia's sake, we match type/collation as well as attribute name.
 */
static void
make_inh_translation_list(Relation oldrelation, Relation newrelation,
						  Index newvarno,
						  List **translated_vars)
{
	List	   *vars = NIL;
	TupleDesc	old_tupdesc = RelationGetDescr(oldrelation);
	TupleDesc	new_tupdesc = RelationGetDescr(newrelation);
	int			oldnatts = old_tupdesc->natts;
	int			newnatts = new_tupdesc->natts;
	int			old_attno;

	for (old_attno = 0; old_attno < oldnatts; old_attno++)
	{
		Form_pg_attribute att;
		char	   *attname;
		Oid			atttypid;
		int32		atttypmod;
		Oid			attcollation;
		int			new_attno;

		att = TupleDescAttr(old_tupdesc, old_attno);
		if (att->attisdropped)
		{
			/* Just put NULL into this list entry */
			vars = lappend(vars, NULL);
			continue;
		}
		attname = NameStr(att->attname);
		atttypid = att->atttypid;
		atttypmod = att->atttypmod;
		attcollation = att->attcollation;

		/*
		 * When we are generating the "translation list" for the parent table
		 * of an inheritance set, no need to search for matches.
		 */
		if (oldrelation == newrelation)
		{
			vars = lappend(vars, makeVar(newvarno,
										 (AttrNumber) (old_attno + 1),
										 atttypid,
										 atttypmod,
										 attcollation,
										 0));
			continue;
		}

		/*
		 * Otherwise we have to search for the matching column by name.
		 * There's no guarantee it'll have the same column position, because
		 * of cases like ALTER TABLE ADD COLUMN and multiple inheritance.
		 * However, in simple cases it will be the same column number, so try
		 * that before we go groveling through all the columns.
		 *
		 * Note: the test for (att = ...) != NULL cannot fail, it's just a
		 * notational device to include the assignment into the if-clause.
		 */
		if (old_attno < newnatts &&
			(att = TupleDescAttr(new_tupdesc, old_attno)) != NULL &&
			!att->attisdropped &&
			strcmp(attname, NameStr(att->attname)) == 0)
			new_attno = old_attno;
		else
		{
			/* fall back to a linear scan over all child columns */
			for (new_attno = 0; new_attno < newnatts; new_attno++)
			{
				att = TupleDescAttr(new_tupdesc, new_attno);
				if (!att->attisdropped &&
					strcmp(attname, NameStr(att->attname)) == 0)
					break;
			}
			if (new_attno >= newnatts)
				elog(ERROR, "could not find inherited attribute \"%s\" of relation \"%s\"",
					 attname, RelationGetRelationName(newrelation));
		}

		/* Found it, check type and collation match */
		if (atttypid != att->atttypid || atttypmod != att->atttypmod)
			elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type",
				 attname, RelationGetRelationName(newrelation));
		if (attcollation != att->attcollation)
			elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation",
				 attname, RelationGetRelationName(newrelation));

		vars = lappend(vars, makeVar(newvarno,
									 (AttrNumber) (new_attno + 1),
									 atttypid,
									 atttypmod,
									 attcollation,
									 0));
	}

	*translated_vars = vars;
}

/*
 * translate_col_privs
 *	  Translate a bitmapset representing per-column privileges from the
 *	  parent rel's attribute numbering to the child's.
 *
 * The only surprise here is that we don't translate a parent whole-row
 * reference into a child whole-row reference.  That would mean requiring
 * permissions on all child columns, which is overly strict, since the
 * query is really only going to reference the inherited columns.  Instead
 * we set the per-column bits for all inherited columns.
1996 */ 1997 static Bitmapset * 1998 translate_col_privs(const Bitmapset *parent_privs, 1999 List *translated_vars) 2000 { 2001 Bitmapset *child_privs = NULL; 2002 bool whole_row; 2003 int attno; 2004 ListCell *lc; 2005 2006 /* System attributes have the same numbers in all tables */ 2007 for (attno = FirstLowInvalidHeapAttributeNumber + 1; attno < 0; attno++) 2008 { 2009 if (bms_is_member(attno - FirstLowInvalidHeapAttributeNumber, 2010 parent_privs)) 2011 child_privs = bms_add_member(child_privs, 2012 attno - FirstLowInvalidHeapAttributeNumber); 2013 } 2014 2015 /* Check if parent has whole-row reference */ 2016 whole_row = bms_is_member(InvalidAttrNumber - FirstLowInvalidHeapAttributeNumber, 2017 parent_privs); 2018 2019 /* And now translate the regular user attributes, using the vars list */ 2020 attno = InvalidAttrNumber; 2021 foreach(lc, translated_vars) 2022 { 2023 Var *var = lfirst_node(Var, lc); 2024 2025 attno++; 2026 if (var == NULL) /* ignore dropped columns */ 2027 continue; 2028 if (whole_row || 2029 bms_is_member(attno - FirstLowInvalidHeapAttributeNumber, 2030 parent_privs)) 2031 child_privs = bms_add_member(child_privs, 2032 var->varattno - FirstLowInvalidHeapAttributeNumber); 2033 } 2034 2035 return child_privs; 2036 } 2037 2038 /* 2039 * adjust_appendrel_attrs 2040 * Copy the specified query or expression and translate Vars referring to a 2041 * parent rel to refer to the corresponding child rel instead. We also 2042 * update rtindexes appearing outside Vars, such as resultRelation and 2043 * jointree relids. 2044 * 2045 * Note: this is only applied after conversion of sublinks to subplans, 2046 * so we don't need to cope with recursion into sub-queries. 2047 * 2048 * Note: this is not hugely different from what pullup_replace_vars() does; 2049 * maybe we should try to fold the two routines together. 
 */
Node *
adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos,
					   AppendRelInfo **appinfos)
{
	Node	   *result;
	adjust_appendrel_attrs_context context;

	context.root = root;
	context.nappinfos = nappinfos;
	context.appinfos = appinfos;

	/* If there's nothing to adjust, don't call this function. */
	Assert(nappinfos >= 1 && appinfos != NULL);

	/*
	 * Must be prepared to start with a Query or a bare expression tree.
	 */
	if (node && IsA(node, Query))
	{
		Query	   *newnode;
		int			cnt;

		newnode = query_tree_mutator((Query *) node,
									 adjust_appendrel_attrs_mutator,
									 (void *) &context,
									 QTW_IGNORE_RC_SUBQUERIES);
		/* If the result relation is a parent being replaced, fix it up */
		for (cnt = 0; cnt < nappinfos; cnt++)
		{
			AppendRelInfo *appinfo = appinfos[cnt];

			if (newnode->resultRelation == appinfo->parent_relid)
			{
				newnode->resultRelation = appinfo->child_relid;
				/* Fix tlist resnos too, if it's inherited UPDATE */
				if (newnode->commandType == CMD_UPDATE)
					newnode->targetList =
						adjust_inherited_tlist(newnode->targetList,
											   appinfo);
				break;
			}
		}

		result = (Node *) newnode;
	}
	else
		result = adjust_appendrel_attrs_mutator(node, &context);

	return result;
}

/*
 * adjust_appendrel_attrs_mutator
 *	  Expression-tree walker for adjust_appendrel_attrs: translates Vars,
 *	  rtindexes, and relid sets from parent to child per the appinfos array.
 */
static Node *
adjust_appendrel_attrs_mutator(Node *node,
							   adjust_appendrel_attrs_context *context)
{
	AppendRelInfo **appinfos = context->appinfos;
	int			nappinfos = context->nappinfos;
	int			cnt;

	if (node == NULL)
		return NULL;
	if (IsA(node, Var))
	{
		Var		   *var = (Var *) copyObject(node);
		AppendRelInfo *appinfo = NULL;

		/* See if any of the appinfos applies to this Var's rel */
		for (cnt = 0; cnt < nappinfos; cnt++)
		{
			if (var->varno == appinfos[cnt]->parent_relid)
			{
				appinfo = appinfos[cnt];
				break;
			}
		}

		if (var->varlevelsup == 0 && appinfo)
		{
			var->varno = appinfo->child_relid;
			var->varnoold = appinfo->child_relid;
			if (var->varattno > 0)
			{
				Node	   *newnode;

				if (var->varattno > list_length(appinfo->translated_vars))
					elog(ERROR, "attribute %d of relation \"%s\" does not exist",
						 var->varattno, get_rel_name(appinfo->parent_reloid));
				newnode = copyObject(list_nth(appinfo->translated_vars,
											  var->varattno - 1));
				if (newnode == NULL)
					elog(ERROR, "attribute %d of relation \"%s\" does not exist",
						 var->varattno, get_rel_name(appinfo->parent_reloid));
				return newnode;
			}
			else if (var->varattno == 0)
			{
				/*
				 * Whole-row Var: if we are dealing with named rowtypes, we
				 * can use a whole-row Var for the child table plus a coercion
				 * step to convert the tuple layout to the parent's rowtype.
				 * Otherwise we have to generate a RowExpr.
				 */
				if (OidIsValid(appinfo->child_reltype))
				{
					Assert(var->vartype == appinfo->parent_reltype);
					if (appinfo->parent_reltype != appinfo->child_reltype)
					{
						ConvertRowtypeExpr *r = makeNode(ConvertRowtypeExpr);

						r->arg = (Expr *) var;
						r->resulttype = appinfo->parent_reltype;
						r->convertformat = COERCE_IMPLICIT_CAST;
						r->location = -1;
						/* Make sure the Var node has the right type ID, too */
						var->vartype = appinfo->child_reltype;
						return (Node *) r;
					}
				}
				else
				{
					/*
					 * Build a RowExpr containing the translated variables.
					 *
					 * In practice var->vartype will always be RECORDOID here,
					 * so we need to come up with some suitable column names.
					 * We use the parent RTE's column names.
					 *
					 * Note: we can't get here for inheritance cases, so there
					 * is no need to worry that translated_vars might contain
					 * some dummy NULLs.
					 */
					RowExpr    *rowexpr;
					List	   *fields;
					RangeTblEntry *rte;

					rte = rt_fetch(appinfo->parent_relid,
								   context->root->parse->rtable);
					fields = copyObject(appinfo->translated_vars);
					rowexpr = makeNode(RowExpr);
					rowexpr->args = fields;
					rowexpr->row_typeid = var->vartype;
					rowexpr->row_format = COERCE_IMPLICIT_CAST;
					rowexpr->colnames = copyObject(rte->eref->colnames);
					rowexpr->location = -1;

					return (Node *) rowexpr;
				}
			}
			/* system attributes don't need any other translation */
		}
		return (Node *) var;
	}
	if (IsA(node, CurrentOfExpr))
	{
		CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node);

		for (cnt = 0; cnt < nappinfos; cnt++)
		{
			AppendRelInfo *appinfo = appinfos[cnt];

			if (cexpr->cvarno == appinfo->parent_relid)
			{
				cexpr->cvarno = appinfo->child_relid;
				break;
			}
		}
		return (Node *) cexpr;
	}
	if (IsA(node, RangeTblRef))
	{
		RangeTblRef *rtr = (RangeTblRef *) copyObject(node);

		for (cnt = 0; cnt < nappinfos; cnt++)
		{
			AppendRelInfo *appinfo = appinfos[cnt];

			if (rtr->rtindex == appinfo->parent_relid)
			{
				rtr->rtindex = appinfo->child_relid;
				break;
			}
		}
		return (Node *) rtr;
	}
	if (IsA(node, JoinExpr))
	{
		/* Copy the JoinExpr node with correct mutation of subnodes */
		JoinExpr   *j;
		AppendRelInfo *appinfo;

		j = (JoinExpr *) expression_tree_mutator(node,
												 adjust_appendrel_attrs_mutator,
												 (void *) context);
		/* now fix JoinExpr's rtindex (probably never happens) */
		for (cnt = 0; cnt < nappinfos; cnt++)
		{
			appinfo = appinfos[cnt];

			if (j->rtindex == appinfo->parent_relid)
			{
				j->rtindex = appinfo->child_relid;
				break;
			}
		}
		return (Node *) j;
	}
	if (IsA(node, PlaceHolderVar))
	{
		/* Copy the PlaceHolderVar node with correct mutation of subnodes */
		PlaceHolderVar *phv;

		phv = (PlaceHolderVar *) expression_tree_mutator(node,
														 adjust_appendrel_attrs_mutator,
														 (void *) context);
		/* now fix PlaceHolderVar's relid sets */
		if (phv->phlevelsup == 0)
			phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos,
											  context->appinfos);
		return (Node *) phv;
	}
	/* Shouldn't need to handle planner auxiliary nodes here */
	Assert(!IsA(node, SpecialJoinInfo));
	Assert(!IsA(node, AppendRelInfo));
	Assert(!IsA(node, PlaceHolderInfo));
	Assert(!IsA(node, MinMaxAggInfo));

	/*
	 * We have to process RestrictInfo nodes specially.  (Note: although
	 * set_append_rel_pathlist will hide RestrictInfos in the parent's
	 * baserestrictinfo list from us, it doesn't hide those in joininfo.)
	 */
	if (IsA(node, RestrictInfo))
	{
		RestrictInfo *oldinfo = (RestrictInfo *) node;
		RestrictInfo *newinfo = makeNode(RestrictInfo);

		/* Copy all flat-copiable fields */
		memcpy(newinfo, oldinfo, sizeof(RestrictInfo));

		/* Recursively fix the clause itself */
		newinfo->clause = (Expr *)
			adjust_appendrel_attrs_mutator((Node *) oldinfo->clause, context);

		/* and the modified version, if an OR clause */
		newinfo->orclause = (Expr *)
			adjust_appendrel_attrs_mutator((Node *) oldinfo->orclause, context);

		/* adjust relid sets too */
		newinfo->clause_relids = adjust_child_relids(oldinfo->clause_relids,
													 context->nappinfos,
													 context->appinfos);
		newinfo->required_relids = adjust_child_relids(oldinfo->required_relids,
													   context->nappinfos,
													   context->appinfos);
		newinfo->outer_relids = adjust_child_relids(oldinfo->outer_relids,
													context->nappinfos,
													context->appinfos);
		newinfo->nullable_relids = adjust_child_relids(oldinfo->nullable_relids,
													   context->nappinfos,
													   context->appinfos);
		newinfo->left_relids = adjust_child_relids(oldinfo->left_relids,
												   context->nappinfos,
												   context->appinfos);
		newinfo->right_relids = adjust_child_relids(oldinfo->right_relids,
													context->nappinfos,
													context->appinfos);

		/*
		 * Reset cached derivative fields, since these might need to have
		 * different values when considering the child relation.  Note we
		 * don't reset left_ec/right_ec: each child variable is implicitly
		 * equivalent to its parent, so still a member of the same EC if any.
		 */
		newinfo->eval_cost.startup = -1;
		newinfo->norm_selec = -1;
		newinfo->outer_selec = -1;
		newinfo->left_em = NULL;
		newinfo->right_em = NULL;
		newinfo->scansel_cache = NIL;
		newinfo->left_bucketsize = -1;
		newinfo->right_bucketsize = -1;
		newinfo->left_mcvfreq = -1;
		newinfo->right_mcvfreq = -1;

		return (Node *) newinfo;
	}

	/*
	 * NOTE: we do not need to recurse into sublinks, because they should
	 * already have been converted to subplans before we see them.
	 */
	Assert(!IsA(node, SubLink));
	Assert(!IsA(node, Query));

	return expression_tree_mutator(node, adjust_appendrel_attrs_mutator,
								   (void *) context);
}

/*
 * adjust_child_relids
 *	  Substitute child relids for parent relids in a Relid set.  The array of
 *	  appinfos specifies the substitutions to be performed.
 */
static Relids
adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
{
	Bitmapset  *result = NULL;
	int			cnt;

	for (cnt = 0; cnt < nappinfos; cnt++)
	{
		AppendRelInfo *appinfo = appinfos[cnt];

		/* Remove parent, add child */
		if (bms_is_member(appinfo->parent_relid, relids))
		{
			/* Make a copy if we are changing the set. */
			if (!result)
				result = bms_copy(relids);

			result = bms_del_member(result, appinfo->parent_relid);
			result = bms_add_member(result, appinfo->child_relid);
		}
	}

	/* If we made any changes, return the modified copy. */
	if (result)
		return result;

	/* Otherwise, return the original set without modification. */
	return relids;
}

/*
 * adjust_child_relids_multilevel
 *	  Replace any relid present in top_parent_relids with its child in
 *	  child_relids.  Members of child_relids can be multiple levels below
 *	  the top parent in the partition hierarchy.
 */
Relids
adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
							   Relids child_relids, Relids top_parent_relids)
{
	AppendRelInfo **appinfos;
	int			nappinfos;
	Relids		parent_relids = NULL;
	Relids		result;
	Relids		tmp_result = NULL;
	int			cnt;

	/*
	 * If the given relids set doesn't contain any of the top parent relids,
	 * it will remain unchanged.
	 */
	if (!bms_overlap(relids, top_parent_relids))
		return relids;

	appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);

	/* Construct relids set for the immediate parent of the given child. */
	for (cnt = 0; cnt < nappinfos; cnt++)
	{
		AppendRelInfo *appinfo = appinfos[cnt];

		parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
	}

	/* Recurse if immediate parent is not the top parent. */
	if (!bms_equal(parent_relids, top_parent_relids))
	{
		tmp_result = adjust_child_relids_multilevel(root, relids,
													parent_relids,
													top_parent_relids);
		relids = tmp_result;
	}

	result = adjust_child_relids(relids, nappinfos, appinfos);

	/* Free memory consumed by any intermediate result. */
	if (tmp_result)
		bms_free(tmp_result);
	bms_free(parent_relids);
	pfree(appinfos);

	return result;
}

/*
 * adjust_inherited_tlist
 *	  Adjust the targetlist entries of an inherited UPDATE operation
 *
 * The expressions have already been fixed, but we have to make sure that
 * the target resnos match the child table (they may not, in the case of
 * a column that was added after-the-fact by ALTER TABLE).  In some cases
 * this can force us to re-order the tlist to preserve resno ordering.
 * (We do all this work in special cases so that preptlist.c is fast for
 * the typical case.)
 *
 * The given tlist has already been through expression_tree_mutator;
 * therefore the TargetEntry nodes are fresh copies that it's okay to
 * scribble on.
 *
 * Note that this is not needed for INSERT because INSERT isn't inheritable.
 */
static List *
adjust_inherited_tlist(List *tlist, AppendRelInfo *context)
{
	bool		changed_it = false;
	ListCell   *tl;
	List	   *new_tlist;
	bool		more;
	int			attrno;

	/* This should only happen for an inheritance case, not UNION ALL */
	Assert(OidIsValid(context->parent_reloid));

	/* Scan tlist and update resnos to match attnums of child rel */
	foreach(tl, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(tl);
		Var		   *childvar;

		if (tle->resjunk)
			continue;			/* ignore junk items */

		/* Look up the translation of this column: it must be a Var */
		if (tle->resno <= 0 ||
			tle->resno > list_length(context->translated_vars))
			elog(ERROR, "attribute %d of relation \"%s\" does not exist",
				 tle->resno, get_rel_name(context->parent_reloid));
		childvar = (Var *) list_nth(context->translated_vars, tle->resno - 1);
		if (childvar == NULL || !IsA(childvar, Var))
			elog(ERROR, "attribute %d of relation \"%s\" does not exist",
				 tle->resno, get_rel_name(context->parent_reloid));

		if (tle->resno != childvar->varattno)
		{
			tle->resno = childvar->varattno;
			changed_it = true;
		}
	}

	/*
	 * If we changed anything, re-sort the tlist by resno, and make sure
	 * resjunk entries have resnos above the last real resno.  The sort
	 * algorithm is a bit stupid, but for such a seldom-taken path, small is
	 * probably better than fast.
	 */
	if (!changed_it)
		return tlist;

	new_tlist = NIL;
	more = true;
	for (attrno = 1; more; attrno++)
	{
		more = false;
		foreach(tl, tlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tl);

			if (tle->resjunk)
				continue;		/* ignore junk items */

			if (tle->resno == attrno)
				new_tlist = lappend(new_tlist, tle);
			else if (tle->resno > attrno)
				more = true;
		}
	}

	/* Append the junk entries last, renumbering them past the real columns */
	foreach(tl, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(tl);

		if (!tle->resjunk)
			continue;			/* here, ignore non-junk items */

		tle->resno = attrno;
		new_tlist = lappend(new_tlist, tle);
		attrno++;
	}

	return new_tlist;
}

/*
 * adjust_appendrel_attrs_multilevel
 *	  Apply Var translations from a toplevel appendrel parent down to a child.
 *
 * In some cases we need to translate expressions referencing a parent relation
 * to reference an appendrel child that's multiple levels removed from it.
 */
Node *
adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
								  Relids child_relids,
								  Relids top_parent_relids)
{
	AppendRelInfo **appinfos;
	Bitmapset  *parent_relids = NULL;
	int			nappinfos;
	int			cnt;

	Assert(bms_num_members(child_relids) == bms_num_members(top_parent_relids));

	appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);

	/* Construct relids set for the immediate parent of given child. */
	for (cnt = 0; cnt < nappinfos; cnt++)
	{
		AppendRelInfo *appinfo = appinfos[cnt];

		parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
	}

	/* Recurse if immediate parent is not the top parent. */
	if (!bms_equal(parent_relids, top_parent_relids))
		node = adjust_appendrel_attrs_multilevel(root, node, parent_relids,
												 top_parent_relids);

	/* Now translate for this child */
	node = adjust_appendrel_attrs(root, node, nappinfos, appinfos);

	pfree(appinfos);

	return node;
}

/*
 * build_child_join_sjinfo
 *	  Construct the SpecialJoinInfo for a child-join by translating
 *	  SpecialJoinInfo for the join between parents.  left_relids and
 *	  right_relids are the relids of left and right side of the join
 *	  respectively.
 */
SpecialJoinInfo *
build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
						Relids left_relids, Relids right_relids)
{
	SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
	AppendRelInfo **left_appinfos;
	int			left_nappinfos;
	AppendRelInfo **right_appinfos;
	int			right_nappinfos;

	memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
	left_appinfos = find_appinfos_by_relids(root, left_relids,
											&left_nappinfos);
	right_appinfos = find_appinfos_by_relids(root, right_relids,
											 &right_nappinfos);

	sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
											   left_nappinfos, left_appinfos);
	sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
												right_nappinfos,
												right_appinfos);
	sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
											   left_nappinfos, left_appinfos);
	sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
												right_nappinfos,
												right_appinfos);
	sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
															 (Node *) sjinfo->semi_rhs_exprs,
															 right_nappinfos,
															 right_appinfos);

	pfree(left_appinfos);
	pfree(right_appinfos);

	return sjinfo;
}

/*
 * find_appinfos_by_relids
 *	  Find AppendRelInfo structures for all relations specified by relids.
 *
 * The AppendRelInfos are returned in an array, which can be pfree'd by the
 * caller.  *nappinfos is set to the number of entries in the array.
 */
AppendRelInfo **
find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
{
	AppendRelInfo **appinfos;
	int			cnt = 0;
	int			i;

	*nappinfos = bms_num_members(relids);
	appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos);

	i = -1;
	while ((i = bms_next_member(relids, i)) >= 0)
	{
		AppendRelInfo *appinfo = root->append_rel_array[i];

		if (!appinfo)
			elog(ERROR, "child rel %d not found in append_rel_array", i);

		appinfos[cnt++] = appinfo;
	}
	return appinfos;
}