1 /*------------------------------------------------------------------------- 2 * 3 * planagg.c 4 * Special planning for aggregate queries. 5 * 6 * This module tries to replace MIN/MAX aggregate functions by subqueries 7 * of the form 8 * (SELECT col FROM tab 9 * WHERE col IS NOT NULL AND existing-quals 10 * ORDER BY col ASC/DESC 11 * LIMIT 1) 12 * Given a suitable index on tab.col, this can be much faster than the 13 * generic scan-all-the-rows aggregation plan. We can handle multiple 14 * MIN/MAX aggregates by generating multiple subqueries, and their 15 * orderings can be different. However, if the query contains any 16 * non-optimizable aggregates, there's no point since we'll have to 17 * scan all the rows anyway. 18 * 19 * 20 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 21 * Portions Copyright (c) 1994, Regents of the University of California 22 * 23 * 24 * IDENTIFICATION 25 * src/backend/optimizer/plan/planagg.c 26 * 27 *------------------------------------------------------------------------- 28 */ 29 #include "postgres.h" 30 31 #include "access/htup_details.h" 32 #include "catalog/pg_aggregate.h" 33 #include "catalog/pg_type.h" 34 #include "nodes/makefuncs.h" 35 #include "nodes/nodeFuncs.h" 36 #include "optimizer/clauses.h" 37 #include "optimizer/cost.h" 38 #include "optimizer/optimizer.h" 39 #include "optimizer/pathnode.h" 40 #include "optimizer/paths.h" 41 #include "optimizer/planmain.h" 42 #include "optimizer/subselect.h" 43 #include "optimizer/tlist.h" 44 #include "parser/parse_clause.h" 45 #include "parser/parsetree.h" 46 #include "rewrite/rewriteManip.h" 47 #include "utils/lsyscache.h" 48 #include "utils/syscache.h" 49 50 static bool find_minmax_aggs_walker(Node *node, List **context); 51 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, 52 Oid eqop, Oid sortop, bool nulls_first); 53 static void minmax_qp_callback(PlannerInfo *root, void *extra); 54 static Oid fetch_agg_sort_op(Oid aggfnoid); 55 56 57 /* 58 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates 59 * 60 * Check to see whether the query contains MIN/MAX aggregate functions that 61 * might be optimizable via indexscans. If it does, and all the aggregates 62 * are potentially optimizable, then create a MinMaxAggPath and add it to 63 * the (UPPERREL_GROUP_AGG, NULL) upperrel. 64 * 65 * This should be called by grouping_planner() just before it's ready to call 66 * query_planner(), because we generate indexscan paths by cloning the 67 * planner's state and invoking query_planner() on a modified version of 68 * the query parsetree. Thus, all preprocessing needed before query_planner() 69 * must already be done. 70 */ 71 void 72 preprocess_minmax_aggregates(PlannerInfo *root) 73 { 74 Query *parse = root->parse; 75 FromExpr *jtnode; 76 RangeTblRef *rtr; 77 RangeTblEntry *rte; 78 List *aggs_list; 79 RelOptInfo *grouped_rel; 80 ListCell *lc; 81 82 /* minmax_aggs list should be empty at this point */ 83 Assert(root->minmax_aggs == NIL); 84 85 /* Nothing to do if query has no aggregates */ 86 if (!parse->hasAggs) 87 return; 88 89 Assert(!parse->setOperations); /* shouldn't get here if a setop */ 90 Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ 91 92 /* 93 * Reject unoptimizable cases. 94 * 95 * We don't handle GROUP BY or windowing, because our current 96 * implementations of grouping require looking at all the rows anyway, and 97 * so there's not much point in optimizing MIN/MAX. 98 */ 99 if (parse->groupClause || list_length(parse->groupingSets) > 1 || 100 parse->hasWindowFuncs) 101 return; 102 103 /* 104 * Reject if query contains any CTEs; there's no way to build an indexscan 105 * on one so we couldn't succeed here. (If the CTEs are unreferenced, 106 * that's not true, but it doesn't seem worth expending cycles to check.) 107 */ 108 if (parse->cteList) 109 return; 110 111 /* 112 * We also restrict the query to reference exactly one table, since join 113 * conditions can't be handled reasonably. (We could perhaps handle a 114 * query containing cartesian-product joins, but it hardly seems worth the 115 * trouble.) However, the single table could be buried in several levels 116 * of FromExpr due to subqueries. Note the "single" table could be an 117 * inheritance parent, too, including the case of a UNION ALL subquery 118 * that's been flattened to an appendrel. 119 */ 120 jtnode = parse->jointree; 121 while (IsA(jtnode, FromExpr)) 122 { 123 if (list_length(jtnode->fromlist) != 1) 124 return; 125 jtnode = linitial(jtnode->fromlist); 126 } 127 if (!IsA(jtnode, RangeTblRef)) 128 return; 129 rtr = (RangeTblRef *) jtnode; 130 rte = planner_rt_fetch(rtr->rtindex, root); 131 if (rte->rtekind == RTE_RELATION) 132 /* ordinary relation, ok */ ; 133 else if (rte->rtekind == RTE_SUBQUERY && rte->inh) 134 /* flattened UNION ALL subquery, ok */ ; 135 else 136 return; 137 138 /* 139 * Scan the tlist and HAVING qual to find all the aggregates and verify 140 * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. 141 */ 142 aggs_list = NIL; 143 if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list)) 144 return; 145 if (find_minmax_aggs_walker(parse->havingQual, &aggs_list)) 146 return; 147 148 /* 149 * OK, there is at least the possibility of performing the optimization. 150 * Build an access path for each aggregate. If any of the aggregates 151 * prove to be non-indexable, give up; there is no point in optimizing 152 * just some of them. 153 */ 154 foreach(lc, aggs_list) 155 { 156 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); 157 Oid eqop; 158 bool reverse; 159 160 /* 161 * We'll need the equality operator that goes with the aggregate's 162 * ordering operator. 163 */ 164 eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse); 165 if (!OidIsValid(eqop)) /* shouldn't happen */ 166 elog(ERROR, "could not find equality operator for ordering operator %u", 167 mminfo->aggsortop); 168 169 /* 170 * We can use either an ordering that gives NULLS FIRST or one that 171 * gives NULLS LAST; furthermore there's unlikely to be much 172 * performance difference between them, so it doesn't seem worth 173 * costing out both ways if we get a hit on the first one. NULLS 174 * FIRST is more likely to be available if the operator is a 175 * reverse-sort operator, so try that first if reverse. 176 */ 177 if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse)) 178 continue; 179 if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse)) 180 continue; 181 182 /* No indexable path for this aggregate, so fail */ 183 return; 184 } 185 186 /* 187 * OK, we can do the query this way. Prepare to create a MinMaxAggPath 188 * node. 189 * 190 * First, create an output Param node for each agg. (If we end up not 191 * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg, 192 * which is not worth worrying about. We can't wait till create_plan time 193 * to decide whether to make the Param, unfortunately.) 194 */ 195 foreach(lc, aggs_list) 196 { 197 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); 198 199 mminfo->param = 200 SS_make_initplan_output_param(root, 201 exprType((Node *) mminfo->target), 202 -1, 203 exprCollation((Node *) mminfo->target)); 204 } 205 206 /* 207 * Create a MinMaxAggPath node with the appropriate estimated costs and 208 * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where 209 * it will compete against the standard aggregate implementation. (It 210 * will likely always win, but we need not assume that here.) 211 * 212 * Note: grouping_planner won't have created this upperrel yet, but it's 213 * fine for us to create it first. We will not have inserted the correct 214 * consider_parallel value in it, but MinMaxAggPath paths are currently 215 * never parallel-safe anyway, so that doesn't matter. Likewise, it 216 * doesn't matter that we haven't filled FDW-related fields in the rel. 217 * Also, because there are no rowmarks, we know that the processed_tlist 218 * doesn't need to change anymore, so making the pathtarget now is safe. 219 */ 220 grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); 221 add_path(grouped_rel, (Path *) 222 create_minmaxagg_path(root, grouped_rel, 223 create_pathtarget(root, 224 root->processed_tlist), 225 aggs_list, 226 (List *) parse->havingQual)); 227 } 228 229 /* 230 * find_minmax_aggs_walker 231 * Recursively scan the Aggref nodes in an expression tree, and check 232 * that each one is a MIN/MAX aggregate. If so, build a list of the 233 * distinct aggregate calls in the tree. 234 * 235 * Returns true if a non-MIN/MAX aggregate is found, false otherwise. 236 * (This seemingly-backward definition is used because expression_tree_walker 237 * aborts the scan on true return, which is what we want.) 238 * 239 * Found aggregates are added to the list at *context; it's up to the caller 240 * to initialize the list to NIL. 241 * 242 * This does not descend into subqueries, and so should be used only after 243 * reduction of sublinks to subplans. There mustn't be outer-aggregate 244 * references either. 245 */ 246 static bool 247 find_minmax_aggs_walker(Node *node, List **context) 248 { 249 if (node == NULL) 250 return false; 251 if (IsA(node, Aggref)) 252 { 253 Aggref *aggref = (Aggref *) node; 254 Oid aggsortop; 255 TargetEntry *curTarget; 256 MinMaxAggInfo *mminfo; 257 ListCell *l; 258 259 Assert(aggref->agglevelsup == 0); 260 if (list_length(aggref->args) != 1) 261 return true; /* it couldn't be MIN/MAX */ 262 263 /* 264 * ORDER BY is usually irrelevant for MIN/MAX, but it can change the 265 * outcome if the aggsortop's operator class recognizes non-identical 266 * values as equal. For example, 4.0 and 4.00 are equal according to 267 * numeric_ops, yet distinguishable. If MIN() receives more than one 268 * value equal to 4.0 and no value less than 4.0, it is unspecified 269 * which of those equal values MIN() returns. An ORDER BY expression 270 * that differs for each of those equal values of the argument 271 * expression makes the result predictable once again. This is a 272 * niche requirement, and we do not implement it with subquery paths. 273 * In any case, this test lets us reject ordered-set aggregates 274 * quickly. 275 */ 276 if (aggref->aggorder != NIL) 277 return true; 278 /* note: we do not care if DISTINCT is mentioned ... */ 279 280 /* 281 * We might implement the optimization when a FILTER clause is present 282 * by adding the filter to the quals of the generated subquery. For 283 * now, just punt. 284 */ 285 if (aggref->aggfilter != NULL) 286 return true; 287 288 aggsortop = fetch_agg_sort_op(aggref->aggfnoid); 289 if (!OidIsValid(aggsortop)) 290 return true; /* not a MIN/MAX aggregate */ 291 292 curTarget = (TargetEntry *) linitial(aggref->args); 293 294 if (contain_mutable_functions((Node *) curTarget->expr)) 295 return true; /* not potentially indexable */ 296 297 if (type_is_rowtype(exprType((Node *) curTarget->expr))) 298 return true; /* IS NOT NULL would have weird semantics */ 299 300 /* 301 * Check whether it's already in the list, and add it if not. 302 */ 303 foreach(l, *context) 304 { 305 mminfo = (MinMaxAggInfo *) lfirst(l); 306 if (mminfo->aggfnoid == aggref->aggfnoid && 307 equal(mminfo->target, curTarget->expr)) 308 return false; 309 } 310 311 mminfo = makeNode(MinMaxAggInfo); 312 mminfo->aggfnoid = aggref->aggfnoid; 313 mminfo->aggsortop = aggsortop; 314 mminfo->target = curTarget->expr; 315 mminfo->subroot = NULL; /* don't compute path yet */ 316 mminfo->path = NULL; 317 mminfo->pathcost = 0; 318 mminfo->param = NULL; 319 320 *context = lappend(*context, mminfo); 321 322 /* 323 * We need not recurse into the argument, since it can't contain any 324 * aggregates. 325 */ 326 return false; 327 } 328 Assert(!IsA(node, SubLink)); 329 return expression_tree_walker(node, find_minmax_aggs_walker, 330 (void *) context); 331 } 332 333 /* 334 * build_minmax_path 335 * Given a MIN/MAX aggregate, try to build an indexscan Path it can be 336 * optimized with. 337 * 338 * If successful, stash the best path in *mminfo and return true. 339 * Otherwise, return false. 340 */ 341 static bool 342 build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, 343 Oid eqop, Oid sortop, bool nulls_first) 344 { 345 PlannerInfo *subroot; 346 Query *parse; 347 TargetEntry *tle; 348 List *tlist; 349 NullTest *ntest; 350 SortGroupClause *sortcl; 351 RelOptInfo *final_rel; 352 Path *sorted_path; 353 Cost path_cost; 354 double path_fraction; 355 356 /* 357 * We are going to construct what is effectively a sub-SELECT query, so 358 * clone the current query level's state and adjust it to make it look 359 * like a subquery. Any outer references will now be one level higher 360 * than before. (This means that when we are done, there will be no Vars 361 * of level 1, which is why the subquery can become an initplan.) 362 */ 363 subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); 364 memcpy(subroot, root, sizeof(PlannerInfo)); 365 subroot->query_level++; 366 subroot->parent_root = root; 367 /* reset subplan-related stuff */ 368 subroot->plan_params = NIL; 369 subroot->outer_params = NULL; 370 subroot->init_plans = NIL; 371 372 subroot->parse = parse = copyObject(root->parse); 373 IncrementVarSublevelsUp((Node *) parse, 1, 1); 374 375 /* append_rel_list might contain outer Vars? */ 376 subroot->append_rel_list = copyObject(root->append_rel_list); 377 IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1); 378 /* There shouldn't be any OJ info to translate, as yet */ 379 Assert(subroot->join_info_list == NIL); 380 /* and we haven't made equivalence classes, either */ 381 Assert(subroot->eq_classes == NIL); 382 /* and we haven't created PlaceHolderInfos, either */ 383 Assert(subroot->placeholder_list == NIL); 384 385 /*---------- 386 * Generate modified query of the form 387 * (SELECT col FROM tab 388 * WHERE col IS NOT NULL AND existing-quals 389 * ORDER BY col ASC/DESC 390 * LIMIT 1) 391 *---------- 392 */ 393 /* single tlist entry that is the aggregate target */ 394 tle = makeTargetEntry(copyObject(mminfo->target), 395 (AttrNumber) 1, 396 pstrdup("agg_target"), 397 false); 398 tlist = list_make1(tle); 399 subroot->processed_tlist = parse->targetList = tlist; 400 401 /* No HAVING, no DISTINCT, no aggregates anymore */ 402 parse->havingQual = NULL; 403 subroot->hasHavingQual = false; 404 parse->distinctClause = NIL; 405 parse->hasDistinctOn = false; 406 parse->hasAggs = false; 407 408 /* Build "target IS NOT NULL" expression */ 409 ntest = makeNode(NullTest); 410 ntest->nulltesttype = IS_NOT_NULL; 411 ntest->arg = copyObject(mminfo->target); 412 /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ 413 ntest->argisrow = false; 414 ntest->location = -1; 415 416 /* User might have had that in WHERE already */ 417 if (!list_member((List *) parse->jointree->quals, ntest)) 418 parse->jointree->quals = (Node *) 419 lcons(ntest, (List *) parse->jointree->quals); 420 421 /* Build suitable ORDER BY clause */ 422 sortcl = makeNode(SortGroupClause); 423 sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist); 424 sortcl->eqop = eqop; 425 sortcl->sortop = sortop; 426 sortcl->nulls_first = nulls_first; 427 sortcl->hashable = false; /* no need to make this accurate */ 428 parse->sortClause = list_make1(sortcl); 429 430 /* set up expressions for LIMIT 1 */ 431 parse->limitOffset = NULL; 432 parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, 433 sizeof(int64), 434 Int64GetDatum(1), false, 435 FLOAT8PASSBYVAL); 436 437 /* 438 * Generate the best paths for this query, telling query_planner that we 439 * have LIMIT 1. 440 */ 441 subroot->tuple_fraction = 1.0; 442 subroot->limit_tuples = 1.0; 443 444 final_rel = query_planner(subroot, minmax_qp_callback, NULL); 445 446 /* 447 * Since we didn't go through subquery_planner() to handle the subquery, 448 * we have to do some of the same cleanup it would do, in particular cope 449 * with params and initplans used within this subquery. (This won't 450 * matter if we end up not using the subplan.) 451 */ 452 SS_identify_outer_params(subroot); 453 SS_charge_for_initplans(subroot, final_rel); 454 455 /* 456 * Get the best presorted path, that being the one that's cheapest for 457 * fetching just one row. If there's no such path, fail. 458 */ 459 if (final_rel->rows > 1.0) 460 path_fraction = 1.0 / final_rel->rows; 461 else 462 path_fraction = 1.0; 463 464 sorted_path = 465 get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, 466 subroot->query_pathkeys, 467 NULL, 468 path_fraction); 469 if (!sorted_path) 470 return false; 471 472 /* 473 * The path might not return exactly what we want, so fix that. (We 474 * assume that this won't change any conclusions about which was the 475 * cheapest path.) 476 */ 477 sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, 478 create_pathtarget(subroot, 479 subroot->processed_tlist)); 480 481 /* 482 * Determine cost to get just the first row of the presorted path. 483 * 484 * Note: cost calculation here should match 485 * compare_fractional_path_costs(). 486 */ 487 path_cost = sorted_path->startup_cost + 488 path_fraction * (sorted_path->total_cost - sorted_path->startup_cost); 489 490 /* Save state for further processing */ 491 mminfo->subroot = subroot; 492 mminfo->path = sorted_path; 493 mminfo->pathcost = path_cost; 494 495 return true; 496 } 497 498 /* 499 * Compute query_pathkeys and other pathkeys during query_planner() 500 */ 501 static void 502 minmax_qp_callback(PlannerInfo *root, void *extra) 503 { 504 root->group_pathkeys = NIL; 505 root->window_pathkeys = NIL; 506 root->distinct_pathkeys = NIL; 507 508 root->sort_pathkeys = 509 make_pathkeys_for_sortclauses(root, 510 root->parse->sortClause, 511 root->parse->targetList); 512 513 root->query_pathkeys = root->sort_pathkeys; 514 } 515 516 /* 517 * Get the OID of the sort operator, if any, associated with an aggregate. 518 * Returns InvalidOid if there is no such operator. 519 */ 520 static Oid 521 fetch_agg_sort_op(Oid aggfnoid) 522 { 523 HeapTuple aggTuple; 524 Form_pg_aggregate aggform; 525 Oid aggsortop; 526 527 /* fetch aggregate entry from pg_aggregate */ 528 aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid)); 529 if (!HeapTupleIsValid(aggTuple)) 530 return InvalidOid; 531 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); 532 aggsortop = aggform->aggsortop; 533 ReleaseSysCache(aggTuple); 534 535 return aggsortop; 536 } 537