1 /*-------------------------------------------------------------------------
2  *
3  * planagg.c
4  *	  Special planning for aggregate queries.
5  *
6  * This module tries to replace MIN/MAX aggregate functions by subqueries
7  * of the form
8  *		(SELECT col FROM tab
9  *		 WHERE col IS NOT NULL AND existing-quals
10  *		 ORDER BY col ASC/DESC
11  *		 LIMIT 1)
12  * Given a suitable index on tab.col, this can be much faster than the
13  * generic scan-all-the-rows aggregation plan.  We can handle multiple
14  * MIN/MAX aggregates by generating multiple subqueries, and their
15  * orderings can be different.  However, if the query contains any
16  * non-optimizable aggregates, there's no point since we'll have to
17  * scan all the rows anyway.
18  *
19  *
20  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  *
24  * IDENTIFICATION
25  *	  src/backend/optimizer/plan/planagg.c
26  *
27  *-------------------------------------------------------------------------
28  */
29 #include "postgres.h"
30 
31 #include "access/htup_details.h"
32 #include "catalog/pg_aggregate.h"
33 #include "catalog/pg_type.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/cost.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/planmain.h"
42 #include "optimizer/subselect.h"
43 #include "optimizer/tlist.h"
44 #include "parser/parse_clause.h"
45 #include "parser/parsetree.h"
46 #include "rewrite/rewriteManip.h"
47 #include "utils/lsyscache.h"
48 #include "utils/syscache.h"
49 
50 static bool find_minmax_aggs_walker(Node *node, List **context);
51 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
52 							  Oid eqop, Oid sortop, bool nulls_first);
53 static void minmax_qp_callback(PlannerInfo *root, void *extra);
54 static Oid	fetch_agg_sort_op(Oid aggfnoid);
55 
56 
57 /*
58  * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
59  *
60  * Check to see whether the query contains MIN/MAX aggregate functions that
61  * might be optimizable via indexscans.  If it does, and all the aggregates
62  * are potentially optimizable, then create a MinMaxAggPath and add it to
63  * the (UPPERREL_GROUP_AGG, NULL) upperrel.
64  *
65  * This should be called by grouping_planner() just before it's ready to call
66  * query_planner(), because we generate indexscan paths by cloning the
67  * planner's state and invoking query_planner() on a modified version of
68  * the query parsetree.  Thus, all preprocessing needed before query_planner()
69  * must already be done.
70  */
71 void
72 preprocess_minmax_aggregates(PlannerInfo *root)
73 {
74 	Query	   *parse = root->parse;
75 	FromExpr   *jtnode;
76 	RangeTblRef *rtr;
77 	RangeTblEntry *rte;
78 	List	   *aggs_list;
79 	RelOptInfo *grouped_rel;
80 	ListCell   *lc;
81 
82 	/* minmax_aggs list should be empty at this point */
83 	Assert(root->minmax_aggs == NIL);
84 
85 	/* Nothing to do if query has no aggregates */
86 	if (!parse->hasAggs)
87 		return;
88 
89 	Assert(!parse->setOperations);	/* shouldn't get here if a setop */
90 	Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
91 
92 	/*
93 	 * Reject unoptimizable cases.
94 	 *
95 	 * We don't handle GROUP BY or windowing, because our current
96 	 * implementations of grouping require looking at all the rows anyway, and
97 	 * so there's not much point in optimizing MIN/MAX.
98 	 */
99 	if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
100 		parse->hasWindowFuncs)
101 		return;
102 
103 	/*
104 	 * Reject if query contains any CTEs; there's no way to build an indexscan
105 	 * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
106 	 * that's not true, but it doesn't seem worth expending cycles to check.)
107 	 */
108 	if (parse->cteList)
109 		return;
110 
111 	/*
112 	 * We also restrict the query to reference exactly one table, since join
113 	 * conditions can't be handled reasonably.  (We could perhaps handle a
114 	 * query containing cartesian-product joins, but it hardly seems worth the
115 	 * trouble.)  However, the single table could be buried in several levels
116 	 * of FromExpr due to subqueries.  Note the "single" table could be an
117 	 * inheritance parent, too, including the case of a UNION ALL subquery
118 	 * that's been flattened to an appendrel.
119 	 */
120 	jtnode = parse->jointree;
121 	while (IsA(jtnode, FromExpr))
122 	{
123 		if (list_length(jtnode->fromlist) != 1)
124 			return;
125 		jtnode = linitial(jtnode->fromlist);
126 	}
127 	if (!IsA(jtnode, RangeTblRef))
128 		return;
129 	rtr = (RangeTblRef *) jtnode;
130 	rte = planner_rt_fetch(rtr->rtindex, root);
131 	if (rte->rtekind == RTE_RELATION)
132 		 /* ordinary relation, ok */ ;
133 	else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
134 		 /* flattened UNION ALL subquery, ok */ ;
135 	else
136 		return;
137 
138 	/*
139 	 * Scan the tlist and HAVING qual to find all the aggregates and verify
140 	 * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
141 	 */
142 	aggs_list = NIL;
143 	if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list))
144 		return;
145 	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
146 		return;
147 
148 	/*
149 	 * OK, there is at least the possibility of performing the optimization.
150 	 * Build an access path for each aggregate.  If any of the aggregates
151 	 * prove to be non-indexable, give up; there is no point in optimizing
152 	 * just some of them.
153 	 */
154 	foreach(lc, aggs_list)
155 	{
156 		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
157 		Oid			eqop;
158 		bool		reverse;
159 
160 		/*
161 		 * We'll need the equality operator that goes with the aggregate's
162 		 * ordering operator.
163 		 */
164 		eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
165 		if (!OidIsValid(eqop))	/* shouldn't happen */
166 			elog(ERROR, "could not find equality operator for ordering operator %u",
167 				 mminfo->aggsortop);
168 
169 		/*
170 		 * We can use either an ordering that gives NULLS FIRST or one that
171 		 * gives NULLS LAST; furthermore there's unlikely to be much
172 		 * performance difference between them, so it doesn't seem worth
173 		 * costing out both ways if we get a hit on the first one.  NULLS
174 		 * FIRST is more likely to be available if the operator is a
175 		 * reverse-sort operator, so try that first if reverse.
176 		 */
177 		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
178 			continue;
179 		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
180 			continue;
181 
182 		/* No indexable path for this aggregate, so fail */
183 		return;
184 	}
185 
186 	/*
187 	 * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
188 	 * node.
189 	 *
190 	 * First, create an output Param node for each agg.  (If we end up not
191 	 * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
192 	 * which is not worth worrying about.  We can't wait till create_plan time
193 	 * to decide whether to make the Param, unfortunately.)
194 	 */
195 	foreach(lc, aggs_list)
196 	{
197 		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
198 
199 		mminfo->param =
200 			SS_make_initplan_output_param(root,
201 										  exprType((Node *) mminfo->target),
202 										  -1,
203 										  exprCollation((Node *) mminfo->target));
204 	}
205 
206 	/*
207 	 * Create a MinMaxAggPath node with the appropriate estimated costs and
208 	 * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
209 	 * it will compete against the standard aggregate implementation.  (It
210 	 * will likely always win, but we need not assume that here.)
211 	 *
212 	 * Note: grouping_planner won't have created this upperrel yet, but it's
213 	 * fine for us to create it first.  We will not have inserted the correct
214 	 * consider_parallel value in it, but MinMaxAggPath paths are currently
215 	 * never parallel-safe anyway, so that doesn't matter.  Likewise, it
216 	 * doesn't matter that we haven't filled FDW-related fields in the rel.
217 	 * Also, because there are no rowmarks, we know that the processed_tlist
218 	 * doesn't need to change anymore, so making the pathtarget now is safe.
219 	 */
220 	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
221 	add_path(grouped_rel, (Path *)
222 			 create_minmaxagg_path(root, grouped_rel,
223 								   create_pathtarget(root,
224 													 root->processed_tlist),
225 								   aggs_list,
226 								   (List *) parse->havingQual));
227 }
228 
229 /*
230  * find_minmax_aggs_walker
231  *		Recursively scan the Aggref nodes in an expression tree, and check
232  *		that each one is a MIN/MAX aggregate.  If so, build a list of the
233  *		distinct aggregate calls in the tree.
234  *
235  * Returns true if a non-MIN/MAX aggregate is found, false otherwise.
236  * (This seemingly-backward definition is used because expression_tree_walker
237  * aborts the scan on true return, which is what we want.)
238  *
239  * Found aggregates are added to the list at *context; it's up to the caller
240  * to initialize the list to NIL.
241  *
242  * This does not descend into subqueries, and so should be used only after
243  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
244  * references either.
245  */
246 static bool
247 find_minmax_aggs_walker(Node *node, List **context)
248 {
249 	if (node == NULL)
250 		return false;
251 	if (IsA(node, Aggref))
252 	{
253 		Aggref	   *aggref = (Aggref *) node;
254 		Oid			aggsortop;
255 		TargetEntry *curTarget;
256 		MinMaxAggInfo *mminfo;
257 		ListCell   *l;
258 
259 		Assert(aggref->agglevelsup == 0);
260 		if (list_length(aggref->args) != 1)
261 			return true;		/* it couldn't be MIN/MAX */
262 
263 		/*
264 		 * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
265 		 * outcome if the aggsortop's operator class recognizes non-identical
266 		 * values as equal.  For example, 4.0 and 4.00 are equal according to
267 		 * numeric_ops, yet distinguishable.  If MIN() receives more than one
268 		 * value equal to 4.0 and no value less than 4.0, it is unspecified
269 		 * which of those equal values MIN() returns.  An ORDER BY expression
270 		 * that differs for each of those equal values of the argument
271 		 * expression makes the result predictable once again.  This is a
272 		 * niche requirement, and we do not implement it with subquery paths.
273 		 * In any case, this test lets us reject ordered-set aggregates
274 		 * quickly.
275 		 */
276 		if (aggref->aggorder != NIL)
277 			return true;
278 		/* note: we do not care if DISTINCT is mentioned ... */
279 
280 		/*
281 		 * We might implement the optimization when a FILTER clause is present
282 		 * by adding the filter to the quals of the generated subquery.  For
283 		 * now, just punt.
284 		 */
285 		if (aggref->aggfilter != NULL)
286 			return true;
287 
288 		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
289 		if (!OidIsValid(aggsortop))
290 			return true;		/* not a MIN/MAX aggregate */
291 
292 		curTarget = (TargetEntry *) linitial(aggref->args);
293 
294 		if (contain_mutable_functions((Node *) curTarget->expr))
295 			return true;		/* not potentially indexable */
296 
297 		if (type_is_rowtype(exprType((Node *) curTarget->expr)))
298 			return true;		/* IS NOT NULL would have weird semantics */
299 
300 		/*
301 		 * Check whether it's already in the list, and add it if not.
302 		 */
303 		foreach(l, *context)
304 		{
305 			mminfo = (MinMaxAggInfo *) lfirst(l);
306 			if (mminfo->aggfnoid == aggref->aggfnoid &&
307 				equal(mminfo->target, curTarget->expr))
308 				return false;
309 		}
310 
311 		mminfo = makeNode(MinMaxAggInfo);
312 		mminfo->aggfnoid = aggref->aggfnoid;
313 		mminfo->aggsortop = aggsortop;
314 		mminfo->target = curTarget->expr;
315 		mminfo->subroot = NULL; /* don't compute path yet */
316 		mminfo->path = NULL;
317 		mminfo->pathcost = 0;
318 		mminfo->param = NULL;
319 
320 		*context = lappend(*context, mminfo);
321 
322 		/*
323 		 * We need not recurse into the argument, since it can't contain any
324 		 * aggregates.
325 		 */
326 		return false;
327 	}
328 	Assert(!IsA(node, SubLink));
329 	return expression_tree_walker(node, find_minmax_aggs_walker,
330 								  (void *) context);
331 }
332 
333 /*
334  * build_minmax_path
335  *		Given a MIN/MAX aggregate, try to build an indexscan Path it can be
336  *		optimized with.
337  *
338  * If successful, stash the best path in *mminfo and return true.
339  * Otherwise, return false.
340  */
341 static bool
342 build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
343 				  Oid eqop, Oid sortop, bool nulls_first)
344 {
345 	PlannerInfo *subroot;
346 	Query	   *parse;
347 	TargetEntry *tle;
348 	List	   *tlist;
349 	NullTest   *ntest;
350 	SortGroupClause *sortcl;
351 	RelOptInfo *final_rel;
352 	Path	   *sorted_path;
353 	Cost		path_cost;
354 	double		path_fraction;
355 
356 	/*
357 	 * We are going to construct what is effectively a sub-SELECT query, so
358 	 * clone the current query level's state and adjust it to make it look
359 	 * like a subquery.  Any outer references will now be one level higher
360 	 * than before.  (This means that when we are done, there will be no Vars
361 	 * of level 1, which is why the subquery can become an initplan.)
362 	 */
363 	subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
364 	memcpy(subroot, root, sizeof(PlannerInfo));
365 	subroot->query_level++;
366 	subroot->parent_root = root;
367 	/* reset subplan-related stuff */
368 	subroot->plan_params = NIL;
369 	subroot->outer_params = NULL;
370 	subroot->init_plans = NIL;
371 
372 	subroot->parse = parse = copyObject(root->parse);
373 	IncrementVarSublevelsUp((Node *) parse, 1, 1);
374 
375 	/* append_rel_list might contain outer Vars? */
376 	subroot->append_rel_list = copyObject(root->append_rel_list);
377 	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
378 	/* There shouldn't be any OJ info to translate, as yet */
379 	Assert(subroot->join_info_list == NIL);
380 	/* and we haven't made equivalence classes, either */
381 	Assert(subroot->eq_classes == NIL);
382 	/* and we haven't created PlaceHolderInfos, either */
383 	Assert(subroot->placeholder_list == NIL);
384 
385 	/*----------
386 	 * Generate modified query of the form
387 	 *		(SELECT col FROM tab
388 	 *		 WHERE col IS NOT NULL AND existing-quals
389 	 *		 ORDER BY col ASC/DESC
390 	 *		 LIMIT 1)
391 	 *----------
392 	 */
393 	/* single tlist entry that is the aggregate target */
394 	tle = makeTargetEntry(copyObject(mminfo->target),
395 						  (AttrNumber) 1,
396 						  pstrdup("agg_target"),
397 						  false);
398 	tlist = list_make1(tle);
399 	subroot->processed_tlist = parse->targetList = tlist;
400 
401 	/* No HAVING, no DISTINCT, no aggregates anymore */
402 	parse->havingQual = NULL;
403 	subroot->hasHavingQual = false;
404 	parse->distinctClause = NIL;
405 	parse->hasDistinctOn = false;
406 	parse->hasAggs = false;
407 
408 	/* Build "target IS NOT NULL" expression */
409 	ntest = makeNode(NullTest);
410 	ntest->nulltesttype = IS_NOT_NULL;
411 	ntest->arg = copyObject(mminfo->target);
412 	/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
413 	ntest->argisrow = false;
414 	ntest->location = -1;
415 
416 	/* User might have had that in WHERE already */
417 	if (!list_member((List *) parse->jointree->quals, ntest))
418 		parse->jointree->quals = (Node *)
419 			lcons(ntest, (List *) parse->jointree->quals);
420 
421 	/* Build suitable ORDER BY clause */
422 	sortcl = makeNode(SortGroupClause);
423 	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
424 	sortcl->eqop = eqop;
425 	sortcl->sortop = sortop;
426 	sortcl->nulls_first = nulls_first;
427 	sortcl->hashable = false;	/* no need to make this accurate */
428 	parse->sortClause = list_make1(sortcl);
429 
430 	/* set up expressions for LIMIT 1 */
431 	parse->limitOffset = NULL;
432 	parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
433 										   sizeof(int64),
434 										   Int64GetDatum(1), false,
435 										   FLOAT8PASSBYVAL);
436 
437 	/*
438 	 * Generate the best paths for this query, telling query_planner that we
439 	 * have LIMIT 1.
440 	 */
441 	subroot->tuple_fraction = 1.0;
442 	subroot->limit_tuples = 1.0;
443 
444 	final_rel = query_planner(subroot, minmax_qp_callback, NULL);
445 
446 	/*
447 	 * Since we didn't go through subquery_planner() to handle the subquery,
448 	 * we have to do some of the same cleanup it would do, in particular cope
449 	 * with params and initplans used within this subquery.  (This won't
450 	 * matter if we end up not using the subplan.)
451 	 */
452 	SS_identify_outer_params(subroot);
453 	SS_charge_for_initplans(subroot, final_rel);
454 
455 	/*
456 	 * Get the best presorted path, that being the one that's cheapest for
457 	 * fetching just one row.  If there's no such path, fail.
458 	 */
459 	if (final_rel->rows > 1.0)
460 		path_fraction = 1.0 / final_rel->rows;
461 	else
462 		path_fraction = 1.0;
463 
464 	sorted_path =
465 		get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
466 												  subroot->query_pathkeys,
467 												  NULL,
468 												  path_fraction);
469 	if (!sorted_path)
470 		return false;
471 
472 	/*
473 	 * The path might not return exactly what we want, so fix that.  (We
474 	 * assume that this won't change any conclusions about which was the
475 	 * cheapest path.)
476 	 */
477 	sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
478 										   create_pathtarget(subroot,
479 															 subroot->processed_tlist));
480 
481 	/*
482 	 * Determine cost to get just the first row of the presorted path.
483 	 *
484 	 * Note: cost calculation here should match
485 	 * compare_fractional_path_costs().
486 	 */
487 	path_cost = sorted_path->startup_cost +
488 		path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
489 
490 	/* Save state for further processing */
491 	mminfo->subroot = subroot;
492 	mminfo->path = sorted_path;
493 	mminfo->pathcost = path_cost;
494 
495 	return true;
496 }
497 
498 /*
499  * Compute query_pathkeys and other pathkeys during query_planner()
500  */
501 static void
502 minmax_qp_callback(PlannerInfo *root, void *extra)
503 {
504 	root->group_pathkeys = NIL;
505 	root->window_pathkeys = NIL;
506 	root->distinct_pathkeys = NIL;
507 
508 	root->sort_pathkeys =
509 		make_pathkeys_for_sortclauses(root,
510 									  root->parse->sortClause,
511 									  root->parse->targetList);
512 
513 	root->query_pathkeys = root->sort_pathkeys;
514 }
515 
516 /*
517  * Get the OID of the sort operator, if any, associated with an aggregate.
518  * Returns InvalidOid if there is no such operator.
519  */
520 static Oid
521 fetch_agg_sort_op(Oid aggfnoid)
522 {
523 	HeapTuple	aggTuple;
524 	Form_pg_aggregate aggform;
525 	Oid			aggsortop;
526 
527 	/* fetch aggregate entry from pg_aggregate */
528 	aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
529 	if (!HeapTupleIsValid(aggTuple))
530 		return InvalidOid;
531 	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
532 	aggsortop = aggform->aggsortop;
533 	ReleaseSysCache(aggTuple);
534 
535 	return aggsortop;
536 }
537