1 /*
2  * This file and its contents are licensed under the Timescale License.
3  * Please see the included NOTICE for copyright information and
4  * LICENSE-TIMESCALE for a copy of the license.
5  */
6 #include <postgres.h>
7 #include <nodes/nodes.h>
8 #include <nodes/nodeFuncs.h>
9 #include <optimizer/cost.h>
10 #include <optimizer/clauses.h>
11 #include <optimizer/prep.h>
12 #include <optimizer/tlist.h>
13 #include <optimizer/paths.h>
14 #include <utils/selfuncs.h>
15 #include <utils/rel.h>
16 #include <lib/stringinfo.h>
17 #include <miscadmin.h>
18 
19 #include <remote/connection.h>
20 #include <remote/async.h>
21 #include <remote/dist_txn.h>
22 
23 #include <compat/compat.h>
24 #include "relinfo.h"
25 #include "estimate.h"
26 #include "deparse.h"
27 
28 /* If no remote estimates, assume a sort costs 5% extra.  */
29 #define DEFAULT_FDW_SORT_MULTIPLIER 1.05
30 
31 typedef struct CostEstimate
32 {
33 	double rows;
34 	double retrieved_rows;
35 	int width;
36 	Cost startup_cost;
37 	Cost total_cost;
38 	Cost cpu_per_tuple;
39 	Cost run_cost;
40 } CostEstimate;
41 
42 static bool
find_first_aggref_walker(Node * node,Aggref ** aggref)43 find_first_aggref_walker(Node *node, Aggref **aggref)
44 {
45 	if (node == NULL)
46 		return false;
47 
48 	if (IsA(node, Aggref))
49 	{
50 		*aggref = castNode(Aggref, node);
51 		return true;
52 	}
53 
54 	return expression_tree_walker(node, find_first_aggref_walker, aggref);
55 }
56 
57 /*
58  * Get the AggsSplit mode of a relation.
59  *
60  * The AggSplit (partial or full aggregation) affects costing.
61  * All aggregates to compute this relation must have the same
62  * mode, so we only check mode on first match.
63  */
64 static AggSplit
get_aggsplit(PlannerInfo * root,RelOptInfo * rel)65 get_aggsplit(PlannerInfo *root, RelOptInfo *rel)
66 {
67 	Aggref *agg;
68 	Assert(root->parse->hasAggs);
69 
70 	if (find_first_aggref_walker((Node *) rel->reltarget->exprs, &agg))
71 		return agg->aggsplit;
72 
73 	/* If the aggregate is only referenced in the HAVING clause it will
74 	 * not be present in the targetlist so we have to check HAVING clause too. */
75 	if (root->hasHavingQual && find_first_aggref_walker((Node *) root->parse->havingQual, &agg))
76 		return agg->aggsplit;
77 
78 	/* Since PlannerInfo has hasAggs true (checked in caller) we should
79 	 * never get here and always find an Aggref. */
80 	elog(ERROR, "no aggref found in targetlist or HAVING clause");
81 	pg_unreachable();
82 }
83 
84 static void
get_upper_rel_estimate(PlannerInfo * root,RelOptInfo * rel,CostEstimate * ce)85 get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
86 {
87 	TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
88 	TsFdwRelInfo *ofpinfo = fdw_relinfo_get(fpinfo->outerrel);
89 	AggClauseCosts aggcosts;
90 	double input_rows;
91 	int num_group_cols;
92 	double num_groups = 1;
93 
94 	/* Make sure the core code set the pathtarget. */
95 	Assert(rel->reltarget != NULL);
96 
97 	/*
98 	 * This cost model is mixture of costing done for sorted and
99 	 * hashed aggregates in cost_agg().  We are not sure which
100 	 * strategy will be considered at remote side, thus for
101 	 * simplicity, we put all startup related costs in startup_cost
102 	 * and all finalization and run cost are added in total_cost.
103 	 *
104 	 * Also, core does not care about costing HAVING expressions and
105 	 * adding that to the costs.  So similarly, here too we are not
106 	 * considering remote and local conditions for costing.
107 	 */
108 
109 	/* Get rows from input rel */
110 	input_rows = ofpinfo->rows;
111 
112 	/* Collect statistics about aggregates for estimating costs. */
113 	MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
114 
115 	if (root->parse->hasAggs)
116 	{
117 		/* Get the aggsplit to use in order to support push-down of partial
118 		 * aggregation */
119 		AggSplit aggsplit = get_aggsplit(root, rel);
120 
121 		get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts);
122 	}
123 
124 	/* Get number of grouping columns and possible number of groups */
125 	num_group_cols = list_length(root->parse->groupClause);
126 	num_groups = estimate_num_groups_compat(root,
127 											get_sortgrouplist_exprs(root->parse->groupClause,
128 																	fpinfo->grouped_tlist),
129 											input_rows,
130 											NULL,
131 											NULL);
132 
133 	/*
134 	 * Get the retrieved_rows and rows estimates.  If there are HAVING
135 	 * quals, account for their selectivity.
136 	 */
137 	if (root->parse->havingQual)
138 	{
139 		/* Factor in the selectivity of the remotely-checked quals */
140 		ce->retrieved_rows = clamp_row_est(
141 			num_groups * clauselist_selectivity(root, fpinfo->remote_conds, 0, JOIN_INNER, NULL));
142 		/* Factor in the selectivity of the locally-checked quals */
143 		ce->rows = clamp_row_est(ce->retrieved_rows * fpinfo->local_conds_sel);
144 	}
145 	else
146 	{
147 		/*
148 		 * Number of rows expected from data node will be same as
149 		 * that of number of groups.
150 		 */
151 		ce->rows = ce->retrieved_rows = num_groups;
152 	}
153 
154 	/* Use width estimate made by the core code. */
155 	ce->width = rel->reltarget->width;
156 
157 	/*-----
158 	 * Startup cost includes:
159 	 *	  1. Startup cost for underneath input * relation
160 	 *	  2. Cost of performing aggregation, per cost_agg()
161 	 *	  3. Startup cost for PathTarget eval
162 	 *-----
163 	 */
164 	ce->startup_cost = ofpinfo->rel_startup_cost;
165 	ce->startup_cost += rel->reltarget->cost.startup;
166 	ce->startup_cost += aggcosts.transCost.startup;
167 	ce->startup_cost += aggcosts.transCost.per_tuple * input_rows;
168 	ce->startup_cost += aggcosts.finalCost.startup;
169 	ce->startup_cost += (cpu_operator_cost * num_group_cols) * input_rows;
170 
171 	/*-----
172 	 * Run time cost includes:
173 	 *	  1. Run time cost of underneath input relation, adjusted for
174 	 *	     tlist replacement by apply_scanjoin_target_to_paths()
175 	 *	  2. Run time cost of performing aggregation, per cost_agg()
176 	 *-----
177 	 */
178 	ce->run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
179 	ce->run_cost += rel->reltarget->cost.per_tuple * input_rows;
180 	ce->run_cost += aggcosts.finalCost.per_tuple * num_groups;
181 	ce->run_cost += cpu_tuple_cost * num_groups;
182 
183 	/* Account for the eval cost of HAVING quals, if any */
184 	if (root->parse->havingQual)
185 	{
186 		QualCost remote_cost;
187 
188 		/* Add in the eval cost of the remotely-checked quals */
189 		cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
190 		ce->startup_cost += remote_cost.startup;
191 		ce->run_cost += remote_cost.per_tuple * num_groups;
192 		/* Add in the eval cost of the locally-checked quals */
193 		ce->startup_cost += fpinfo->local_conds_cost.startup;
194 		ce->run_cost += fpinfo->local_conds_cost.per_tuple * ce->retrieved_rows;
195 	}
196 
197 	/* Add in tlist eval cost for each output row */
198 	ce->startup_cost += rel->reltarget->cost.startup;
199 	ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows;
200 }
201 
202 static void
get_base_rel_estimate(PlannerInfo * root,RelOptInfo * rel,CostEstimate * ce)203 get_base_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
204 {
205 	TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
206 
207 	ce->rows = rel->rows;
208 	ce->width = rel->reltarget->width;
209 
210 	/* Back into an estimate of the number of retrieved rows. */
211 	ce->retrieved_rows = clamp_row_est(ce->rows / fpinfo->local_conds_sel);
212 
213 	/* Clamp retrieved rows estimates to at most rel->tuples. */
214 	ce->retrieved_rows = Min(ce->retrieved_rows, rel->tuples);
215 
216 	/*
217 	 * Cost as though this were a seqscan, which is pessimistic.  We
218 	 * effectively imagine the local_conds are being evaluated
219 	 * remotely, too.
220 	 */
221 	ce->startup_cost = 0;
222 	ce->run_cost = 0;
223 	ce->run_cost += seq_page_cost * rel->pages;
224 
225 	ce->startup_cost += rel->baserestrictcost.startup;
226 	ce->cpu_per_tuple = cpu_tuple_cost + rel->baserestrictcost.per_tuple;
227 	ce->run_cost += ce->cpu_per_tuple * rel->tuples;
228 
229 	/* Add in tlist eval cost for each output row */
230 	ce->startup_cost += rel->reltarget->cost.startup;
231 	ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows;
232 }
233 
234 #define REL_HAS_CACHED_COSTS(fpinfo)                                                               \
235 	((fpinfo)->rel_startup_cost >= 0 && (fpinfo)->rel_total_cost >= 0 &&                           \
236 	 (fpinfo)->rel_retrieved_rows >= 0)
237 
238 /*
239  * Adjust the cost estimates of a foreign grouping path to include the cost of
240  * generating properly-sorted output.
241  */
242 static void
adjust_foreign_grouping_path_cost(PlannerInfo * root,List * pathkeys,double retrieved_rows,double width,double limit_tuples,Cost * p_startup_cost,Cost * p_run_cost)243 adjust_foreign_grouping_path_cost(PlannerInfo *root, List *pathkeys, double retrieved_rows,
244 								  double width, double limit_tuples, Cost *p_startup_cost,
245 								  Cost *p_run_cost)
246 {
247 	/*
248 	 * If the GROUP BY clause isn't sort-able, the plan chosen by the remote
249 	 * side is unlikely to generate properly-sorted output, so it would need
250 	 * an explicit sort; adjust the given costs with cost_sort().  Likewise,
251 	 * if the GROUP BY clause is sort-able but isn't a superset of the given
252 	 * pathkeys, adjust the costs with that function.  Otherwise, adjust the
253 	 * costs by applying the same heuristic as for the scan or join case.
254 	 */
255 	if (!grouping_is_sortable(root->parse->groupClause) ||
256 		!pathkeys_contained_in(pathkeys, root->group_pathkeys))
257 	{
258 		Path sort_path; /* dummy for result of cost_sort */
259 
260 		cost_sort(&sort_path,
261 				  root,
262 				  pathkeys,
263 				  *p_startup_cost + *p_run_cost,
264 				  retrieved_rows,
265 				  width,
266 				  0.0,
267 				  work_mem,
268 				  limit_tuples);
269 
270 		*p_startup_cost = sort_path.startup_cost;
271 		*p_run_cost = sort_path.total_cost - sort_path.startup_cost;
272 	}
273 	else
274 	{
275 		/*
276 		 * The default extra cost seems too large for foreign-grouping cases;
277 		 * add 1/4th of that default.
278 		 */
279 		double sort_multiplier = 1.0 + (DEFAULT_FDW_SORT_MULTIPLIER - 1.0) * 0.25;
280 
281 		*p_startup_cost *= sort_multiplier;
282 		*p_run_cost *= sort_multiplier;
283 	}
284 }
285 
286 /*
287  * fdw_estimate_path_cost_size
288  *		Get cost and size estimates for a foreign scan on given foreign
289  *		relation either a base relation or an upper relation containing
290  *		foreign relations. Estimate rows using whatever statistics we have
291  *      locally, in a way similar to ordinary tables.
292  *
293  * pathkeys specify the expected sort order if any for given path being costed.
294  *
295  * The function returns the cost and size estimates in p_row, p_width,
296  * p_startup_cost and p_total_cost variables.
297  */
298 void
fdw_estimate_path_cost_size(PlannerInfo * root,RelOptInfo * rel,List * pathkeys,double * p_rows,int * p_width,Cost * p_startup_cost,Cost * p_total_cost)299 fdw_estimate_path_cost_size(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, double *p_rows,
300 							int *p_width, Cost *p_startup_cost, Cost *p_total_cost)
301 {
302 	TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
303 	CostEstimate ce = {
304 		/*
305 		 * Use rows/width estimates made by set_baserel_size_estimates() for
306 		 * base foreign relations.
307 		 */
308 		.rows = rel->rows,
309 		.width = rel->reltarget->width,
310 	};
311 
312 	if (IS_JOIN_REL(rel))
313 		ereport(ERROR,
314 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
315 				 errmsg("foreign joins are not supported")));
316 
317 	/*
318 	 * We will come here again and again with different set of pathkeys
319 	 * that caller wants to cost. We don't need to calculate the cost of
320 	 * bare scan each time. Instead, use the costs if we have cached them
321 	 * already.
322 	 */
323 	if (REL_HAS_CACHED_COSTS(fpinfo))
324 	{
325 		ce.rows = fpinfo->rows;
326 		ce.width = fpinfo->width;
327 		ce.startup_cost = fpinfo->rel_startup_cost;
328 		ce.run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
329 		ce.retrieved_rows = fpinfo->rel_retrieved_rows;
330 	}
331 	else if (IS_UPPER_REL(rel))
332 		get_upper_rel_estimate(root, rel, &ce);
333 	else
334 		get_base_rel_estimate(root, rel, &ce);
335 
336 	/*
337 	 * Without remote estimates, we have no real way to estimate the cost
338 	 * of generating sorted output.  It could be free if the query plan
339 	 * the remote side would have chosen generates properly-sorted output
340 	 * anyway, but in most cases it will cost something.  Estimate a value
341 	 * high enough that we won't pick the sorted path when the ordering
342 	 * isn't locally useful, but low enough that we'll err on the side of
343 	 * pushing down the ORDER BY clause when it's useful to do so.
344 	 */
345 	if (pathkeys != NIL)
346 	{
347 		if (IS_UPPER_REL(rel))
348 		{
349 			Assert(rel->reloptkind == RELOPT_UPPER_REL ||
350 				   rel->reloptkind == RELOPT_OTHER_UPPER_REL);
351 
352 			/* FIXME: Currently don't have a way to pass on limit here */
353 			const double limit_tuples = -1;
354 
355 			adjust_foreign_grouping_path_cost(root,
356 											  pathkeys,
357 											  ce.retrieved_rows,
358 											  ce.width,
359 											  limit_tuples,
360 											  &ce.startup_cost,
361 											  &ce.run_cost);
362 		}
363 		else
364 		{
365 			ce.startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
366 			ce.run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
367 		}
368 	}
369 
370 	ce.total_cost = ce.startup_cost + ce.run_cost;
371 
372 	/*
373 	 * Cache the costs for scans without any pathkeys
374 	 * before adding the costs for transferring data from the data node.
375 	 * These costs are useful for costing the join between this relation and
376 	 * another foreign relation or to calculate the costs of paths with
377 	 * pathkeys for this relation, when the costs can not be obtained from the
378 	 * data node. This function will be called at least once for every
379 	 * foreign relation without pathkeys.
380 	 */
381 	if (!REL_HAS_CACHED_COSTS(fpinfo) && pathkeys == NIL)
382 	{
383 		fpinfo->rel_startup_cost = ce.startup_cost;
384 		fpinfo->rel_total_cost = ce.total_cost;
385 		fpinfo->rel_retrieved_rows = ce.retrieved_rows;
386 	}
387 
388 	/*
389 	 * Add some additional cost factors to account for connection overhead
390 	 * (fdw_startup_cost), transferring data across the network
391 	 * (fdw_tuple_cost per retrieved row), and local manipulation of the data
392 	 * (cpu_tuple_cost per retrieved row).
393 	 */
394 	ce.startup_cost += fpinfo->fdw_startup_cost;
395 	ce.total_cost += fpinfo->fdw_startup_cost;
396 	ce.total_cost += fpinfo->fdw_tuple_cost * ce.retrieved_rows;
397 	ce.total_cost += cpu_tuple_cost * ce.retrieved_rows;
398 
399 	/* Return results. */
400 	*p_rows = ce.rows;
401 	*p_width = ce.width;
402 	*p_startup_cost = ce.startup_cost;
403 	*p_total_cost = ce.total_cost;
404 }
405