1 /*
2 * This file and its contents are licensed under the Timescale License.
3 * Please see the included NOTICE for copyright information and
4 * LICENSE-TIMESCALE for a copy of the license.
5 */
6 #include <postgres.h>
7 #include <nodes/nodes.h>
8 #include <nodes/nodeFuncs.h>
9 #include <optimizer/cost.h>
10 #include <optimizer/clauses.h>
11 #include <optimizer/prep.h>
12 #include <optimizer/tlist.h>
13 #include <optimizer/paths.h>
14 #include <utils/selfuncs.h>
15 #include <utils/rel.h>
16 #include <lib/stringinfo.h>
17 #include <miscadmin.h>
18
19 #include <remote/connection.h>
20 #include <remote/async.h>
21 #include <remote/dist_txn.h>
22
23 #include <compat/compat.h>
24 #include "relinfo.h"
25 #include "estimate.h"
26 #include "deparse.h"
27
28 /* If no remote estimates, assume a sort costs 5% extra. */
29 #define DEFAULT_FDW_SORT_MULTIPLIER 1.05
30
31 typedef struct CostEstimate
32 {
33 double rows;
34 double retrieved_rows;
35 int width;
36 Cost startup_cost;
37 Cost total_cost;
38 Cost cpu_per_tuple;
39 Cost run_cost;
40 } CostEstimate;
41
42 static bool
find_first_aggref_walker(Node * node,Aggref ** aggref)43 find_first_aggref_walker(Node *node, Aggref **aggref)
44 {
45 if (node == NULL)
46 return false;
47
48 if (IsA(node, Aggref))
49 {
50 *aggref = castNode(Aggref, node);
51 return true;
52 }
53
54 return expression_tree_walker(node, find_first_aggref_walker, aggref);
55 }
56
57 /*
58 * Get the AggsSplit mode of a relation.
59 *
60 * The AggSplit (partial or full aggregation) affects costing.
61 * All aggregates to compute this relation must have the same
62 * mode, so we only check mode on first match.
63 */
64 static AggSplit
get_aggsplit(PlannerInfo * root,RelOptInfo * rel)65 get_aggsplit(PlannerInfo *root, RelOptInfo *rel)
66 {
67 Aggref *agg;
68 Assert(root->parse->hasAggs);
69
70 if (find_first_aggref_walker((Node *) rel->reltarget->exprs, &agg))
71 return agg->aggsplit;
72
73 /* If the aggregate is only referenced in the HAVING clause it will
74 * not be present in the targetlist so we have to check HAVING clause too. */
75 if (root->hasHavingQual && find_first_aggref_walker((Node *) root->parse->havingQual, &agg))
76 return agg->aggsplit;
77
78 /* Since PlannerInfo has hasAggs true (checked in caller) we should
79 * never get here and always find an Aggref. */
80 elog(ERROR, "no aggref found in targetlist or HAVING clause");
81 pg_unreachable();
82 }
83
84 static void
get_upper_rel_estimate(PlannerInfo * root,RelOptInfo * rel,CostEstimate * ce)85 get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
86 {
87 TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
88 TsFdwRelInfo *ofpinfo = fdw_relinfo_get(fpinfo->outerrel);
89 AggClauseCosts aggcosts;
90 double input_rows;
91 int num_group_cols;
92 double num_groups = 1;
93
94 /* Make sure the core code set the pathtarget. */
95 Assert(rel->reltarget != NULL);
96
97 /*
98 * This cost model is mixture of costing done for sorted and
99 * hashed aggregates in cost_agg(). We are not sure which
100 * strategy will be considered at remote side, thus for
101 * simplicity, we put all startup related costs in startup_cost
102 * and all finalization and run cost are added in total_cost.
103 *
104 * Also, core does not care about costing HAVING expressions and
105 * adding that to the costs. So similarly, here too we are not
106 * considering remote and local conditions for costing.
107 */
108
109 /* Get rows from input rel */
110 input_rows = ofpinfo->rows;
111
112 /* Collect statistics about aggregates for estimating costs. */
113 MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
114
115 if (root->parse->hasAggs)
116 {
117 /* Get the aggsplit to use in order to support push-down of partial
118 * aggregation */
119 AggSplit aggsplit = get_aggsplit(root, rel);
120
121 get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts);
122 }
123
124 /* Get number of grouping columns and possible number of groups */
125 num_group_cols = list_length(root->parse->groupClause);
126 num_groups = estimate_num_groups_compat(root,
127 get_sortgrouplist_exprs(root->parse->groupClause,
128 fpinfo->grouped_tlist),
129 input_rows,
130 NULL,
131 NULL);
132
133 /*
134 * Get the retrieved_rows and rows estimates. If there are HAVING
135 * quals, account for their selectivity.
136 */
137 if (root->parse->havingQual)
138 {
139 /* Factor in the selectivity of the remotely-checked quals */
140 ce->retrieved_rows = clamp_row_est(
141 num_groups * clauselist_selectivity(root, fpinfo->remote_conds, 0, JOIN_INNER, NULL));
142 /* Factor in the selectivity of the locally-checked quals */
143 ce->rows = clamp_row_est(ce->retrieved_rows * fpinfo->local_conds_sel);
144 }
145 else
146 {
147 /*
148 * Number of rows expected from data node will be same as
149 * that of number of groups.
150 */
151 ce->rows = ce->retrieved_rows = num_groups;
152 }
153
154 /* Use width estimate made by the core code. */
155 ce->width = rel->reltarget->width;
156
157 /*-----
158 * Startup cost includes:
159 * 1. Startup cost for underneath input * relation
160 * 2. Cost of performing aggregation, per cost_agg()
161 * 3. Startup cost for PathTarget eval
162 *-----
163 */
164 ce->startup_cost = ofpinfo->rel_startup_cost;
165 ce->startup_cost += rel->reltarget->cost.startup;
166 ce->startup_cost += aggcosts.transCost.startup;
167 ce->startup_cost += aggcosts.transCost.per_tuple * input_rows;
168 ce->startup_cost += aggcosts.finalCost.startup;
169 ce->startup_cost += (cpu_operator_cost * num_group_cols) * input_rows;
170
171 /*-----
172 * Run time cost includes:
173 * 1. Run time cost of underneath input relation, adjusted for
174 * tlist replacement by apply_scanjoin_target_to_paths()
175 * 2. Run time cost of performing aggregation, per cost_agg()
176 *-----
177 */
178 ce->run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
179 ce->run_cost += rel->reltarget->cost.per_tuple * input_rows;
180 ce->run_cost += aggcosts.finalCost.per_tuple * num_groups;
181 ce->run_cost += cpu_tuple_cost * num_groups;
182
183 /* Account for the eval cost of HAVING quals, if any */
184 if (root->parse->havingQual)
185 {
186 QualCost remote_cost;
187
188 /* Add in the eval cost of the remotely-checked quals */
189 cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
190 ce->startup_cost += remote_cost.startup;
191 ce->run_cost += remote_cost.per_tuple * num_groups;
192 /* Add in the eval cost of the locally-checked quals */
193 ce->startup_cost += fpinfo->local_conds_cost.startup;
194 ce->run_cost += fpinfo->local_conds_cost.per_tuple * ce->retrieved_rows;
195 }
196
197 /* Add in tlist eval cost for each output row */
198 ce->startup_cost += rel->reltarget->cost.startup;
199 ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows;
200 }
201
202 static void
get_base_rel_estimate(PlannerInfo * root,RelOptInfo * rel,CostEstimate * ce)203 get_base_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
204 {
205 TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
206
207 ce->rows = rel->rows;
208 ce->width = rel->reltarget->width;
209
210 /* Back into an estimate of the number of retrieved rows. */
211 ce->retrieved_rows = clamp_row_est(ce->rows / fpinfo->local_conds_sel);
212
213 /* Clamp retrieved rows estimates to at most rel->tuples. */
214 ce->retrieved_rows = Min(ce->retrieved_rows, rel->tuples);
215
216 /*
217 * Cost as though this were a seqscan, which is pessimistic. We
218 * effectively imagine the local_conds are being evaluated
219 * remotely, too.
220 */
221 ce->startup_cost = 0;
222 ce->run_cost = 0;
223 ce->run_cost += seq_page_cost * rel->pages;
224
225 ce->startup_cost += rel->baserestrictcost.startup;
226 ce->cpu_per_tuple = cpu_tuple_cost + rel->baserestrictcost.per_tuple;
227 ce->run_cost += ce->cpu_per_tuple * rel->tuples;
228
229 /* Add in tlist eval cost for each output row */
230 ce->startup_cost += rel->reltarget->cost.startup;
231 ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows;
232 }
233
234 #define REL_HAS_CACHED_COSTS(fpinfo) \
235 ((fpinfo)->rel_startup_cost >= 0 && (fpinfo)->rel_total_cost >= 0 && \
236 (fpinfo)->rel_retrieved_rows >= 0)
237
238 /*
239 * Adjust the cost estimates of a foreign grouping path to include the cost of
240 * generating properly-sorted output.
241 */
242 static void
adjust_foreign_grouping_path_cost(PlannerInfo * root,List * pathkeys,double retrieved_rows,double width,double limit_tuples,Cost * p_startup_cost,Cost * p_run_cost)243 adjust_foreign_grouping_path_cost(PlannerInfo *root, List *pathkeys, double retrieved_rows,
244 double width, double limit_tuples, Cost *p_startup_cost,
245 Cost *p_run_cost)
246 {
247 /*
248 * If the GROUP BY clause isn't sort-able, the plan chosen by the remote
249 * side is unlikely to generate properly-sorted output, so it would need
250 * an explicit sort; adjust the given costs with cost_sort(). Likewise,
251 * if the GROUP BY clause is sort-able but isn't a superset of the given
252 * pathkeys, adjust the costs with that function. Otherwise, adjust the
253 * costs by applying the same heuristic as for the scan or join case.
254 */
255 if (!grouping_is_sortable(root->parse->groupClause) ||
256 !pathkeys_contained_in(pathkeys, root->group_pathkeys))
257 {
258 Path sort_path; /* dummy for result of cost_sort */
259
260 cost_sort(&sort_path,
261 root,
262 pathkeys,
263 *p_startup_cost + *p_run_cost,
264 retrieved_rows,
265 width,
266 0.0,
267 work_mem,
268 limit_tuples);
269
270 *p_startup_cost = sort_path.startup_cost;
271 *p_run_cost = sort_path.total_cost - sort_path.startup_cost;
272 }
273 else
274 {
275 /*
276 * The default extra cost seems too large for foreign-grouping cases;
277 * add 1/4th of that default.
278 */
279 double sort_multiplier = 1.0 + (DEFAULT_FDW_SORT_MULTIPLIER - 1.0) * 0.25;
280
281 *p_startup_cost *= sort_multiplier;
282 *p_run_cost *= sort_multiplier;
283 }
284 }
285
286 /*
287 * fdw_estimate_path_cost_size
288 * Get cost and size estimates for a foreign scan on given foreign
289 * relation either a base relation or an upper relation containing
290 * foreign relations. Estimate rows using whatever statistics we have
291 * locally, in a way similar to ordinary tables.
292 *
293 * pathkeys specify the expected sort order if any for given path being costed.
294 *
295 * The function returns the cost and size estimates in p_row, p_width,
296 * p_startup_cost and p_total_cost variables.
297 */
298 void
fdw_estimate_path_cost_size(PlannerInfo * root,RelOptInfo * rel,List * pathkeys,double * p_rows,int * p_width,Cost * p_startup_cost,Cost * p_total_cost)299 fdw_estimate_path_cost_size(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, double *p_rows,
300 int *p_width, Cost *p_startup_cost, Cost *p_total_cost)
301 {
302 TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
303 CostEstimate ce = {
304 /*
305 * Use rows/width estimates made by set_baserel_size_estimates() for
306 * base foreign relations.
307 */
308 .rows = rel->rows,
309 .width = rel->reltarget->width,
310 };
311
312 if (IS_JOIN_REL(rel))
313 ereport(ERROR,
314 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
315 errmsg("foreign joins are not supported")));
316
317 /*
318 * We will come here again and again with different set of pathkeys
319 * that caller wants to cost. We don't need to calculate the cost of
320 * bare scan each time. Instead, use the costs if we have cached them
321 * already.
322 */
323 if (REL_HAS_CACHED_COSTS(fpinfo))
324 {
325 ce.rows = fpinfo->rows;
326 ce.width = fpinfo->width;
327 ce.startup_cost = fpinfo->rel_startup_cost;
328 ce.run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
329 ce.retrieved_rows = fpinfo->rel_retrieved_rows;
330 }
331 else if (IS_UPPER_REL(rel))
332 get_upper_rel_estimate(root, rel, &ce);
333 else
334 get_base_rel_estimate(root, rel, &ce);
335
336 /*
337 * Without remote estimates, we have no real way to estimate the cost
338 * of generating sorted output. It could be free if the query plan
339 * the remote side would have chosen generates properly-sorted output
340 * anyway, but in most cases it will cost something. Estimate a value
341 * high enough that we won't pick the sorted path when the ordering
342 * isn't locally useful, but low enough that we'll err on the side of
343 * pushing down the ORDER BY clause when it's useful to do so.
344 */
345 if (pathkeys != NIL)
346 {
347 if (IS_UPPER_REL(rel))
348 {
349 Assert(rel->reloptkind == RELOPT_UPPER_REL ||
350 rel->reloptkind == RELOPT_OTHER_UPPER_REL);
351
352 /* FIXME: Currently don't have a way to pass on limit here */
353 const double limit_tuples = -1;
354
355 adjust_foreign_grouping_path_cost(root,
356 pathkeys,
357 ce.retrieved_rows,
358 ce.width,
359 limit_tuples,
360 &ce.startup_cost,
361 &ce.run_cost);
362 }
363 else
364 {
365 ce.startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
366 ce.run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
367 }
368 }
369
370 ce.total_cost = ce.startup_cost + ce.run_cost;
371
372 /*
373 * Cache the costs for scans without any pathkeys
374 * before adding the costs for transferring data from the data node.
375 * These costs are useful for costing the join between this relation and
376 * another foreign relation or to calculate the costs of paths with
377 * pathkeys for this relation, when the costs can not be obtained from the
378 * data node. This function will be called at least once for every
379 * foreign relation without pathkeys.
380 */
381 if (!REL_HAS_CACHED_COSTS(fpinfo) && pathkeys == NIL)
382 {
383 fpinfo->rel_startup_cost = ce.startup_cost;
384 fpinfo->rel_total_cost = ce.total_cost;
385 fpinfo->rel_retrieved_rows = ce.retrieved_rows;
386 }
387
388 /*
389 * Add some additional cost factors to account for connection overhead
390 * (fdw_startup_cost), transferring data across the network
391 * (fdw_tuple_cost per retrieved row), and local manipulation of the data
392 * (cpu_tuple_cost per retrieved row).
393 */
394 ce.startup_cost += fpinfo->fdw_startup_cost;
395 ce.total_cost += fpinfo->fdw_startup_cost;
396 ce.total_cost += fpinfo->fdw_tuple_cost * ce.retrieved_rows;
397 ce.total_cost += cpu_tuple_cost * ce.retrieved_rows;
398
399 /* Return results. */
400 *p_rows = ce.rows;
401 *p_width = ce.width;
402 *p_startup_cost = ce.startup_cost;
403 *p_total_cost = ce.total_cost;
404 }
405