1 /*------------------------------------------------------------------------
2 *
3 * geqo_eval.c
4 * Routines to evaluate query trees
5 *
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * src/backend/optimizer/geqo/geqo_eval.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14 /* contributed by:
15 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
16 * Martin Utesch * Institute of Automatic Control *
17 = = University of Mining and Technology =
18 * utesch@aut.tu-freiberg.de * Freiberg, Germany *
19 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
20 */
21
22 #include "postgres.h"
23
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27
28 #include "optimizer/geqo.h"
29 #include "optimizer/joininfo.h"
30 #include "optimizer/pathnode.h"
31 #include "optimizer/paths.h"
32 #include "utils/memutils.h"
33
34
35 /* A "clump" of already-joined relations within gimme_tree */
36 typedef struct
37 {
38 RelOptInfo *joinrel; /* joinrel for the set of relations */
39 int size; /* number of input relations in clump */
40 } Clump;
41
42 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
43 bool force);
44 static bool desirable_join(PlannerInfo *root,
45 RelOptInfo *outer_rel, RelOptInfo *inner_rel);
46
47
48 /*
49 * geqo_eval
50 *
51 * Returns cost of a query tree as an individual of the population.
52 *
53 * If no legal join order can be extracted from the proposed tour,
54 * returns DBL_MAX.
55 */
56 Cost
geqo_eval(PlannerInfo * root,Gene * tour,int num_gene)57 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
58 {
59 MemoryContext mycontext;
60 MemoryContext oldcxt;
61 RelOptInfo *joinrel;
62 Cost fitness;
63 int savelength;
64 struct HTAB *savehash;
65
66 /*
67 * Create a private memory context that will hold all temp storage
68 * allocated inside gimme_tree().
69 *
70 * Since geqo_eval() will be called many times, we can't afford to let all
71 * that memory go unreclaimed until end of statement. Note we make the
72 * temp context a child of the planner's normal context, so that it will
73 * be freed even if we abort via ereport(ERROR).
74 */
75 mycontext = AllocSetContextCreate(CurrentMemoryContext,
76 "GEQO",
77 ALLOCSET_DEFAULT_SIZES);
78 oldcxt = MemoryContextSwitchTo(mycontext);
79
80 /*
81 * gimme_tree will add entries to root->join_rel_list, which may or may
82 * not already contain some entries. The newly added entries will be
83 * recycled by the MemoryContextDelete below, so we must ensure that the
84 * list is restored to its former state before exiting. We can do this by
85 * truncating the list to its original length. NOTE this assumes that any
86 * added entries are appended at the end!
87 *
88 * We also must take care not to mess up the outer join_rel_hash, if there
89 * is one. We can do this by just temporarily setting the link to NULL.
90 * (If we are dealing with enough join rels, which we very likely are, a
91 * new hash table will get built and used locally.)
92 *
93 * join_rel_level[] shouldn't be in use, so just Assert it isn't.
94 */
95 savelength = list_length(root->join_rel_list);
96 savehash = root->join_rel_hash;
97 Assert(root->join_rel_level == NULL);
98
99 root->join_rel_hash = NULL;
100
101 /* construct the best path for the given combination of relations */
102 joinrel = gimme_tree(root, tour, num_gene);
103
104 /*
105 * compute fitness, if we found a valid join
106 *
107 * XXX geqo does not currently support optimization for partial result
108 * retrieval, nor do we take any cognizance of possible use of
109 * parameterized paths --- how to fix?
110 */
111 if (joinrel)
112 {
113 Path *best_path = joinrel->cheapest_total_path;
114
115 fitness = best_path->total_cost;
116 }
117 else
118 fitness = DBL_MAX;
119
120 /*
121 * Restore join_rel_list to its former state, and put back original
122 * hashtable if any.
123 */
124 root->join_rel_list = list_truncate(root->join_rel_list,
125 savelength);
126 root->join_rel_hash = savehash;
127
128 /* release all the memory acquired within gimme_tree */
129 MemoryContextSwitchTo(oldcxt);
130 MemoryContextDelete(mycontext);
131
132 return fitness;
133 }
134
135 /*
136 * gimme_tree
137 * Form planner estimates for a join tree constructed in the specified
138 * order.
139 *
140 * 'tour' is the proposed join order, of length 'num_gene'
141 *
142 * Returns a new join relation whose cheapest path is the best plan for
143 * this join order. NB: will return NULL if join order is invalid and
144 * we can't modify it into a valid order.
145 *
146 * The original implementation of this routine always joined in the specified
147 * order, and so could only build left-sided plans (and right-sided and
148 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
149 * It could never produce a "bushy" plan. This had a couple of big problems,
150 * of which the worst was that there are situations involving join order
151 * restrictions where the only valid plans are bushy.
152 *
153 * The present implementation takes the given tour as a guideline, but
154 * postpones joins that are illegal or seem unsuitable according to some
155 * heuristic rules. This allows correct bushy plans to be generated at need,
156 * and as a nice side-effect it seems to materially improve the quality of the
157 * generated plans. Note however that since it's just a heuristic, it can
158 * still fail in some cases. (In particular, we might clump together
159 * relations that actually mustn't be joined yet due to LATERAL restrictions;
160 * since there's no provision for un-clumping, this must lead to failure.)
161 */
162 RelOptInfo *
gimme_tree(PlannerInfo * root,Gene * tour,int num_gene)163 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
164 {
165 GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
166 List *clumps;
167 int rel_count;
168
169 /*
170 * Sometimes, a relation can't yet be joined to others due to heuristics
171 * or actual semantic restrictions. We maintain a list of "clumps" of
172 * successfully joined relations, with larger clumps at the front. Each
173 * new relation from the tour is added to the first clump it can be joined
174 * to; if there is none then it becomes a new clump of its own. When we
175 * enlarge an existing clump we check to see if it can now be merged with
176 * any other clumps. After the tour is all scanned, we forget about the
177 * heuristics and try to forcibly join any remaining clumps. If we are
178 * unable to merge all the clumps into one, fail.
179 */
180 clumps = NIL;
181
182 for (rel_count = 0; rel_count < num_gene; rel_count++)
183 {
184 int cur_rel_index;
185 RelOptInfo *cur_rel;
186 Clump *cur_clump;
187
188 /* Get the next input relation */
189 cur_rel_index = (int) tour[rel_count];
190 cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
191 cur_rel_index - 1);
192
193 /* Make it into a single-rel clump */
194 cur_clump = (Clump *) palloc(sizeof(Clump));
195 cur_clump->joinrel = cur_rel;
196 cur_clump->size = 1;
197
198 /* Merge it into the clumps list, using only desirable joins */
199 clumps = merge_clump(root, clumps, cur_clump, false);
200 }
201
202 if (list_length(clumps) > 1)
203 {
204 /* Force-join the remaining clumps in some legal order */
205 List *fclumps;
206 ListCell *lc;
207
208 fclumps = NIL;
209 foreach(lc, clumps)
210 {
211 Clump *clump = (Clump *) lfirst(lc);
212
213 fclumps = merge_clump(root, fclumps, clump, true);
214 }
215 clumps = fclumps;
216 }
217
218 /* Did we succeed in forming a single join relation? */
219 if (list_length(clumps) != 1)
220 return NULL;
221
222 return ((Clump *) linitial(clumps))->joinrel;
223 }
224
225 /*
226 * Merge a "clump" into the list of existing clumps for gimme_tree.
227 *
228 * We try to merge the clump into some existing clump, and repeat if
229 * successful. When no more merging is possible, insert the clump
230 * into the list, preserving the list ordering rule (namely, that
231 * clumps of larger size appear earlier).
232 *
233 * If force is true, merge anywhere a join is legal, even if it causes
234 * a cartesian join to be performed. When force is false, do only
235 * "desirable" joins.
236 */
237 static List *
merge_clump(PlannerInfo * root,List * clumps,Clump * new_clump,bool force)238 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
239 {
240 ListCell *prev;
241 ListCell *lc;
242
243 /* Look for a clump that new_clump can join to */
244 prev = NULL;
245 foreach(lc, clumps)
246 {
247 Clump *old_clump = (Clump *) lfirst(lc);
248
249 if (force ||
250 desirable_join(root, old_clump->joinrel, new_clump->joinrel))
251 {
252 RelOptInfo *joinrel;
253
254 /*
255 * Construct a RelOptInfo representing the join of these two input
256 * relations. Note that we expect the joinrel not to exist in
257 * root->join_rel_list yet, and so the paths constructed for it
258 * will only include the ones we want.
259 */
260 joinrel = make_join_rel(root,
261 old_clump->joinrel,
262 new_clump->joinrel);
263
264 /* Keep searching if join order is not valid */
265 if (joinrel)
266 {
267 /* Create GatherPaths for any useful partial paths for rel */
268 generate_gather_paths(root, joinrel);
269
270 /* Find and save the cheapest paths for this joinrel */
271 set_cheapest(joinrel);
272
273 /* Absorb new clump into old */
274 old_clump->joinrel = joinrel;
275 old_clump->size += new_clump->size;
276 pfree(new_clump);
277
278 /* Remove old_clump from list */
279 clumps = list_delete_cell(clumps, lc, prev);
280
281 /*
282 * Recursively try to merge the enlarged old_clump with
283 * others. When no further merge is possible, we'll reinsert
284 * it into the list.
285 */
286 return merge_clump(root, clumps, old_clump, force);
287 }
288 }
289 prev = lc;
290 }
291
292 /*
293 * No merging is possible, so add new_clump as an independent clump, in
294 * proper order according to size. We can be fast for the common case
295 * where it has size 1 --- it should always go at the end.
296 */
297 if (clumps == NIL || new_clump->size == 1)
298 return lappend(clumps, new_clump);
299
300 /* Check if it belongs at the front */
301 lc = list_head(clumps);
302 if (new_clump->size > ((Clump *) lfirst(lc))->size)
303 return lcons(new_clump, clumps);
304
305 /* Else search for the place to insert it */
306 for (;;)
307 {
308 ListCell *nxt = lnext(lc);
309
310 if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
311 break; /* it belongs after 'lc', before 'nxt' */
312 lc = nxt;
313 }
314 lappend_cell(clumps, lc, new_clump);
315
316 return clumps;
317 }
318
319 /*
320 * Heuristics for gimme_tree: do we want to join these two relations?
321 */
322 static bool
desirable_join(PlannerInfo * root,RelOptInfo * outer_rel,RelOptInfo * inner_rel)323 desirable_join(PlannerInfo *root,
324 RelOptInfo *outer_rel, RelOptInfo *inner_rel)
325 {
326 /*
327 * Join if there is an applicable join clause, or if there is a join order
328 * restriction forcing these rels to be joined.
329 */
330 if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
331 have_join_order_restriction(root, outer_rel, inner_rel))
332 return true;
333
334 /* Otherwise postpone the join till later. */
335 return false;
336 }
337