1 /*------------------------------------------------------------------------
2  *
3  * geqo_eval.c
4  *	  Routines to evaluate query trees
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/backend/optimizer/geqo/geqo_eval.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 /* contributed by:
15    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
16    *  Martin Utesch				 * Institute of Automatic Control	   *
17    =							 = University of Mining and Technology =
18    *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   *
19    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
20  */
21 
22 #include "postgres.h"
23 
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27 
28 #include "optimizer/geqo.h"
29 #include "optimizer/joininfo.h"
30 #include "optimizer/pathnode.h"
31 #include "optimizer/paths.h"
32 #include "utils/memutils.h"
33 
34 
35 /* A "clump" of already-joined relations within gimme_tree */
36 typedef struct
37 {
38 	RelOptInfo *joinrel;		/* joinrel for the set of relations */
39 	int			size;			/* number of input relations in clump */
40 } Clump;
41 
42 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
43 						 int num_gene, bool force);
44 static bool desirable_join(PlannerInfo *root,
45 						   RelOptInfo *outer_rel, RelOptInfo *inner_rel);
46 
47 
48 /*
49  * geqo_eval
50  *
51  * Returns cost of a query tree as an individual of the population.
52  *
53  * If no legal join order can be extracted from the proposed tour,
54  * returns DBL_MAX.
55  */
56 Cost
57 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
58 {
59 	MemoryContext mycontext;
60 	MemoryContext oldcxt;
61 	RelOptInfo *joinrel;
62 	Cost		fitness;
63 	int			savelength;
64 	struct HTAB *savehash;
65 
66 	/*
67 	 * Create a private memory context that will hold all temp storage
68 	 * allocated inside gimme_tree().
69 	 *
70 	 * Since geqo_eval() will be called many times, we can't afford to let all
71 	 * that memory go unreclaimed until end of statement.  Note we make the
72 	 * temp context a child of the planner's normal context, so that it will
73 	 * be freed even if we abort via ereport(ERROR).
74 	 */
75 	mycontext = AllocSetContextCreate(CurrentMemoryContext,
76 									  "GEQO",
77 									  ALLOCSET_DEFAULT_SIZES);
78 	oldcxt = MemoryContextSwitchTo(mycontext);
79 
80 	/*
81 	 * gimme_tree will add entries to root->join_rel_list, which may or may
82 	 * not already contain some entries.  The newly added entries will be
83 	 * recycled by the MemoryContextDelete below, so we must ensure that the
84 	 * list is restored to its former state before exiting.  We can do this by
85 	 * truncating the list to its original length.  NOTE this assumes that any
86 	 * added entries are appended at the end!
87 	 *
88 	 * We also must take care not to mess up the outer join_rel_hash, if there
89 	 * is one.  We can do this by just temporarily setting the link to NULL.
90 	 * (If we are dealing with enough join rels, which we very likely are, a
91 	 * new hash table will get built and used locally.)
92 	 *
93 	 * join_rel_level[] shouldn't be in use, so just Assert it isn't.
94 	 */
95 	savelength = list_length(root->join_rel_list);
96 	savehash = root->join_rel_hash;
97 	Assert(root->join_rel_level == NULL);
98 
99 	root->join_rel_hash = NULL;
100 
101 	/* construct the best path for the given combination of relations */
102 	joinrel = gimme_tree(root, tour, num_gene);
103 
104 	/*
105 	 * compute fitness, if we found a valid join
106 	 *
107 	 * XXX geqo does not currently support optimization for partial result
108 	 * retrieval, nor do we take any cognizance of possible use of
109 	 * parameterized paths --- how to fix?
110 	 */
111 	if (joinrel)
112 	{
113 		Path	   *best_path = joinrel->cheapest_total_path;
114 
115 		fitness = best_path->total_cost;
116 	}
117 	else
118 		fitness = DBL_MAX;
119 
120 	/*
121 	 * Restore join_rel_list to its former state, and put back original
122 	 * hashtable if any.
123 	 */
124 	root->join_rel_list = list_truncate(root->join_rel_list,
125 										savelength);
126 	root->join_rel_hash = savehash;
127 
128 	/* release all the memory acquired within gimme_tree */
129 	MemoryContextSwitchTo(oldcxt);
130 	MemoryContextDelete(mycontext);
131 
132 	return fitness;
133 }
134 
135 /*
136  * gimme_tree
137  *	  Form planner estimates for a join tree constructed in the specified
138  *	  order.
139  *
140  *	 'tour' is the proposed join order, of length 'num_gene'
141  *
142  * Returns a new join relation whose cheapest path is the best plan for
143  * this join order.  NB: will return NULL if join order is invalid and
144  * we can't modify it into a valid order.
145  *
146  * The original implementation of this routine always joined in the specified
147  * order, and so could only build left-sided plans (and right-sided and
148  * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
149  * It could never produce a "bushy" plan.  This had a couple of big problems,
150  * of which the worst was that there are situations involving join order
151  * restrictions where the only valid plans are bushy.
152  *
153  * The present implementation takes the given tour as a guideline, but
154  * postpones joins that are illegal or seem unsuitable according to some
155  * heuristic rules.  This allows correct bushy plans to be generated at need,
156  * and as a nice side-effect it seems to materially improve the quality of the
157  * generated plans.  Note however that since it's just a heuristic, it can
158  * still fail in some cases.  (In particular, we might clump together
159  * relations that actually mustn't be joined yet due to LATERAL restrictions;
160  * since there's no provision for un-clumping, this must lead to failure.)
161  */
162 RelOptInfo *
163 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
164 {
165 	GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
166 	List	   *clumps;
167 	int			rel_count;
168 
169 	/*
170 	 * Sometimes, a relation can't yet be joined to others due to heuristics
171 	 * or actual semantic restrictions.  We maintain a list of "clumps" of
172 	 * successfully joined relations, with larger clumps at the front. Each
173 	 * new relation from the tour is added to the first clump it can be joined
174 	 * to; if there is none then it becomes a new clump of its own. When we
175 	 * enlarge an existing clump we check to see if it can now be merged with
176 	 * any other clumps.  After the tour is all scanned, we forget about the
177 	 * heuristics and try to forcibly join any remaining clumps.  If we are
178 	 * unable to merge all the clumps into one, fail.
179 	 */
180 	clumps = NIL;
181 
182 	for (rel_count = 0; rel_count < num_gene; rel_count++)
183 	{
184 		int			cur_rel_index;
185 		RelOptInfo *cur_rel;
186 		Clump	   *cur_clump;
187 
188 		/* Get the next input relation */
189 		cur_rel_index = (int) tour[rel_count];
190 		cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
191 										  cur_rel_index - 1);
192 
193 		/* Make it into a single-rel clump */
194 		cur_clump = (Clump *) palloc(sizeof(Clump));
195 		cur_clump->joinrel = cur_rel;
196 		cur_clump->size = 1;
197 
198 		/* Merge it into the clumps list, using only desirable joins */
199 		clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
200 	}
201 
202 	if (list_length(clumps) > 1)
203 	{
204 		/* Force-join the remaining clumps in some legal order */
205 		List	   *fclumps;
206 		ListCell   *lc;
207 
208 		fclumps = NIL;
209 		foreach(lc, clumps)
210 		{
211 			Clump	   *clump = (Clump *) lfirst(lc);
212 
213 			fclumps = merge_clump(root, fclumps, clump, num_gene, true);
214 		}
215 		clumps = fclumps;
216 	}
217 
218 	/* Did we succeed in forming a single join relation? */
219 	if (list_length(clumps) != 1)
220 		return NULL;
221 
222 	return ((Clump *) linitial(clumps))->joinrel;
223 }
224 
225 /*
226  * Merge a "clump" into the list of existing clumps for gimme_tree.
227  *
228  * We try to merge the clump into some existing clump, and repeat if
229  * successful.  When no more merging is possible, insert the clump
230  * into the list, preserving the list ordering rule (namely, that
231  * clumps of larger size appear earlier).
232  *
233  * If force is true, merge anywhere a join is legal, even if it causes
234  * a cartesian join to be performed.  When force is false, do only
235  * "desirable" joins.
236  */
237 static List *
238 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
239 			bool force)
240 {
241 	ListCell   *prev;
242 	ListCell   *lc;
243 
244 	/* Look for a clump that new_clump can join to */
245 	prev = NULL;
246 	foreach(lc, clumps)
247 	{
248 		Clump	   *old_clump = (Clump *) lfirst(lc);
249 
250 		if (force ||
251 			desirable_join(root, old_clump->joinrel, new_clump->joinrel))
252 		{
253 			RelOptInfo *joinrel;
254 
255 			/*
256 			 * Construct a RelOptInfo representing the join of these two input
257 			 * relations.  Note that we expect the joinrel not to exist in
258 			 * root->join_rel_list yet, and so the paths constructed for it
259 			 * will only include the ones we want.
260 			 */
261 			joinrel = make_join_rel(root,
262 									old_clump->joinrel,
263 									new_clump->joinrel);
264 
265 			/* Keep searching if join order is not valid */
266 			if (joinrel)
267 			{
268 				/* Create paths for partitionwise joins. */
269 				generate_partitionwise_join_paths(root, joinrel);
270 
271 				/*
272 				 * Except for the topmost scan/join rel, consider gathering
273 				 * partial paths.  We'll do the same for the topmost scan/join
274 				 * rel once we know the final targetlist (see
275 				 * grouping_planner).
276 				 */
277 				if (old_clump->size + new_clump->size < num_gene)
278 					generate_gather_paths(root, joinrel, false);
279 
280 				/* Find and save the cheapest paths for this joinrel */
281 				set_cheapest(joinrel);
282 
283 				/* Absorb new clump into old */
284 				old_clump->joinrel = joinrel;
285 				old_clump->size += new_clump->size;
286 				pfree(new_clump);
287 
288 				/* Remove old_clump from list */
289 				clumps = list_delete_cell(clumps, lc, prev);
290 
291 				/*
292 				 * Recursively try to merge the enlarged old_clump with
293 				 * others.  When no further merge is possible, we'll reinsert
294 				 * it into the list.
295 				 */
296 				return merge_clump(root, clumps, old_clump, num_gene, force);
297 			}
298 		}
299 		prev = lc;
300 	}
301 
302 	/*
303 	 * No merging is possible, so add new_clump as an independent clump, in
304 	 * proper order according to size.  We can be fast for the common case
305 	 * where it has size 1 --- it should always go at the end.
306 	 */
307 	if (clumps == NIL || new_clump->size == 1)
308 		return lappend(clumps, new_clump);
309 
310 	/* Check if it belongs at the front */
311 	lc = list_head(clumps);
312 	if (new_clump->size > ((Clump *) lfirst(lc))->size)
313 		return lcons(new_clump, clumps);
314 
315 	/* Else search for the place to insert it */
316 	for (;;)
317 	{
318 		ListCell   *nxt = lnext(lc);
319 
320 		if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
321 			break;				/* it belongs after 'lc', before 'nxt' */
322 		lc = nxt;
323 	}
324 	lappend_cell(clumps, lc, new_clump);
325 
326 	return clumps;
327 }
328 
329 /*
330  * Heuristics for gimme_tree: do we want to join these two relations?
331  */
332 static bool
333 desirable_join(PlannerInfo *root,
334 			   RelOptInfo *outer_rel, RelOptInfo *inner_rel)
335 {
336 	/*
337 	 * Join if there is an applicable join clause, or if there is a join order
338 	 * restriction forcing these rels to be joined.
339 	 */
340 	if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
341 		have_join_order_restriction(root, outer_rel, inner_rel))
342 		return true;
343 
344 	/* Otherwise postpone the join till later. */
345 	return false;
346 }
347