1 /*------------------------------------------------------------------------
2  *
3  * geqo_main.c
4  *	  solution to the query optimization problem
5  *	  by means of a Genetic Algorithm (GA)
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/optimizer/geqo/geqo_main.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /* contributed by:
16    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
17    *  Martin Utesch				 * Institute of Automatic Control	   *
18    =							 = University of Mining and Technology =
19    *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   *
20    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
21  */
22 
23 /* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */
24 
25 #include "postgres.h"
26 
27 #include <math.h>
28 
29 #include "optimizer/geqo_misc.h"
30 #include "optimizer/geqo_mutation.h"
31 #include "optimizer/geqo_pool.h"
32 #include "optimizer/geqo_random.h"
33 #include "optimizer/geqo_selection.h"
34 
35 
36 /*
37  * Configuration options
38  */
39 int			Geqo_effort;
40 int			Geqo_pool_size;
41 int			Geqo_generations;
42 double		Geqo_selection_bias;
43 double		Geqo_seed;
44 
45 
46 static int	gimme_pool_size(int nr_rel);
47 static int	gimme_number_generations(int pool_size);
48 
49 /* complain if no recombination mechanism is #define'd */
50 #if !defined(ERX) && \
51 	!defined(PMX) && \
52 	!defined(CX)  && \
53 	!defined(PX)  && \
54 	!defined(OX1) && \
55 	!defined(OX2)
56 #error "must choose one GEQO recombination mechanism in geqo.h"
57 #endif
58 
59 
60 /*
61  * geqo
62  *	  solution of the query optimization problem
63  *	  similar to a constrained Traveling Salesman Problem (TSP)
64  */
65 
66 RelOptInfo *
geqo(PlannerInfo * root,int number_of_rels,List * initial_rels)67 geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
68 {
69 	GeqoPrivateData private;
70 	int			generation;
71 	Chromosome *momma;
72 	Chromosome *daddy;
73 	Chromosome *kid;
74 	Pool	   *pool;
75 	int			pool_size,
76 				number_generations;
77 
78 #ifdef GEQO_DEBUG
79 	int			status_interval;
80 #endif
81 	Gene	   *best_tour;
82 	RelOptInfo *best_rel;
83 
84 #if defined(ERX)
85 	Edge	   *edge_table;		/* list of edges */
86 	int			edge_failures = 0;
87 #endif
88 #if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
89 	City	   *city_table;		/* list of cities */
90 #endif
91 #if defined(CX)
92 	int			cycle_diffs = 0;
93 	int			mutations = 0;
94 #endif
95 
96 /* set up private information */
97 	root->join_search_private = (void *) &private;
98 	private.initial_rels = initial_rels;
99 
100 /* initialize private number generator */
101 	geqo_set_seed(root, Geqo_seed);
102 
103 /* set GA parameters */
104 	pool_size = gimme_pool_size(number_of_rels);
105 	number_generations = gimme_number_generations(pool_size);
106 #ifdef GEQO_DEBUG
107 	status_interval = 10;
108 #endif
109 
110 /* allocate genetic pool memory */
111 	pool = alloc_pool(root, pool_size, number_of_rels);
112 
113 /* random initialization of the pool */
114 	random_init_pool(root, pool);
115 
116 /* sort the pool according to cheapest path as fitness */
117 	sort_pool(root, pool);		/* we have to do it only one time, since all
118 								 * kids replace the worst individuals in
119 								 * future (-> geqo_pool.c:spread_chromo ) */
120 
121 #ifdef GEQO_DEBUG
122 	elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
123 		 pool_size,
124 		 pool->data[0].worth,
125 		 pool->data[pool_size - 1].worth);
126 #endif
127 
128 /* allocate chromosome momma and daddy memory */
129 	momma = alloc_chromo(root, pool->string_length);
130 	daddy = alloc_chromo(root, pool->string_length);
131 
132 #if defined (ERX)
133 #ifdef GEQO_DEBUG
134 	elog(DEBUG2, "using edge recombination crossover [ERX]");
135 #endif
136 /* allocate edge table memory */
137 	edge_table = alloc_edge_table(root, pool->string_length);
138 #elif defined(PMX)
139 #ifdef GEQO_DEBUG
140 	elog(DEBUG2, "using partially matched crossover [PMX]");
141 #endif
142 /* allocate chromosome kid memory */
143 	kid = alloc_chromo(root, pool->string_length);
144 #elif defined(CX)
145 #ifdef GEQO_DEBUG
146 	elog(DEBUG2, "using cycle crossover [CX]");
147 #endif
148 /* allocate city table memory */
149 	kid = alloc_chromo(root, pool->string_length);
150 	city_table = alloc_city_table(root, pool->string_length);
151 #elif defined(PX)
152 #ifdef GEQO_DEBUG
153 	elog(DEBUG2, "using position crossover [PX]");
154 #endif
155 /* allocate city table memory */
156 	kid = alloc_chromo(root, pool->string_length);
157 	city_table = alloc_city_table(root, pool->string_length);
158 #elif defined(OX1)
159 #ifdef GEQO_DEBUG
160 	elog(DEBUG2, "using order crossover [OX1]");
161 #endif
162 /* allocate city table memory */
163 	kid = alloc_chromo(root, pool->string_length);
164 	city_table = alloc_city_table(root, pool->string_length);
165 #elif defined(OX2)
166 #ifdef GEQO_DEBUG
167 	elog(DEBUG2, "using order crossover [OX2]");
168 #endif
169 /* allocate city table memory */
170 	kid = alloc_chromo(root, pool->string_length);
171 	city_table = alloc_city_table(root, pool->string_length);
172 #endif
173 
174 
175 /* my pain main part: */
176 /* iterative optimization */
177 
178 	for (generation = 0; generation < number_generations; generation++)
179 	{
180 		/* SELECTION: using linear bias function */
181 		geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
182 
183 #if defined (ERX)
184 		/* EDGE RECOMBINATION CROSSOVER */
185 		gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);
186 
187 		kid = momma;
188 
189 		/* are there any edge failures ? */
190 		edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length);
191 #elif defined(PMX)
192 		/* PARTIALLY MATCHED CROSSOVER */
193 		pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
194 #elif defined(CX)
195 		/* CYCLE CROSSOVER */
196 		cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
197 		/* mutate the child */
198 		if (cycle_diffs == 0)
199 		{
200 			mutations++;
201 			geqo_mutation(root, kid->string, pool->string_length);
202 		}
203 #elif defined(PX)
204 		/* POSITION CROSSOVER */
205 		px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
206 #elif defined(OX1)
207 		/* ORDER CROSSOVER */
208 		ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
209 #elif defined(OX2)
210 		/* ORDER CROSSOVER */
211 		ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
212 #endif
213 
214 
215 		/* EVALUATE FITNESS */
216 		kid->worth = geqo_eval(root, kid->string, pool->string_length);
217 
218 		/* push the kid into the wilderness of life according to its worth */
219 		spread_chromo(root, kid, pool);
220 
221 
222 #ifdef GEQO_DEBUG
223 		if (status_interval && !(generation % status_interval))
224 			print_gen(stdout, pool, generation);
225 #endif
226 
227 	}
228 
229 
230 #if defined(ERX) && defined(GEQO_DEBUG)
231 	if (edge_failures != 0)
232 		elog(LOG, "[GEQO] failures: %d, average: %d",
233 			 edge_failures, (int) number_generations / edge_failures);
234 	else
235 		elog(LOG, "[GEQO] no edge failures detected");
236 #endif
237 
238 #if defined(CX) && defined(GEQO_DEBUG)
239 	if (mutations != 0)
240 		elog(LOG, "[GEQO] mutations: %d, generations: %d",
241 			 mutations, number_generations);
242 	else
243 		elog(LOG, "[GEQO] no mutations processed");
244 #endif
245 
246 #ifdef GEQO_DEBUG
247 	print_pool(stdout, pool, 0, pool_size - 1);
248 #endif
249 
250 #ifdef GEQO_DEBUG
251 	elog(DEBUG1, "GEQO best is %.2f after %d generations",
252 		 pool->data[0].worth, number_generations);
253 #endif
254 
255 
256 	/*
257 	 * got the cheapest query tree processed by geqo; first element of the
258 	 * population indicates the best query tree
259 	 */
260 	best_tour = (Gene *) pool->data[0].string;
261 
262 	best_rel = gimme_tree(root, best_tour, pool->string_length);
263 
264 	if (best_rel == NULL)
265 		elog(ERROR, "geqo failed to make a valid plan");
266 
267 	/* DBG: show the query plan */
268 #ifdef NOT_USED
269 	print_plan(best_plan, root);
270 #endif
271 
272 	/* ... free memory stuff */
273 	free_chromo(root, momma);
274 	free_chromo(root, daddy);
275 
276 #if defined (ERX)
277 	free_edge_table(root, edge_table);
278 #elif defined(PMX)
279 	free_chromo(root, kid);
280 #elif defined(CX)
281 	free_chromo(root, kid);
282 	free_city_table(root, city_table);
283 #elif defined(PX)
284 	free_chromo(root, kid);
285 	free_city_table(root, city_table);
286 #elif defined(OX1)
287 	free_chromo(root, kid);
288 	free_city_table(root, city_table);
289 #elif defined(OX2)
290 	free_chromo(root, kid);
291 	free_city_table(root, city_table);
292 #endif
293 
294 	free_pool(root, pool);
295 
296 	/* ... clear root pointer to our private storage */
297 	root->join_search_private = NULL;
298 
299 	return best_rel;
300 }
301 
302 
303 /*
304  * Return either configured pool size or a good default
305  *
306  * The default is based on query size (no. of relations) = 2^(QS+1),
307  * but constrained to a range based on the effort value.
308  */
309 static int
gimme_pool_size(int nr_rel)310 gimme_pool_size(int nr_rel)
311 {
312 	double		size;
313 	int			minsize;
314 	int			maxsize;
315 
316 	/* Legal pool size *must* be at least 2, so ignore attempt to select 1 */
317 	if (Geqo_pool_size >= 2)
318 		return Geqo_pool_size;
319 
320 	size = pow(2.0, nr_rel + 1.0);
321 
322 	maxsize = 50 * Geqo_effort; /* 50 to 500 individuals */
323 	if (size > maxsize)
324 		return maxsize;
325 
326 	minsize = 10 * Geqo_effort; /* 10 to 100 individuals */
327 	if (size < minsize)
328 		return minsize;
329 
330 	return (int) ceil(size);
331 }
332 
333 
334 /*
335  * Return either configured number of generations or a good default
336  *
337  * The default is the same as the pool size, which allows us to be
338  * sure that less-fit individuals get pushed out of the breeding
339  * population before the run finishes.
340  */
341 static int
gimme_number_generations(int pool_size)342 gimme_number_generations(int pool_size)
343 {
344 	if (Geqo_generations > 0)
345 		return Geqo_generations;
346 
347 	return pool_size;
348 }
349