1 /*------------------------------------------------------------------------ 2 * 3 * geqo_eval.c 4 * Routines to evaluate query trees 5 * 6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * src/backend/optimizer/geqo/geqo_eval.c 10 * 11 *------------------------------------------------------------------------- 12 */ 13 14 /* contributed by: 15 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= 16 * Martin Utesch * Institute of Automatic Control * 17 = = University of Mining and Technology = 18 * utesch@aut.tu-freiberg.de * Freiberg, Germany * 19 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= 20 */ 21 22 #include "postgres.h" 23 24 #include <float.h> 25 #include <limits.h> 26 #include <math.h> 27 28 #include "optimizer/geqo.h" 29 #include "optimizer/joininfo.h" 30 #include "optimizer/pathnode.h" 31 #include "optimizer/paths.h" 32 #include "utils/memutils.h" 33 34 35 /* A "clump" of already-joined relations within gimme_tree */ 36 typedef struct 37 { 38 RelOptInfo *joinrel; /* joinrel for the set of relations */ 39 int size; /* number of input relations in clump */ 40 } Clump; 41 42 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, 43 int num_gene, bool force); 44 static bool desirable_join(PlannerInfo *root, 45 RelOptInfo *outer_rel, RelOptInfo *inner_rel); 46 47 48 /* 49 * geqo_eval 50 * 51 * Returns cost of a query tree as an individual of the population. 52 * 53 * If no legal join order can be extracted from the proposed tour, 54 * returns DBL_MAX. 55 */ 56 Cost 57 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) 58 { 59 MemoryContext mycontext; 60 MemoryContext oldcxt; 61 RelOptInfo *joinrel; 62 Cost fitness; 63 int savelength; 64 struct HTAB *savehash; 65 66 /* 67 * Create a private memory context that will hold all temp storage 68 * allocated inside gimme_tree(). 69 * 70 * Since geqo_eval() will be called many times, we can't afford to let all 71 * that memory go unreclaimed until end of statement. Note we make the 72 * temp context a child of the planner's normal context, so that it will 73 * be freed even if we abort via ereport(ERROR). 74 */ 75 mycontext = AllocSetContextCreate(CurrentMemoryContext, 76 "GEQO", 77 ALLOCSET_DEFAULT_SIZES); 78 oldcxt = MemoryContextSwitchTo(mycontext); 79 80 /* 81 * gimme_tree will add entries to root->join_rel_list, which may or may 82 * not already contain some entries. The newly added entries will be 83 * recycled by the MemoryContextDelete below, so we must ensure that the 84 * list is restored to its former state before exiting. We can do this by 85 * truncating the list to its original length. NOTE this assumes that any 86 * added entries are appended at the end! 87 * 88 * We also must take care not to mess up the outer join_rel_hash, if there 89 * is one. We can do this by just temporarily setting the link to NULL. 90 * (If we are dealing with enough join rels, which we very likely are, a 91 * new hash table will get built and used locally.) 92 * 93 * join_rel_level[] shouldn't be in use, so just Assert it isn't. 94 */ 95 savelength = list_length(root->join_rel_list); 96 savehash = root->join_rel_hash; 97 Assert(root->join_rel_level == NULL); 98 99 root->join_rel_hash = NULL; 100 101 /* construct the best path for the given combination of relations */ 102 joinrel = gimme_tree(root, tour, num_gene); 103 104 /* 105 * compute fitness, if we found a valid join 106 * 107 * XXX geqo does not currently support optimization for partial result 108 * retrieval, nor do we take any cognizance of possible use of 109 * parameterized paths --- how to fix? 110 */ 111 if (joinrel) 112 { 113 Path *best_path = joinrel->cheapest_total_path; 114 115 fitness = best_path->total_cost; 116 } 117 else 118 fitness = DBL_MAX; 119 120 /* 121 * Restore join_rel_list to its former state, and put back original 122 * hashtable if any. 123 */ 124 root->join_rel_list = list_truncate(root->join_rel_list, 125 savelength); 126 root->join_rel_hash = savehash; 127 128 /* release all the memory acquired within gimme_tree */ 129 MemoryContextSwitchTo(oldcxt); 130 MemoryContextDelete(mycontext); 131 132 return fitness; 133 } 134 135 /* 136 * gimme_tree 137 * Form planner estimates for a join tree constructed in the specified 138 * order. 139 * 140 * 'tour' is the proposed join order, of length 'num_gene' 141 * 142 * Returns a new join relation whose cheapest path is the best plan for 143 * this join order. NB: will return NULL if join order is invalid and 144 * we can't modify it into a valid order. 145 * 146 * The original implementation of this routine always joined in the specified 147 * order, and so could only build left-sided plans (and right-sided and 148 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). 149 * It could never produce a "bushy" plan. This had a couple of big problems, 150 * of which the worst was that there are situations involving join order 151 * restrictions where the only valid plans are bushy. 152 * 153 * The present implementation takes the given tour as a guideline, but 154 * postpones joins that are illegal or seem unsuitable according to some 155 * heuristic rules. This allows correct bushy plans to be generated at need, 156 * and as a nice side-effect it seems to materially improve the quality of the 157 * generated plans. Note however that since it's just a heuristic, it can 158 * still fail in some cases. (In particular, we might clump together 159 * relations that actually mustn't be joined yet due to LATERAL restrictions; 160 * since there's no provision for un-clumping, this must lead to failure.) 161 */ 162 RelOptInfo * 163 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) 164 { 165 GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; 166 List *clumps; 167 int rel_count; 168 169 /* 170 * Sometimes, a relation can't yet be joined to others due to heuristics 171 * or actual semantic restrictions. We maintain a list of "clumps" of 172 * successfully joined relations, with larger clumps at the front. Each 173 * new relation from the tour is added to the first clump it can be joined 174 * to; if there is none then it becomes a new clump of its own. When we 175 * enlarge an existing clump we check to see if it can now be merged with 176 * any other clumps. After the tour is all scanned, we forget about the 177 * heuristics and try to forcibly join any remaining clumps. If we are 178 * unable to merge all the clumps into one, fail. 179 */ 180 clumps = NIL; 181 182 for (rel_count = 0; rel_count < num_gene; rel_count++) 183 { 184 int cur_rel_index; 185 RelOptInfo *cur_rel; 186 Clump *cur_clump; 187 188 /* Get the next input relation */ 189 cur_rel_index = (int) tour[rel_count]; 190 cur_rel = (RelOptInfo *) list_nth(private->initial_rels, 191 cur_rel_index - 1); 192 193 /* Make it into a single-rel clump */ 194 cur_clump = (Clump *) palloc(sizeof(Clump)); 195 cur_clump->joinrel = cur_rel; 196 cur_clump->size = 1; 197 198 /* Merge it into the clumps list, using only desirable joins */ 199 clumps = merge_clump(root, clumps, cur_clump, num_gene, false); 200 } 201 202 if (list_length(clumps) > 1) 203 { 204 /* Force-join the remaining clumps in some legal order */ 205 List *fclumps; 206 ListCell *lc; 207 208 fclumps = NIL; 209 foreach(lc, clumps) 210 { 211 Clump *clump = (Clump *) lfirst(lc); 212 213 fclumps = merge_clump(root, fclumps, clump, num_gene, true); 214 } 215 clumps = fclumps; 216 } 217 218 /* Did we succeed in forming a single join relation? */ 219 if (list_length(clumps) != 1) 220 return NULL; 221 222 return ((Clump *) linitial(clumps))->joinrel; 223 } 224 225 /* 226 * Merge a "clump" into the list of existing clumps for gimme_tree. 227 * 228 * We try to merge the clump into some existing clump, and repeat if 229 * successful. When no more merging is possible, insert the clump 230 * into the list, preserving the list ordering rule (namely, that 231 * clumps of larger size appear earlier). 232 * 233 * If force is true, merge anywhere a join is legal, even if it causes 234 * a cartesian join to be performed. When force is false, do only 235 * "desirable" joins. 236 */ 237 static List * 238 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, 239 bool force) 240 { 241 ListCell *prev; 242 ListCell *lc; 243 244 /* Look for a clump that new_clump can join to */ 245 prev = NULL; 246 foreach(lc, clumps) 247 { 248 Clump *old_clump = (Clump *) lfirst(lc); 249 250 if (force || 251 desirable_join(root, old_clump->joinrel, new_clump->joinrel)) 252 { 253 RelOptInfo *joinrel; 254 255 /* 256 * Construct a RelOptInfo representing the join of these two input 257 * relations. Note that we expect the joinrel not to exist in 258 * root->join_rel_list yet, and so the paths constructed for it 259 * will only include the ones we want. 260 */ 261 joinrel = make_join_rel(root, 262 old_clump->joinrel, 263 new_clump->joinrel); 264 265 /* Keep searching if join order is not valid */ 266 if (joinrel) 267 { 268 /* Create paths for partitionwise joins. */ 269 generate_partitionwise_join_paths(root, joinrel); 270 271 /* 272 * Except for the topmost scan/join rel, consider gathering 273 * partial paths. We'll do the same for the topmost scan/join 274 * rel once we know the final targetlist (see 275 * grouping_planner). 276 */ 277 if (old_clump->size + new_clump->size < num_gene) 278 generate_gather_paths(root, joinrel, false); 279 280 /* Find and save the cheapest paths for this joinrel */ 281 set_cheapest(joinrel); 282 283 /* Absorb new clump into old */ 284 old_clump->joinrel = joinrel; 285 old_clump->size += new_clump->size; 286 pfree(new_clump); 287 288 /* Remove old_clump from list */ 289 clumps = list_delete_cell(clumps, lc, prev); 290 291 /* 292 * Recursively try to merge the enlarged old_clump with 293 * others. When no further merge is possible, we'll reinsert 294 * it into the list. 295 */ 296 return merge_clump(root, clumps, old_clump, num_gene, force); 297 } 298 } 299 prev = lc; 300 } 301 302 /* 303 * No merging is possible, so add new_clump as an independent clump, in 304 * proper order according to size. We can be fast for the common case 305 * where it has size 1 --- it should always go at the end. 306 */ 307 if (clumps == NIL || new_clump->size == 1) 308 return lappend(clumps, new_clump); 309 310 /* Check if it belongs at the front */ 311 lc = list_head(clumps); 312 if (new_clump->size > ((Clump *) lfirst(lc))->size) 313 return lcons(new_clump, clumps); 314 315 /* Else search for the place to insert it */ 316 for (;;) 317 { 318 ListCell *nxt = lnext(lc); 319 320 if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) 321 break; /* it belongs after 'lc', before 'nxt' */ 322 lc = nxt; 323 } 324 lappend_cell(clumps, lc, new_clump); 325 326 return clumps; 327 } 328 329 /* 330 * Heuristics for gimme_tree: do we want to join these two relations? 331 */ 332 static bool 333 desirable_join(PlannerInfo *root, 334 RelOptInfo *outer_rel, RelOptInfo *inner_rel) 335 { 336 /* 337 * Join if there is an applicable join clause, or if there is a join order 338 * restriction forcing these rels to be joined. 339 */ 340 if (have_relevant_joinclause(root, outer_rel, inner_rel) || 341 have_join_order_restriction(root, outer_rel, inner_rel)) 342 return true; 343 344 /* Otherwise postpone the join till later. */ 345 return false; 346 } 347