/* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
   Copyright (c) 2008, 2020, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

/*
  TODO:
  Fix that MAYBE_KEY are stored in the tree so that we can detect use
  of full hash keys for queries like:

  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);

*/

/*
  This file contains:

  RangeAnalysisModule
    A module that accepts a condition, index (or partitioning) description,
    and builds lists of intervals (in index/partitioning space), such that
    all possible records that match the condition are contained within the
    intervals.
    The entry point for the range analysis module is the get_mm_tree()
    function.

    The lists are returned in the form of a complicated structure of
    interlinked SEL_TREE/SEL_IMERGE/SEL_ARG objects.
    See quick_range_seq_next, find_used_partitions for examples of how to walk
    this structure.
    All direct "users" of this module are located within this file, too.


  PartitionPruningModule
    A module that accepts a partitioned table, condition, and finds which
    partitions we will need to use in query execution. Search down for
    "PartitionPruningModule" for description.
    The module has a single entry point - the prune_partitions() function.


  Range/index_merge/groupby-minmax optimizer module
    A module that accepts a table, condition, and returns
     - a QUICK_*_SELECT object that can be used to retrieve rows that match
       the specified condition, or a "no records will match the condition"
       statement.

    The module entry points are
      test_quick_select()
      get_quick_select_for_ref()


  Record retrieval code for range/index_merge/groupby-min-max.
    Implementations of QUICK_*_SELECT classes.

  KeyTupleFormat
  ~~~~~~~~~~~~~~
  The code in this file (and elsewhere) makes operations on key value tuples.
  Those tuples are stored in the following format:

  The tuple is a sequence of key part values. The length of a key part value
  depends only on its type (and not on the actual value stored).

    KeyTuple: keypart1-data, keypart2-data, ...

  The value of each keypart is stored in the following format:

    keypart_data: [isnull_byte] keypart-value-bytes

  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
  be used to check this), then the first byte is a NULL indicator with the
  following valid values:
    1  - keypart has NULL value.
    0  - keypart has non-NULL value.

  <questionable-statement> If isnull_byte==1 (NULL value), then the following
  keypart->length bytes must be 0.
  </questionable-statement>

  keypart-value-bytes holds the value. Its format depends on the field type.
  The length of keypart-value-bytes may or may not depend on the value being
  stored. The default is that the length is static and equal to
  KEY_PART_INFO::length.

  Key parts with (key_part_flag & HA_BLOB_PART) have a length depending on
  the value:

     keypart-value-bytes: value_length value_bytes

  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.

  See key_copy() and key_restore() for code to move data between index tuple
  and table record.
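
  A worked example (illustrative only): for an index on columns
  (a INT NOT NULL, b INT), the tuple for (a=1, b=NULL) consists of the 4
  keypart-value-bytes of 'a' (there is no NULL indicator byte, as 'a' is
  NOT NULL), followed by a NULL indicator byte of 1 for 'b' and then
  keypart->length zeroed bytes for b's value part.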

  CAUTION: the above description is only sergefp's understanding of the
  subject and may omit some details.
*/

#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation          // gcc: Class implementation
#endif

#include "mariadb.h"
#include "sql_priv.h"
#include "key.h"              // is_key_used, key_copy, key_cmp, key_restore
#include "sql_parse.h"        // check_stack_overrun
#include "sql_partition.h"    // get_part_id_func, PARTITION_ITERATOR,
                              // struct partition_info, NOT_A_PARTITION_ID
#include "records.h"          // init_read_record, end_read_record
#include <m_ctype.h>
#include "sql_select.h"
#include "sql_statistics.h"
#include "uniques.h"
#include "my_json_writer.h"

#ifndef EXTRA_DEBUG
#define test_rb_tree(A,B) {}
#define test_use_count(A) {}
#endif

/*
  Convert double value to #rows. Currently this does floor(), and we
  might consider using round() instead.
*/
#define double2rows(x) ((ha_rows)(x))
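/* e.g. double2rows(3.9) == 3: the cast truncates the fractional part */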

/*
  This should be long enough so that any memcmp with a string that
  starts from '\0' won't cross is_null_string boundaries, even
  if the memcmp is optimized to compare 4, 8 or 16 bytes at once
*/
static uchar is_null_string[20]= {1,0};

/**
  Helper function to compare two SEL_ARG's.
*/
static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
{
  if (sa1 == NULL && sa2 == NULL)
    return true;
  if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
    return false;
  return sa1->all_same(sa2);
}

class SEL_IMERGE;

#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
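/*
  For example, swap_clone_flag(CLONE_KEY1_MAYBE) == CLONE_KEY2_MAYBE and
  vice versa; a value with both flags set maps to itself.
*/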


/*
  While objects of the class SEL_ARG represent ranges for indexes or
  index infixes (including ranges for index prefixes and index suffixes),
  objects of the class SEL_TREE represent AND/OR formulas of such ranges.
  Currently an AND/OR formula represented by a SEL_TREE object can have
  at most three levels:

    <SEL_TREE formula> ::=
      [ <SEL_RANGE_TREE formula> AND ]
      [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]

    <SEL_RANGE_TREE formula> ::=
      <SEL_ARG formula> [ AND <SEL_ARG_formula> ... ]

    <SEL_IMERGE formula> ::=
      <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]

  As we can see from the above definitions:
   - a SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
   - a SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
   - a SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
     and SEL_IMERGE formulas.
  It's required above that a SEL_TREE formula has at least one conjunct.

  Usually we will consider normalized SEL_RANGE_TREE formulas where we use
  TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.

  We will call a SEL_TREE object simply 'tree'.
  The part of a tree that represents a SEL_RANGE_TREE formula is called the
  'range part' of the tree while the remaining part is called the 'imerge
  part'. If a tree contains only a range part then we call such a tree a
  'range tree'. Components of a range tree that represent SEL_ARG formulas
  are called ranges.
  If a tree does not contain any range part we call such a tree an
  'imerge tree'. Components of the imerge part of a tree that represent
  SEL_IMERGE formulas are called imerges.

  Usually we'll designate:
           SEL_TREE formulas          by T_1,...,T_k
           SEL_ARG formulas           by R_1,...,R_k
           SEL_RANGE_TREE formulas    by RT_1,...,RT_k
           SEL_IMERGE formulas        by M_1,...,M_k
  Accordingly we'll use:
           t_1,...,t_k  - to designate trees representing T_1,...,T_k
           r_1,...,r_k  - to designate ranges representing R_1,...,R_k
           rt_1,...,rt_k - to designate range trees representing RT_1,...,RT_k
           m_1,...,m_k  - to designate imerges representing M_1,...,M_k

  SEL_TREE objects are usually built from WHERE conditions or
  ON expressions.
  A SEL_TREE object always represents an inference of the condition it is
  built from. Therefore, if a row satisfies a SEL_TREE formula it also
  satisfies the condition it is built from.

  The following transformations of a tree t representing a SEL_TREE formula T
  yield a new tree t1 that represents an inference of T: T=>T1.
    (1) remove any SEL_ARG tree from the range part of t
    (2) remove any imerge from the tree t
    (3) remove any SEL_ARG tree from any range tree contained
        in any imerge of the tree

  Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE
  objects in many cases can be effectively used to filter out a big part
  of table rows that do not satisfy WHERE/ON conditions utilizing
  only single or multiple range index scans.

  A single range index scan is constructed for a range tree that contains
  only one SEL_ARG object for an index or an index prefix.
  An index intersection scan can be constructed for a range tree
  that contains several SEL_ARG objects. Currently index intersection
  scans are constructed only for single-point ranges.
  An index merge scan is constructed for an imerge tree that contains only
  one imerge. If the range trees of this imerge contain only single-point
  ranges then a union of index intersections can be built.

  Usually the tree built by the range optimizer for a query table contains
  more than one range in the range part, and additionally may contain some
  imerges in the imerge part. The range optimizer evaluates all of them one
  by one and chooses the range or the imerge that provides the cheapest
  single or multiple range index scan of the table. According to rules
  (1)-(3) this scan always filters out only those rows that do not satisfy
  the query conditions.

  For any condition the SEL_TREE object for it is built in a bottom-up
  manner starting from the range trees for the predicates. The tree_and
  function builds a tree for any conjunction of formulas from the trees
  for its conjuncts. The tree_or function builds a tree for any disjunction
  of formulas from the trees for its disjuncts.
*/
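
/*
  Example: a condition such as

    key1 < 10 AND (key2 = 5 OR key3 > 7)

  can be represented by a tree whose range part holds the single range
  key1 < 10 and whose imerge part holds one imerge with the two range
  trees {key2 = 5} and {key3 > 7}.
*/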

class SEL_TREE :public Sql_alloc
{
public:
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
       (type == SEL_TREE::IMPOSSIBLE)
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;

  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
    type(KEY), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
  /*
    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such SEL_TREE instead of NULL)
  */
  Mem_root_array<SEL_ARG *, true> keys;
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using index_merge. The list is non-empty only
    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */

  struct st_index_scan_info **index_scans;     /* list of index scans */
  struct st_index_scan_info **index_scans_end; /* last index scan */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */

  bool without_ranges() { return keys_map.is_clear_all(); }
  bool without_imerges() { return merges.is_empty(); }
};


class PARAM : public RANGE_OPT_PARAM
{
public:
  ha_rows quick_rows[MAX_KEY];

  /*
    This will collect 'possible keys' based on the range optimization.

    Queries with a JOIN object actually use the ref optimizer (see
    add_key_field) to collect possible_keys. This is used by single table
    UPDATE/DELETE.
  */
  key_map possible_keys;
  longlong baseflag;
  uint max_key_parts, range_count;

  bool quick;                           // Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* Number of ranges in the last checked tree->key */
  uint n_ranges;
  uint8 first_null_comp;      /* first null component if any, 0 - otherwise */
};


class TABLE_READ_PLAN;
  class TRP_RANGE;
  class TRP_ROR_INTERSECT;
  class TRP_ROR_UNION;
  class TRP_INDEX_INTERSECT;
  class TRP_INDEX_MERGE;
  class TRP_GROUP_MIN_MAX;

struct st_index_scan_info;
struct st_ror_scan_info;

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                                  SEL_ARG *tree, bool update_tbl_stats,
                                  uint *mrr_flags, uint *bufsize,
                                  Cost_estimate *cost, bool *is_ror_scan);

QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
                                     SEL_ARG *key_tree, uint mrr_flags,
                                     uint mrr_buf_size, MEM_ROOT *alloc);
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool for_range_access,
                                       double read_time);
static
TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
                                              double read_time);
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering);
static
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
                                                   double read_time);
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time,
                                         bool named_trace= false);
static
TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
                                        TRP_INDEX_MERGE *imerge_trp,
                                        double read_time);
static
TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
                                          double read_time);

#ifndef DBUG_OFF
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg);
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end);
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
#endif

static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
                          SEL_TREE *tree1, SEL_TREE *tree2);
static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
                         SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
                       SEL_ARG *key1, SEL_ARG *key2);
static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
                        SEL_ARG *key1, SEL_ARG *key2,
                        uint clone_flag);
static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
                                  SEL_ARG *key1, SEL_ARG *key2);
static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
                                   SEL_ARG *key1, SEL_ARG *key2,
                                   uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
                    SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
                    uchar *max_key,uint max_key_flag);
static bool eq_tree(SEL_ARG* a,SEL_ARG *b);

SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                             uint length);
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);

static
SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
                                      SEL_ARG *sel_arg);
static
bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
                                  SEL_ARG *key2);

#include "opt_range_mrr.cc"

static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
                                       key_map *common_keys);
static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
                                          SEL_TREE *tree);

static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
                                  SEL_TREE *tree1, SEL_TREE *tree2,
                                  key_map *common_keys);
static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
                                   SEL_TREE *tree1, SEL_TREE *tree2,
                                   key_map common_keys);
static int and_range_trees(RANGE_OPT_PARAM *param,
                           SEL_TREE *tree1, SEL_TREE *tree2,
                           SEL_TREE *result);
static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree);
static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                                   SEL_ARG **backup);
static void print_key_value(String *out, const KEY_PART_INFO *key_part,
                            const uchar* key, uint length);
static void print_keyparts_name(String *out, const KEY_PART_INFO *key_part,
                                uint n_keypart, key_part_map keypart_map);

static void trace_ranges(Json_writer_array *range_trace,
                         PARAM *param, uint idx,
                         SEL_ARG *keypart,
                         const KEY_PART_INFO *key_parts);

static
void print_range(String *out, const KEY_PART_INFO *key_part,
                 KEY_MULTI_RANGE *range, uint n_key_parts);

static
void print_range_for_non_indexed_field(String *out, Field *field,
                                       KEY_MULTI_RANGE *range);

static void print_min_range_operator(String *out, const ha_rkey_function flag);
static void print_max_range_operator(String *out, const ha_rkey_function flag);

static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field);

/*
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
  a condition in the following form:
   (t_1||t_2||...||t_N) && (next)

  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
  (t_i,t_j) contains SEL_ARGS for the same index.

  SEL_TREE contained in SEL_IMERGE always has merges=NULL.

  This class relies on memory manager to do the cleanup.
*/
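
/*
  Example: a condition such as
    (key1 < 3 OR key2 > 7) AND (key3 = 5 OR key4 < 2)
  is conceptually represented by two SEL_IMERGE objects, one per
  disjunction, where each of the four disjuncts above becomes one of
  the t_i.
*/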

class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                   SEL_IMERGE *new_imerge);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                              uint n_init_trees,
                              SEL_TREE *new_tree,
                              bool is_first_check_pass,
                              bool *is_last_check_pass);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                uint n_init_trees,
                                SEL_IMERGE* imerge,
                                bool is_first_check_pass,
                                bool *is_last_check_pass);
};


/*
  Add a range tree to the range trees of this imerge

  SYNOPSIS
    or_sel_tree()
      param     Context info for the operation
      tree      SEL_TREE to add to this imerge

  DESCRIPTION
    The function just adds the range tree 'tree' to the range trees
    of this imerge.

  RETURN
     0  if the operation is a success
    -1  if the function runs out of memory
*/

int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  if (trees_next == trees_end)
  {
    const int realloc_ratio= 2;         /* Double size for next round */
    size_t old_elements= (trees_end - trees);
    size_t old_size= sizeof(SEL_TREE**) * old_elements;
    size_t new_size= old_size * realloc_ratio;
    SEL_TREE **new_trees;
    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
      return -1;
    memcpy(new_trees, trees, old_size);
    trees=      new_trees;
    trees_next= trees + old_elements;
    trees_end=  trees + old_elements * realloc_ratio;
  }
  *(trees_next++)= tree;
  return 0;
}


/*
  Check if any of the range trees of this imerge intersects with a given tree

  SYNOPSIS
    have_common_keys()
      param   Context info for the function
      tree    SEL_TREE intersection with the imerge range trees is checked for

  DESCRIPTION
    The function checks whether there is any range tree rt_i in this imerge
    such that there are some indexes for which ranges are defined in both
    rt_i and the range part of the SEL_TREE tree.
    To check this the function calls the function sel_trees_have_common_keys.

  RETURN
    TRUE   if there are such range trees in this imerge
    FALSE  otherwise
*/

bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  for (SEL_TREE** or_tree= trees, **bound= trees_next;
       or_tree != bound; or_tree++)
  {
    key_map common_keys;
    if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
      return TRUE;
  }
  return FALSE;
}


/*
  Perform AND operation for this imerge and the range part of a tree

  SYNOPSIS
    and_sel_tree()
      param           Context info for the operation
      tree            SEL_TREE for the second operand of the operation
      new_imerge  OUT imerge for the result of the operation

  DESCRIPTION
    This function performs the AND operation for this imerge m and the
    range part of the SEL_TREE tree rt. In other words the function
    pushes rt into this imerge. The resulting imerge is returned in
    the parameter new_imerge.
    If this imerge m represents the formula
      RT_1 OR ... OR RT_k
    then the resulting imerge of the function represents the formula
      (RT_1 AND RT) OR ... OR (RT_k AND RT)
    The function calls the function and_range_trees to construct the
    range tree representing (RT_i AND RT).

  NOTE
    The function may return an empty imerge without any range trees.
    This happens when each call of and_range_trees returns an
    impossible range tree (SEL_TREE::IMPOSSIBLE).
    Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).

  RETURN
     0  if the operation is a success
    -1  otherwise: there is not enough memory to perform the operation
*/

int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                             SEL_IMERGE *new_imerge)
{
  for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++)
  {
    SEL_TREE *res_or_tree= 0;
    SEL_TREE *and_tree= 0;
    if (!(res_or_tree= new SEL_TREE(param->mem_root, param->keys)) ||
        !(and_tree= new SEL_TREE(tree, TRUE, param)))
      return (-1);
    if (!and_range_trees(param, *or_tree, and_tree, res_or_tree))
    {
      if (new_imerge->or_sel_tree(param, res_or_tree))
        return (-1);
    }
  }
  return 0;
}


/*
  Perform OR operation on this imerge and the range part of a tree

  SYNOPSIS
    or_sel_tree_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for oring
      tree                   SEL_TREE whose range part is to be ored
      is_first_check_pass    <=> the first call of the function for this imerge
      is_last_check_pass OUT <=> no more calls of the function for this imerge

  DESCRIPTION
    The function performs the OR operation on this imerge m and the range
    part of the SEL_TREE tree rt. It always replaces this imerge with the
    result of the operation.

    The operation can be performed in two different modes: with
    is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
    this imerge differently.

    Given this imerge represents the formula
      RT_1 OR ... OR RT_k:

    1. In the first mode, when is_first_check_pass==TRUE :
      1.1. If rt must be ored (see the function sel_trees_must_be_ored) with
           some rt_j (there may be only one such range tree in the imerge)
           then the function produces an imerge representing the formula
             RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
           where the tree for (RT_j OR RT) is built by oring the pairs
           of SEL_ARG trees for the corresponding indexes
      1.2. Otherwise the function produces the imerge representing the formula:
             RT_1 OR ... OR RT_k OR RT.

    2. In the second mode, when is_first_check_pass==FALSE :
      2.1. For each rt_j in the imerge that can be ored (see the function
           sel_trees_can_be_ored) with rt the function replaces rt_j with a
           range tree such that for each index for which ranges are defined
           in both rt_j and rt the tree contains the result of oring of
           these ranges.
      2.2. In other cases the function does not produce any imerge.

    When is_first_check_pass==TRUE the function returns FALSE in the
    parameter is_last_check_pass if there is no rt_j such that rt_j can be
    ored with rt, but, at the same time, it's not true that rt_j must be
    ored with rt.
    When is_first_check_pass==FALSE the function always returns FALSE in the
    parameter is_last_check_pass.

  RETURN
    1  The result of oring rt_j and rt that must be ored is a range tree
       with type==SEL_TREE::ALWAYS
       (in this case the imerge m should be discarded)
   -1  The function runs out of memory
    0  in all other cases
*/

int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                                        uint n_trees,
                                        SEL_TREE *tree,
                                        bool is_first_check_pass,
                                        bool *is_last_check_pass)
{
  bool was_ored= FALSE;
  *is_last_check_pass= is_first_check_pass;
  SEL_TREE** or_tree= trees;
  for (uint i= 0; i < n_trees; i++, or_tree++)
  {
    SEL_TREE *result= 0;
    key_map result_keys;
    key_map ored_keys;
    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
    {
      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
                                                ored_keys);
      if (must_be_ored || !is_first_check_pass)
      {
        result_keys.clear_all();
        result= *or_tree;
        for (uint key_no= 0; key_no < param->keys; key_no++)
        {
          if (!ored_keys.is_set(key_no))
          {
            result->keys[key_no]= 0;
            continue;
          }
          SEL_ARG *key1= (*or_tree)->keys[key_no];
          SEL_ARG *key2= tree->keys[key_no];
          key2->incr_refs();
          if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1,
                                                       key2)))
          {
            result_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
            {
              key1= result->keys[key_no];
              (key1)->test_use_count(key1);
            }
#endif
          }
        }
      }
      else if (is_first_check_pass)
        *is_last_check_pass= FALSE;
    }

    if (result)
    {
      result->keys_map= result_keys;
      if (result_keys.is_clear_all())
        result->type= SEL_TREE::ALWAYS;
      if ((result->type == SEL_TREE::MAYBE) ||
          (result->type == SEL_TREE::ALWAYS))
        return 1;
      /* SEL_TREE::IMPOSSIBLE is impossible here */
      *or_tree= result;
      was_ored= TRUE;
    }
  }
  if (was_ored)
    return 0;

  if (is_first_check_pass && !*is_last_check_pass &&
      !(tree= new SEL_TREE(tree, FALSE, param)))
    return (-1);
  return or_sel_tree(param, tree);
}


/*
  Perform OR operation on this imerge and another imerge

  SYNOPSIS
    or_sel_imerge_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for oring
      imerge                 The second operand of the operation
      is_first_check_pass    <=> the first call of the function for this imerge
      is_last_check_pass OUT <=> no more calls of the function for this imerge

  DESCRIPTION
    For each range tree rt from 'imerge' the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks that performs the OR operation on
    this SEL_IMERGE object m and the tree rt. The mode of the operation is
    specified by the parameter is_first_check_pass. Each call of
    SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
    The function returns FALSE in the parameter is_last_check_pass if
    at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
    returns FALSE as the value of its last parameter.

  RETURN
    1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
       (in this case the imerge m should be discarded)
   -1  The function runs out of memory
    0  in all other cases
*/

int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                          uint n_trees,
                                          SEL_IMERGE* imerge,
                                          bool is_first_check_pass,
                                          bool *is_last_check_pass)
{
  *is_last_check_pass= TRUE;
  SEL_TREE** tree= imerge->trees;
  SEL_TREE** tree_end= imerge->trees_next;
  for ( ; tree < tree_end; tree++)
  {
    uint rc;
    bool is_last= TRUE;
    rc= or_sel_tree_with_checks(param, n_trees, *tree,
                                is_first_check_pass, &is_last);
    if (!is_last)
      *is_last_check_pass= FALSE;
    if (rc)
      return rc;
  }
  return 0;
}


/*
  Copy constructor for SEL_TREE objects

  SYNOPSIS
    SEL_TREE
      arg            The source tree for the constructor
      without_merges <=> only the range part of the tree arg is copied
      param          Context info for the operation

  DESCRIPTION
    The constructor creates a full copy of the SEL_TREE arg if
    the parameter without_merges==FALSE. Otherwise a tree is created
    that contains a copy of only the range part of the tree arg.
*/

SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
                   RANGE_OPT_PARAM *param)
  : Sql_alloc(),
    keys(param->mem_root, param->keys),
    n_ror_scans(0)
{
  keys_map= arg->keys_map;
  type= arg->type;
  MEM_ROOT *mem_root;

  for (uint idx= 0; idx < param->keys; idx++)
  {
    if ((keys[idx]= arg->keys[idx]))
      keys[idx]->incr_refs_all();
  }

  if (without_merges)
    return;

  mem_root= current_thd->mem_root;
  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new (mem_root) SEL_IMERGE(el, 0, param);
    if (!merge || merge->trees == merge->trees_next)
    {
      merges.empty();
      return;
    }
    merges.push_back(merge, mem_root);
  }
}


/*
  Copy constructor for SEL_IMERGE objects

  SYNOPSIS
    SEL_IMERGE
      arg         The source imerge for the constructor
      cnt         How many trees from arg are to be copied
      param       Context info for the operation

  DESCRIPTION
    If cnt==0 then the constructor creates a full copy of the
    imerge arg. Otherwise only the first cnt trees of the imerge
    are copied.
*/

SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
                       RANGE_OPT_PARAM *param) : Sql_alloc()
{
  size_t elements= (arg->trees_end - arg->trees);
  if (elements > PREALLOCED_TREES)
  {
    size_t size= elements * sizeof (SEL_TREE **);
    if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
      goto mem_err;
  }
  else
    trees= &trees_prealloced[0];

  trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
  trees_end= trees + elements;

  for (SEL_TREE **tree= trees, **arg_tree= arg->trees; tree < trees_next;
       tree++, arg_tree++)
  {
    if (!(*tree= new SEL_TREE(*arg_tree, TRUE, param)))
      goto mem_err;
  }

  return;

mem_err:
  trees= &trees_prealloced[0];
  trees_next= trees;
  trees_end= trees;
}


/*
  Perform AND operation on two imerge lists

  SYNOPSIS
    imerge_list_and_list()
      im1               The first imerge list for the operation
      im2               The second imerge list for the operation

  DESCRIPTION
    The function just appends the imerge list im2 to the imerge list im1

  RETURN VALUE
    none
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  im1->append(im2);
}


/*
  Perform OR operation on two imerge lists

  SYNOPSIS
    imerge_list_or_list()
      param             Context info for the operation
      im1               The first imerge list for the operation
      im2               The second imerge list for the operation

  DESCRIPTION
    Assuming that the first imerge list represents the formula
      F1= M1_1 AND ... AND M1_k1
    while the second imerge list represents the formula
      F2= M2_1 AND ... AND M2_k2,
    where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
    and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
    the function builds a list of imerges for some formula that can be
    inferred from the formula (F1 OR F2).

    More exactly the function builds imerges for the formula (M1_1 OR M2_1).
    Note that
      (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
      AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
      M1_1 OR M2_1.
    So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).

    To build imerges for the formula (M1_1 OR M2_1) the function invokes,
    possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
    for the imerge m1_1.
    At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the OR operation on the imerge m1_1 and the range tree rt2_1_1
    by calling SEL_IMERGE::or_sel_tree_with_checks with
    is_first_check_pass==TRUE.
    The resulting imerge of the operation is ored with the next range tree of
    the imerge m2_1. This oring continues until the last range tree from
    m2_1 has been ored.
    At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the same sequence of OR operations, but now calling
    SEL_IMERGE::or_sel_tree_with_checks with is_first_check_pass==FALSE.

    The imerges that the operation produces replace those in the list im1.

  RETURN
    0  if the operation is a success
   -1  if the function has run out of memory
*/

int imerge_list_or_list(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  uint rc;
  bool is_last_check_pass= FALSE;
  SEL_IMERGE *imerge= im1->head();
  uint elems= (uint)(imerge->trees_next-imerge->trees);
  MEM_ROOT *mem_root= current_thd->mem_root;

  im1->empty();
  im1->push_back(imerge, mem_root);

  rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                        TRUE, &is_last_check_pass);
  if (rc)
  {
    if (rc == 1)
    {
      im1->empty();
      rc= 0;
    }
    return rc;
  }

  if (!is_last_check_pass)
  {
    SEL_IMERGE* new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, param);
    if (new_imerge)
    {
      is_last_check_pass= TRUE;
      rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                                FALSE, &is_last_check_pass);
      if (!rc)
        im1->push_back(new_imerge, mem_root);
    }
  }
  return rc;
}


/*
  Perform OR operation for each imerge from a list and the range part of a tree

  SYNOPSIS
    imerge_list_or_tree()
      param       Context info for the operation
      merges      The list of imerges to be ored with the range part of tree
      tree        SEL_TREE whose range part is to be ored with the imerges

  DESCRIPTION
    For each imerge mi from the list 'merges' the function performs the OR
    operation with mi and the range part of 'tree' rt, producing one or
    two imerges.

    Given that the imerge mi represents the formula RTi_1 OR ... OR RTi_k,
    the function forms the merges by the following rules:

    1. If rt cannot be ored with any of the trees rti the function just
       produces an imerge that represents the formula
         RTi_1 OR ... OR RTi_k OR RT.
    2. If there exists a tree rtj that must be ored with rt the function
       produces an imerge that represents the formula
         RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
       where the range tree for (RTi_j OR RT) is constructed by oring the
       SEL_ARG trees that must be ored.
    3. For each rti_j that can be ored with rt the function produces
       the new tree rti_j' and replaces rti_j with this new range tree.

    In any case the function removes mi from the list and then adds all
    produced imerges.

    To build imerges by rules 1-3 the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
    call it passes TRUE for the is_first_check_pass parameter.
    At this first call imerges by rules 1-2 are built. If the call
    returns FALSE in its is_last_check_pass parameter then the
    function is called a second time. At this call the imerge
    of rule 3 is produced.

    If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
    it means that the produced tree contains an always true
    range tree and the whole imerge can be discarded.

  RETURN
    1  if no imerges are produced
    0  otherwise
*/

static
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *merges,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List<SEL_IMERGE> additional_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    bool is_last_check_pass;
    int rc= 0;
    int rc1= 0;
    SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param);
    if (or_tree)
    {
      uint elems= (uint)(imerge->trees_next-imerge->trees);
      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                          TRUE, &is_last_check_pass);
      if (!is_last_check_pass)
      {
        SEL_IMERGE *new_imerge= new (mem_root) SEL_IMERGE(imerge, elems,
                                                          param);
        if (new_imerge)
        {
          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                                   FALSE, &is_last_check_pass);
          if (!rc1)
            additional_merges.push_back(new_imerge, mem_root);
        }
      }
    }
    if (rc || rc1 || !or_tree)
      it.remove();
  }

  merges->append(&additional_merges);
  return merges->is_empty();
}


/*
  Perform pushdown operation of the range part of a tree into given imerges

  SYNOPSIS
    imerge_list_and_tree()
      param           Context info for the operation
      merges   IN/OUT List of imerges to push the range part of 'tree' into
      tree            SEL_TREE whose range part is to be pushed into imerges
      replace         If the pushdown operation for an imerge is a success
                      then the original imerge is replaced with the result
                      of the pushdown

  DESCRIPTION
    For each imerge from the list merges the function pushes the range part
    rt of 'tree' into the imerge.
    More exactly if the imerge mi from the list represents the formula
      RTi_1 OR ... OR RTi_k
    the function builds a new imerge that represents the formula
      (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
    and adds this imerge to the list merges.
    To perform this pushdown operation the function calls the method
    SEL_IMERGE::and_sel_tree.
    For any imerge mi the new imerge is not created if for each pair of
    trees rti_j and rt the intersection of the indexes with defined ranges
    is empty.
    If the result of the pushdown operation for the imerge mi returns an
    imerge with no trees then not only is nothing added to the list
    merges, but mi itself is removed from the list.

  TODO
    Optimize the code in order to not create new SEL_IMERGE and new SEL_TREE
    objects when 'replace' is TRUE. (Currently this function is always called
    with this parameter equal to TRUE.)

  RETURN
    1  if no imerges are left in the list merges
    0  otherwise
*/

static
int imerge_list_and_tree(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *merges,
                         SEL_TREE *tree,
                         bool replace)
{
  SEL_IMERGE *imerge;
  SEL_IMERGE *new_imerge= NULL;
  List<SEL_IMERGE> new_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    if (!new_imerge)
      new_imerge= new (mem_root) SEL_IMERGE();
    if (imerge->have_common_keys(param, tree) &&
        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
    {
      if (new_imerge->trees == new_imerge->trees_next)
        it.remove();
      else
      {
        if (replace)
          it.replace(new_imerge);
        else
          new_merges.push_back(new_imerge, mem_root);
        new_imerge= NULL;
      }
    }
  }
  imerge_list_and_list(&new_merges, merges);
  *merges= new_merges;
  return merges->is_empty();
}


/***************************************************************************
** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
***************************************************************************/

/*
  make a select from mysql info
  Error is set as follows:
  0 = ok
  1 = Got some error (out of memory?)
*/

SQL_SELECT *make_select(TABLE *head, table_map const_tables,
                        table_map read_tables, COND *conds,
                        SORT_INFO *filesort,
                        bool allow_null_cond,
                        int *error)
{
  SQL_SELECT *select;
  DBUG_ENTER("make_select");

  *error=0;

  if (!conds && !allow_null_cond)
    DBUG_RETURN(0);
  if (!(select= new (head->in_use->mem_root) SQL_SELECT))
  {
    *error= 1;                          // out of memory
    DBUG_RETURN(0);                     /* purecov: inspected */
  }
  select->read_tables=read_tables;
  select->const_tables=const_tables;
  select->head=head;
  select->cond= conds;

  if (filesort && my_b_inited(&filesort->io_cache))
  {
    /*
      Hijack the filesort io_cache for make_select
      SQL_SELECT will be responsible for ensuring that it's properly freed.
    */
    select->file= filesort->io_cache;
    select->records=(ha_rows) (select->file.end_of_file/
                               head->file->ref_length);
    my_b_clear(&filesort->io_cache);
  }
  DBUG_RETURN(select);
}


SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0)
{
  quick_keys.clear_all(); needed_reg.clear_all();
  my_b_clear(&file);
}


void SQL_SELECT::cleanup()
{
  delete quick;
  quick= 0;
  if (free_cond)
  {
    free_cond=0;
    delete cond;
    cond= 0;
  }
  close_cached_file(&file);
}


SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}

#undef index                                    // Fix for Unixware 7

QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}

QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :thd(thd), no_alloc(no_alloc), parent_alloc(parent_alloc),
   free_file(0),cur_range(NULL),last_range(0),dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head=  table;
  key_part_info= head->key_info[index].key_part;

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.mrr_buff_size;
  mrr_buf_desc= NULL;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0,
                   MYF(MY_THREAD_SPECIFIC));
    thd->mem_root= &alloc;
  }
  else
    bzero((char*) &alloc,sizeof(alloc));
  file= head->file;
  record= head->record[0];

  my_init_dynamic_array2(PSI_INSTRUMENT_ME, &ranges, sizeof(QUICK_RANGE*),
                         thd->alloc(sizeof(QUICK_RANGE*) * 16), 16, 16,
                         MYF(MY_THREAD_SPECIFIC));

  /* Allocate a bitmap for used columns */
  if (!(bitmap= (my_bitmap_map*) thd->alloc(head->s->column_bitmap_size)))
  {
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    my_bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}


void QUICK_RANGE_SELECT::need_sorted_output()
{
  if (!(mrr_flags & HA_MRR_SORTED))
  {
    /*
      Native implementation can't produce sorted output. We'll have to
      switch to default
    */
    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  }
  mrr_flags |= HA_MRR_SORTED;
}


int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}


void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
}


QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      file->ha_end_keyread();
      if (free_file)
      {
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_unlock(current_thd);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
  }
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}

/*
  QUICK_INDEX_SORT_SELECT works as follows:
  - Do index scans, accumulate rowids in the Unique object
    (Unique will also sort and de-duplicate rowids)
  - Use rowids from unique to run a disk-ordered sweep
*/

QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param, TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
  index= MAX_KEY;
  head= table;
  init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                 thd->variables.range_alloc_block_size, 0,
                 MYF(MY_THREAD_SPECIFIC));
  DBUG_VOID_RETURN;
}

int QUICK_INDEX_SORT_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
  DBUG_RETURN(0);
}

int QUICK_INDEX_SORT_SELECT::reset()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
  const int retval= read_keys_and_merge();
  DBUG_RETURN(retval);
}

bool
QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
  if (head->file->is_clustering_key(quick_sel_range->index))
  {
    /*
      A quick_select over a clustered primary key is handled specifically.
      Here we assume:
      1. PK columns are included in any other merged index
      2. Scan on the PK is disk-ordered.
         (not meeting #2 will only cause performance degradation)

      We could treat clustered PK as any other index, but that would
      be inefficient. There is no point in doing a scan on
      CPK, remembering the rowid, then making an rnd_pos() call with
      that rowid.
    */
    pk_quick_select= quick_sel_range;
    DBUG_RETURN(0);
  }
  DBUG_RETURN(quick_selects.push_back(quick_sel_range, thd->mem_root));
}

QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
  delete unique;
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;
  head= table;
  record= head->record[0];
  if (!parent_alloc)
    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0,
                   MYF(MY_THREAD_SPECIFIC));
  else
    bzero(&alloc, sizeof(MEM_ROOT));
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}


/*
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init()

  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_INTERSECT_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
  /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
}


/*
  Initialize this quick select to be a ROR-merged scan.

  SYNOPSIS
    QUICK_RANGE_SELECT::init_ror_merged_scan()
      reuse_handler If TRUE, use head->file, otherwise create a separate
                    handler object

  NOTES
    This function creates and prepares for subsequent use a separate handler
    object if it can't reuse head->file. The reason for this is that during
    ROR-merge several key scans are performed simultaneously, and a single
    handler is only capable of preserving the context of a single key scan.

    In a ROR-merge, the quick select doing the merge does full record
    retrieval, while the merged quick selects read only keys.

  RETURN
    0  ROR child scan initialized, ok to use.
    1  error
*/

int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler,
                                             MEM_ROOT *local_alloc)
{
  handler *save_file= file, *org_file;
  THD *thd= head->in_use;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init())
    {
      DBUG_RETURN(1);
    }
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  if (!(file= head->file->clone(head->s->normalized_path.str, local_alloc)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init())
  {
    file->ha_external_unlock(thd);
    file->ha_close();
    goto failure;
  }
  free_file= TRUE;
  last_rowid= file->ref;

end:
  /*
    We are only going to read key fields and call position() on 'file'
    The following sets head->read_set (== column_bitmap) to only use this
    key. The 'column_bitmap' is used in ::get_next()
  */
  org_file= head->file;
  head->file= file;

  head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
  head->prepare_for_keyread(index, &column_bitmap);
  head->prepare_for_position();

  head->file= org_file;

  /* Restore head->read_set (and write_set) to what they had before the call */
  head->column_bitmaps_set(save_read_set, save_write_set);

  if (reset())
  {
    if (!reuse_handler)
    {
      file->ha_external_unlock(thd);
      file->ha_close();
      goto failure;
    }
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);

failure:
  head->column_bitmaps_set(save_read_set, save_write_set);
  delete file;
  file= save_file;
  free_file= false;
  DBUG_RETURN(1);
}


/*
  Initialize this quick select to be a part of a ROR-merged scan.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
      reuse_handler If TRUE, use head->file, otherwise create separate
                    handler object.
  RETURN
    0     OK
    other error code
*/

int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler,
                                                     MEM_ROOT *local_alloc)
{
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
  QUICK_SELECT_WITH_RECORD *cur;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");

  /* Initialize all merged "children" quick selects */
  DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
  if (!need_to_fetch_row && reuse_handler)
  {
    cur= quick_it++;
    quick= cur->quick;
    /*
      There is no use of this->file. Use it for the first of merged range
      selects.
    */
    int error= quick->init_ror_merged_scan(TRUE, local_alloc);
    if (unlikely(error))
      DBUG_RETURN(error);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
  }
  while ((cur= quick_it++))
  {
    quick= cur->quick;
#ifndef DBUG_OFF
    const MY_BITMAP * const save_read_set= quick->head->read_set;
    const MY_BITMAP * const save_write_set= quick->head->write_set;
#endif
    if (quick->init_ror_merged_scan(FALSE, local_alloc))
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);

    // Sets are shared by all members of "quick_selects" so must not change
#ifndef DBUG_OFF
    DBUG_ASSERT(quick->head->read_set == save_read_set);
    DBUG_ASSERT(quick->head->write_set == save_write_set);
#endif
    /* All merged scans share the same record buffer in intersection. */
    quick->record= head->record[0];
  }

  if (need_to_fetch_row &&
      unlikely(head->file->ha_rnd_init_with_error(false)))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


/*
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_INTERSECT_SELECT::reset()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
  if (!scans_inited && init_ror_merged_scan(TRUE, &alloc))
    DBUG_RETURN(1);
  scans_inited= TRUE;
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
  QUICK_SELECT_WITH_RECORD *qr;
  while ((qr= it++))
    qr->quick->reset();
  DBUG_RETURN(0);
}
1681
1682
1683 /*
1684 Add a merged quick select to this ROR-intersection quick select.
1685
1686 SYNOPSIS
1687 QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1688 alloc Mem root to create auxiliary structures on
1689 quick Quick select to be added. The quick select must return
1690 rows in rowid order.
1691 NOTES
1692 This call can only be made before init() is called.
1693
1694 RETURN
1695 FALSE OK
1696 TRUE Out of memory.
1697 */
1698
1699 bool
1700 QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *local_alloc,
1701 QUICK_RANGE_SELECT *quick)
1702 {
1703 QUICK_SELECT_WITH_RECORD *qr;
1704 if (!(qr= new QUICK_SELECT_WITH_RECORD) ||
1705 !(qr->key_tuple= (uchar*)alloc_root(local_alloc,
1706 quick->max_used_key_length)))
1707 return TRUE;
1708 qr->quick= quick;
1709 return quick_selects.push_back(qr);
1710 }
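/*
Illustration (not part of the server): the requirement above - that every
merged quick select returns rows in rowid order - is what makes the
intersection computable by a plain merge, with no sorting or buffering.
A minimal standalone sketch of the same idea over sorted rowid arrays
(hypothetical helper; the real work happens in ::get_next()):

#include <algorithm>
#include <iterator>
#include <vector>

static std::vector<long>
rowid_intersect_sketch(const std::vector<std::vector<long> > &scans)
{
  if (scans.empty())
    return std::vector<long>();
  std::vector<long> result= scans[0];     // rowids from the first scan
  for (size_t i= 1; i < scans.size(); i++)
  {
    std::vector<long> tmp;
    // Both inputs are sorted by rowid, so a linear merge suffices
    std::set_intersection(result.begin(), result.end(),
                          scans[i].begin(), scans[i].end(),
                          std::back_inserter(tmp));
    result.swap(tmp);
  }
  return result;
}
*/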
1711
1712
1713 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
1714 {
1715 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
1716 quick_selects.delete_elements();
1717 delete cpk_quick;
1718 free_root(&alloc,MYF(0));
1719 if (need_to_fetch_row && head->file->inited != handler::NONE)
1720 head->file->ha_rnd_end();
1721 DBUG_VOID_RETURN;
1722 }
1723
1724
1725 QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
1726 TABLE *table)
1727 : thd(thd_param), scans_inited(FALSE)
1728 {
1729 index= MAX_KEY;
1730 head= table;
1731 rowid_length= table->file->ref_length;
1732 record= head->record[0];
1733 init_sql_alloc(key_memory_quick_range_select_root, &alloc,
1734 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
1735 thd_param->mem_root= &alloc;
1736 }
1737
1738
1739 /*
1740 Comparison function to be used by the QUICK_ROR_UNION_SELECT::queue
1741 priority queue.
1742
1743 SYNOPSIS
1744 QUICK_ROR_UNION_SELECT_queue_cmp()
1745 arg Pointer to QUICK_ROR_UNION_SELECT
1746 val1 First merged select
1747 val2 Second merged select
1748 */
1749
1750 C_MODE_START
1751
1752 static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
1753 {
1754 QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1755 return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
1756 ((QUICK_SELECT_I*)val2)->last_rowid);
1757 }
1758
1759 C_MODE_END
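/*
Illustration (not part of the server): with the comparator above, the
union keeps one candidate rowid per merged scan in a priority queue,
repeatedly pops the smallest one, suppresses duplicates (the same row may
be returned by several scans) and refills the queue from the scan that
was popped. A standalone sketch over sorted rowid arrays (hypothetical
helper; the real logic is in ::get_next()):

#include <functional>
#include <queue>
#include <utility>
#include <vector>

static std::vector<long>
rowid_union_sketch(const std::vector<std::vector<long> > &scans)
{
  typedef std::pair<long, size_t> Entry;          // (rowid, scan number)
  std::priority_queue<Entry, std::vector<Entry>,
                      std::greater<Entry> > pq;
  std::vector<size_t> pos(scans.size(), 0);
  for (size_t i= 0; i < scans.size(); i++)
    if (!scans[i].empty())
      pq.push(Entry(scans[i][0], i));             // prime the queue

  std::vector<long> result;
  while (!pq.empty())
  {
    Entry top= pq.top();
    pq.pop();
    if (result.empty() || result.back() != top.first)
      result.push_back(top.first);                // skip duplicate rowids
    size_t next= ++pos[top.second];
    if (next < scans[top.second].size())
      pq.push(Entry(scans[top.second][next], top.second));
  }
  return result;
}
*/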
1760
1761
1762 /*
1763 Do post-constructor initialization.
1764 SYNOPSIS
1765 QUICK_ROR_UNION_SELECT::init()
1766
1767 RETURN
1768 0 OK
1769 other Error code
1770 */
1771
1772 int QUICK_ROR_UNION_SELECT::init()
1773 {
1774 DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1775 if (init_queue(&queue, quick_selects.elements, 0,
1776 FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
1777 (void*) this, 0, 0))
1778 {
1779 bzero(&queue, sizeof(QUEUE));
1780 DBUG_RETURN(1);
1781 }
1782
1783 if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1784 DBUG_RETURN(1);
1785 prev_rowid= cur_rowid + head->file->ref_length;
1786 DBUG_RETURN(0);
1787 }
1788
1789
1790 /*
1791 Initialize quick select for row retrieval.
1792 SYNOPSIS
1793 reset()
1794
1795 RETURN
1796 0 OK
1797 other Error code
1798 */
1799
1800 int QUICK_ROR_UNION_SELECT::reset()
1801 {
1802 QUICK_SELECT_I *quick;
1803 int error;
1804 DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
1805 have_prev_rowid= FALSE;
1806 if (!scans_inited)
1807 {
1808 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1809 while ((quick= it++))
1810 {
1811 if (quick->init_ror_merged_scan(FALSE, &alloc))
1812 DBUG_RETURN(1);
1813 }
1814 scans_inited= TRUE;
1815 }
1816 queue_remove_all(&queue);
1817 /*
1818 Initialize scans for merged quick selects and put all merged quick
1819 selects into the queue.
1820 */
1821 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1822 while ((quick= it++))
1823 {
1824 if (unlikely((error= quick->reset())))
1825 DBUG_RETURN(error);
1826 if (unlikely((error= quick->get_next())))
1827 {
1828 if (error == HA_ERR_END_OF_FILE)
1829 continue;
1830 DBUG_RETURN(error);
1831 }
1832 quick->save_last_pos();
1833 queue_insert(&queue, (uchar*)quick);
1834 }
1835 /* Prepare for ha_rnd_pos calls. */
1836 if (head->file->inited && unlikely((error= head->file->ha_rnd_end())))
1837 {
1838 DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
1839 DBUG_RETURN(error);
1840 }
1841 if (unlikely((error= head->file->ha_rnd_init(false))))
1842 {
1843 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1844 DBUG_RETURN(error);
1845 }
1846
1847 DBUG_RETURN(0);
1848 }
1849
1850
1851 bool
1852 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
1853 {
1854 return quick_selects.push_back(quick_sel_range);
1855 }
1856
1857 QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
1858 {
1859 DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
1860 delete_queue(&queue);
1861 quick_selects.delete_elements();
1862 if (head->file->inited != handler::NONE)
1863 head->file->ha_rnd_end();
1864 free_root(&alloc,MYF(0));
1865 DBUG_VOID_RETURN;
1866 }
1867
1868
1869 QUICK_RANGE::QUICK_RANGE()
1870 :min_key(0),max_key(0),min_length(0),max_length(0),
1871 flag(NO_MIN_RANGE | NO_MAX_RANGE),
1872 min_keypart_map(0), max_keypart_map(0)
1873 {}
1874
1875 SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
1876 {
1877 type=arg.type;
1878 min_flag=arg.min_flag;
1879 max_flag=arg.max_flag;
1880 maybe_flag=arg.maybe_flag;
1881 maybe_null=arg.maybe_null;
1882 part=arg.part;
1883 field=arg.field;
1884 min_value=arg.min_value;
1885 max_value=arg.max_value;
1886 next_key_part=arg.next_key_part;
1887 max_part_no= arg.max_part_no;
1888 use_count=1; elements=1;
1889 weight=1;
1890 next= 0;
1891 if (next_key_part)
1892 {
1893 ++next_key_part->use_count;
1894 weight += next_key_part->weight;
1895 }
1896 }
1897
1898
1899 inline void SEL_ARG::make_root()
1900 {
1901 left=right= &null_element;
1902 color=BLACK;
1903 next=prev=0;
1904 use_count=0; elements=1;
1905 }
1906
1907 SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
1908 const uchar *max_value_arg)
1909 :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
1910 elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
1911 max_value((uchar*) max_value_arg), next(0),prev(0),
1912 next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1)
1913 {
1914 left=right= &null_element;
1915 max_part_no= 1;
1916 }
1917
1918 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
1919 uchar *min_value_, uchar *max_value_,
1920 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
1921 :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
1922 part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
1923 field(field_), min_value(min_value_), max_value(max_value_),
1924 next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1)
1925 {
1926 max_part_no= part+1;
1927 left=right= &null_element;
1928 }
1929
1930
1931 /*
1932 A number of helper classes:
1933 SEL_ARG_LE, SEL_ARG_LT, SEL_ARG_GT, SEL_ARG_GE,
1934 to share the code between:
1935 Field::stored_field_make_mm_leaf()
1936 Field::stored_field_make_mm_leaf_exact()
1937 */
1938 class SEL_ARG_LE: public SEL_ARG
1939 {
1940 public:
1941 SEL_ARG_LE(const uchar *key, Field *field)
1942 :SEL_ARG(field, key, key)
1943 {
1944 if (!field->real_maybe_null())
1945 min_flag= NO_MIN_RANGE; // From start
1946 else
1947 {
1948 min_value= is_null_string;
1949 min_flag= NEAR_MIN; // > NULL
1950 }
1951 }
1952 };
1953
1954
1955 class SEL_ARG_LT: public SEL_ARG_LE
1956 {
1957 public:
1958 /*
1959 Use this constructor if value->save_in_field() went precisely,
1960 without any data rounding or truncation.
1961 */
1962 SEL_ARG_LT(const uchar *key, Field *field)
1963 :SEL_ARG_LE(key, field)
1964 { max_flag= NEAR_MAX; }
1965 /*
1966 Use this constructor if value->save_in_field() returned success,
1967 but we don't know if rounding or truncation happened
1968 (as some Field::store() do not report minor data changes).
1969 */
1970 SEL_ARG_LT(THD *thd, const uchar *key, Field *field, Item *value)
1971 :SEL_ARG_LE(key, field)
1972 {
1973 if (stored_field_cmp_to_item(thd, field, value) == 0)
1974 max_flag= NEAR_MAX;
1975 }
1976 };
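/*
Example of why the second constructor exists: for an INT column the
condition "col < 10.9" stores 10 into the field, and
stored_field_cmp_to_item() reports that the stored value is smaller than
the original one. NEAR_MAX is then not set, so the interval correctly
becomes "col <= 10" instead of the wrong "col < 10".
*/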
1977
1978
1979 class SEL_ARG_GT: public SEL_ARG
1980 {
1981 public:
1982 /*
1983 Use this constructor if value->save_in_field() went precisely,
1984 without any data rounding or truncation.
1985 */
1986 SEL_ARG_GT(const uchar *key, const KEY_PART *key_part, Field *field)
1987 :SEL_ARG(field, key, key)
1988 {
1989 // Don't use open ranges for partial key_segments
1990 if (!(key_part->flag & HA_PART_KEY_SEG))
1991 min_flag= NEAR_MIN;
1992 max_flag= NO_MAX_RANGE;
1993 }
1994 /*
1995 Use this constructor if value->save_in_field() returned success,
1996 but we don't know if rounding or truncation happened
1997 (as some Field::store() do not report minor data changes).
1998 */
1999 SEL_ARG_GT(THD *thd, const uchar *key,
2000 const KEY_PART *key_part, Field *field, Item *value)
2001 :SEL_ARG(field, key, key)
2002 {
2003 // Don't use open ranges for partial key_segments
2004 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
2005 (stored_field_cmp_to_item(thd, field, value) <= 0))
2006 min_flag= NEAR_MIN;
2007 max_flag= NO_MAX_RANGE;
2008 }
2009 };
2010
2011
2012 class SEL_ARG_GE: public SEL_ARG
2013 {
2014 public:
2015 /*
2016 Use this constructor if value->save_in_field() went precisely,
2017 without any data rounding or truncation.
2018 */
2019 SEL_ARG_GE(const uchar *key, Field *field)
2020 :SEL_ARG(field, key, key)
2021 {
2022 max_flag= NO_MAX_RANGE;
2023 }
2024 /*
2025 Use this constructor if value->save_in_field() returned success,
2026 but we don't know if rounding or truncation happened
2027 (as some Field::store() do not report minor data changes).
2028 */
2029 SEL_ARG_GE(THD *thd, const uchar *key,
2030 const KEY_PART *key_part, Field *field, Item *value)
2031 :SEL_ARG(field, key, key)
2032 {
2033 // Don't use open ranges for partial key_segments
2034 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
2035 (stored_field_cmp_to_item(thd, field, value) < 0))
2036 min_flag= NEAR_MIN;
2037 max_flag= NO_MAX_RANGE;
2038 }
2039 };
2040
2041
2042 SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
2043 SEL_ARG **next_arg)
2044 {
2045 SEL_ARG *tmp;
2046
2047 /* Bail out if we have already generated too many SEL_ARGs */
2048 if (++param->alloced_sel_args > MAX_SEL_ARGS)
2049 return 0;
2050
2051 if (type != KEY_RANGE)
2052 {
2053 if (!(tmp= new (param->mem_root) SEL_ARG(type)))
2054 return 0; // out of memory
2055 tmp->prev= *next_arg; // Link into next/prev chain
2056 (*next_arg)->next=tmp;
2057 (*next_arg)= tmp;
2058 tmp->part= this->part;
2059 }
2060 else
2061 {
2062 if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
2063 min_flag, max_flag, maybe_flag)))
2064 return 0; // OOM
2065 tmp->parent=new_parent;
2066 tmp->next_key_part=next_key_part;
2067 if (left != &null_element)
2068 if (!(tmp->left=left->clone(param, tmp, next_arg)))
2069 return 0; // OOM
2070
2071 tmp->prev= *next_arg; // Link into next/prev chain
2072 (*next_arg)->next=tmp;
2073 (*next_arg)= tmp;
2074
2075 if (right != &null_element)
2076 if (!(tmp->right= right->clone(param, tmp, next_arg)))
2077 return 0; // OOM
2078 }
2079 increment_use_count(1);
2080 tmp->color= color;
2081 tmp->elements= this->elements;
2082 tmp->max_part_no= max_part_no;
2083 tmp->weight= weight;
2084 return tmp;
2085 }
2086
2087 /**
2088 This gives the first SEL_ARG in the interval list, which is also the
2089 minimal element in the red-black tree.
2090
2091 @return
2092 SEL_ARG first SEL_ARG in the interval list
2093 */
2094 SEL_ARG *SEL_ARG::first()
2095 {
2096 SEL_ARG *next_arg=this;
2097 if (!next_arg->left)
2098 return 0; // MAYBE_KEY
2099 while (next_arg->left != &null_element)
2100 next_arg=next_arg->left;
2101 return next_arg;
2102 }
2103
2104 const SEL_ARG *SEL_ARG::first() const
2105 {
2106 return const_cast<SEL_ARG*>(this)->first();
2107 }
2108
2109 SEL_ARG *SEL_ARG::last()
2110 {
2111 SEL_ARG *next_arg=this;
2112 if (!next_arg->right)
2113 return 0; // MAYBE_KEY
2114 while (next_arg->right != &null_element)
2115 next_arg=next_arg->right;
2116 return next_arg;
2117 }
2118
2119
2120 /*
2121 Check if a compare is ok, when one takes ranges into account
2122 Returns -2 or 2 if the ranges were 'joined', like < 2 and >= 2
2123 */
2124
2125 int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
2126 uint8 b_flag)
2127 {
2128 int cmp;
2129 /* First check if there was a compare to a min or max element */
2130 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2131 {
2132 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
2133 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
2134 return 0;
2135 return (a_flag & NO_MIN_RANGE) ? -1 : 1;
2136 }
2137 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2138 return (b_flag & NO_MIN_RANGE) ? 1 : -1;
2139
2140 if (field->real_maybe_null()) // If null is part of key
2141 {
2142 if (*a != *b)
2143 {
2144 return *a ? -1 : 1;
2145 }
2146 if (*a)
2147 goto end; // NULL where equal
2148 a++; b++; // Skip NULL marker
2149 }
2150 cmp=field->key_cmp(a , b);
2151 if (cmp) return cmp < 0 ? -1 : 1; // The values differed
2152
2153 // Check if the compared equal arguments were defined with an open/closed range
2154 end:
2155 if (a_flag & (NEAR_MIN | NEAR_MAX))
2156 {
2157 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
2158 return 0;
2159 if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
2160 return (a_flag & NEAR_MIN) ? 2 : -2;
2161 return (a_flag & NEAR_MIN) ? 1 : -1;
2162 }
2163 if (b_flag & (NEAR_MIN | NEAR_MAX))
2164 return (b_flag & NEAR_MIN) ? -2 : 2;
2165 return 0; // The elements were equal
2166 }
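/*
Example: let a be the max endpoint of "x < 2" (value 2, NEAR_MAX set)
and b the min endpoint of "x >= 2" (value 2, no NEAR_* flags). The key
values compare equal, so the result is decided by the flags and
sel_cmp() returns -2: a ends exactly where b begins, i.e. the two
ranges are adjacent and can be joined into one.
*/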
2167
2168
2169 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2170 {
2171 SEL_ARG tmp_link,*next_arg,*root;
2172 next_arg= &tmp_link;
2173 if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2174 return 0;
2175 next_arg->next=0; // Fix last link
2176 tmp_link.next->prev=0; // Fix first link
2177 if (root) // If not OOM
2178 root->use_count= 0;
2179 return root;
2180 }
2181
2182
2183 /*
2184 Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2185 objects from table read plans.
2186 */
2187 class TABLE_READ_PLAN
2188 {
2189 public:
2190 /*
2191 Plan read cost, with or without cost of full row retrieval, depending
2192 on plan creation parameters.
2193 */
2194 double read_cost;
2195 ha_rows records; /* estimate of #rows to be examined */
2196
2197 /*
2198 If TRUE, the scan returns rows in rowid order. This is used only for
2199 scans that can be both ROR and non-ROR.
2200 */
2201 bool is_ror;
2202
2203 /*
2204 Create quick select for this plan.
2205 SYNOPSIS
2206 make_quick()
2207 param Parameter from test_quick_select
2208 retrieve_full_rows If TRUE, created quick select will do full record
2209 retrieval.
2210 parent_alloc Memory pool to use, if any.
2211
2212 NOTES
2213 retrieve_full_rows is ignored by some implementations.
2214
2215 RETURN
2216 created quick select
2217 NULL on any error.
2218 */
2219 virtual QUICK_SELECT_I *make_quick(PARAM *param,
2220 bool retrieve_full_rows,
2221 MEM_ROOT *parent_alloc=NULL) = 0;
2222
2223 /* Table read plans are allocated on MEM_ROOT and are never deleted */
2224 static void *operator new(size_t size, MEM_ROOT *mem_root)
2225 { return (void*) alloc_root(mem_root, (uint) size); }
2226 static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); }
2227 static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
2228 virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */
2229 /**
2230 Add basic info for this TABLE_READ_PLAN to the optimizer trace.
2231
2232 @param param Parameters for range analysis of this table
2233 @param trace_object The optimizer trace object the info is appended to
2234 */
2235 virtual void trace_basic_info(PARAM *param,
2236 Json_writer_object *trace_object) const= 0;
2237
2238 };
2239
2240 class TRP_ROR_INTERSECT;
2241 class TRP_ROR_UNION;
2242 class TRP_INDEX_MERGE;
2243
2244
2245 /*
2246 Plan for a QUICK_RANGE_SELECT scan.
2247 TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2248 QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2249 record retrieval scans.
2250 */
2251
2252 class TRP_RANGE : public TABLE_READ_PLAN
2253 {
2254 public:
2255 SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
2256 uint key_idx; /* key number in PARAM::key */
2257 uint mrr_flags;
2258 uint mrr_buf_size;
2259
2260 TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2261 : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2262 {}
2263 virtual ~TRP_RANGE() {} /* Remove gcc warning */
2264
2265 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2266 MEM_ROOT *parent_alloc)
2267 {
2268 DBUG_ENTER("TRP_RANGE::make_quick");
2269 QUICK_RANGE_SELECT *quick;
2270 if ((quick= get_quick_select(param, key_idx, key, mrr_flags,
2271 mrr_buf_size, parent_alloc)))
2272 {
2273 quick->records= records;
2274 quick->read_time= read_cost;
2275 }
2276 DBUG_RETURN(quick);
2277 }
2278 void trace_basic_info(PARAM *param,
2279 Json_writer_object *trace_object) const;
2280 };
2281
2282 void TRP_RANGE::trace_basic_info(PARAM *param,
2283 Json_writer_object *trace_object) const
2284 {
2285 DBUG_ASSERT(trace_object->trace_started());
2286 DBUG_ASSERT(param->using_real_indexes);
2287 const uint keynr_in_table= param->real_keynr[key_idx];
2288
2289 const KEY &cur_key= param->table->key_info[keynr_in_table];
2290 const KEY_PART_INFO *key_part= cur_key.key_part;
2291
2292 trace_object->add("type", "range_scan")
2293 .add("index", cur_key.name)
2294 .add("rows", records);
2295
2296 Json_writer_array trace_range(param->thd, "ranges");
2297
2298 // TRP_RANGE should not be created if there are no range intervals
2299 DBUG_ASSERT(key);
2300
2301 trace_ranges(&trace_range, param, key_idx, key, key_part);
2302 }
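/*
For orientation, the fragment written above has roughly this shape in
the optimizer trace (values are illustrative; the exact range text is
produced by trace_ranges()):

{
  "type": "range_scan",
  "index": "idx1",
  "rows": 42,
  "ranges": ["1 <= key1 <= 10"]
}
*/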
2303
2304
2305 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2306
2307 class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
2308 {
2309 public:
2310 TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2311 virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2312 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2313 MEM_ROOT *parent_alloc);
2314
2315 /* Array of pointers to ROR range scans used in this intersection */
2316 struct st_ror_scan_info **first_scan;
2317 struct st_ror_scan_info **last_scan; /* End of the above array */
2318 struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
2319 bool is_covering; /* TRUE if no row retrieval phase is necessary */
2320 double index_scan_costs; /* SUM(cost(index_scan)) */
2321 void trace_basic_info(PARAM *param,
2322 Json_writer_object *trace_object) const;
2323 };
2324
2325
2326
2327 /*
2328 Plan for QUICK_ROR_UNION_SELECT scan.
2329 QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2330 is ignored by make_quick.
2331 */
2332
2333 class TRP_ROR_UNION : public TABLE_READ_PLAN
2334 {
2335 public:
2336 TRP_ROR_UNION() {} /* Remove gcc warning */
2337 virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */
2338 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2339 MEM_ROOT *parent_alloc);
2340 TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
2341 TABLE_READ_PLAN **last_ror; /* end of the above array */
2342 void trace_basic_info(PARAM *param,
2343 Json_writer_object *trace_object) const;
2344 };
2345
2346 void TRP_ROR_UNION::trace_basic_info(PARAM *param,
2347 Json_writer_object *trace_object) const
2348 {
2349 THD *thd= param->thd;
2350 DBUG_ASSERT(trace_object->trace_started());
2351 trace_object->add("type", "index_roworder_union");
2352 Json_writer_array smth_trace(thd, "union_of");
2353 for (TABLE_READ_PLAN **current= first_ror; current != last_ror; current++)
2354 {
2355 Json_writer_object trp_info(thd);
2356 (*current)->trace_basic_info(param, &trp_info);
2357 }
2358 }
2359
2360 /*
2361 Plan for QUICK_INDEX_INTERSECT_SELECT scan.
2362 QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
2363 is ignored by make_quick.
2364 */
2365
2366 class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN
2367 {
2368 public:
2369 TRP_INDEX_INTERSECT() {} /* Remove gcc warning */
2370 virtual ~TRP_INDEX_INTERSECT() {} /* Remove gcc warning */
2371 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2372 MEM_ROOT *parent_alloc);
2373 TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
2374 TRP_RANGE **range_scans_end; /* end of the array */
2375 /* keys whose scans are to be filtered by cpk conditions */
2376 key_map filtered_scans;
2377 void trace_basic_info(PARAM *param,
2378 Json_writer_object *trace_object) const;
2379
2380 };
2381
2382 void TRP_INDEX_INTERSECT::trace_basic_info(PARAM *param,
2383 Json_writer_object *trace_object) const
2384 {
2385 THD *thd= param->thd;
2386 DBUG_ASSERT(trace_object->trace_started());
2387 trace_object->add("type", "index_sort_intersect");
2388 Json_writer_array smth_trace(thd, "index_sort_intersect_of");
2389 for (TRP_RANGE **current= range_scans; current != range_scans_end;
2390 current++)
2391 {
2392 Json_writer_object trp_info(thd);
2393 (*current)->trace_basic_info(param, &trp_info);
2394 }
2395 }
2396
2397 /*
2398 Plan for QUICK_INDEX_MERGE_SELECT scan.
2399 QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
2400 is ignored by make_quick.
2401 */
2402
2403 class TRP_INDEX_MERGE : public TABLE_READ_PLAN
2404 {
2405 public:
2406 TRP_INDEX_MERGE() {} /* Remove gcc warning */
2407 virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */
2408 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2409 MEM_ROOT *parent_alloc);
2410 TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
2411 TRP_RANGE **range_scans_end; /* end of the array */
2412 void trace_basic_info(PARAM *param,
2413 Json_writer_object *trace_object) const;
2414 };
2415
2416 void TRP_INDEX_MERGE::trace_basic_info(PARAM *param,
2417 Json_writer_object *trace_object) const
2418 {
2419 THD *thd= param->thd;
2420 DBUG_ASSERT(trace_object->trace_started());
2421 trace_object->add("type", "index_merge");
2422 Json_writer_array smth_trace(thd, "index_merge_of");
2423 for (TRP_RANGE **current= range_scans; current != range_scans_end; current++)
2424 {
2425 Json_writer_object trp_info(thd);
2426 (*current)->trace_basic_info(param, &trp_info);
2427 }
2428 }
2429
2430 /*
2431 Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2432 */
2433
2434 class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
2435 {
2436 private:
2437 bool have_min, have_max, have_agg_distinct;
2438 KEY_PART_INFO *min_max_arg_part;
2439 uint group_prefix_len;
2440 uint used_key_parts;
2441 uint group_key_parts;
2442 KEY *index_info;
2443 uint index;
2444 uint key_infix_len;
2445 uchar key_infix[MAX_KEY_LENGTH];
2446 SEL_TREE *range_tree; /* Represents all range predicates in the query. */
2447 SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
2448 uint param_idx; /* Index of used key in param->key. */
2449 bool is_index_scan; /* Use index_next() instead of random read */
2450 public:
2451 /* Number of records selected by the ranges in index_tree. */
2452 ha_rows quick_prefix_records;
2453 public:
2454 TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
2455 bool have_agg_distinct_arg,
2456 KEY_PART_INFO *min_max_arg_part_arg,
2457 uint group_prefix_len_arg, uint used_key_parts_arg,
2458 uint group_key_parts_arg, KEY *index_info_arg,
2459 uint index_arg, uint key_infix_len_arg,
2460 uchar *key_infix_arg,
2461 SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
2462 uint param_idx_arg, ha_rows quick_prefix_records_arg)
2463 : have_min(have_min_arg), have_max(have_max_arg),
2464 have_agg_distinct(have_agg_distinct_arg),
2465 min_max_arg_part(min_max_arg_part_arg),
2466 group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
2467 group_key_parts(group_key_parts_arg), index_info(index_info_arg),
2468 index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
2469 index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
2470 quick_prefix_records(quick_prefix_records_arg)
2471 {
2472 if (key_infix_len)
2473 memcpy(this->key_infix, key_infix_arg, key_infix_len);
2474 }
2475 virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */
2476
2477 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2478 MEM_ROOT *parent_alloc);
2479 void use_index_scan() { is_index_scan= TRUE; }
2480 void trace_basic_info(PARAM *param,
2481 Json_writer_object *trace_object) const;
2482 };
2483
2484
2485 void TRP_GROUP_MIN_MAX::trace_basic_info(PARAM *param,
2486 Json_writer_object *trace_object) const
2487 {
2488 THD *thd= param->thd;
2489 DBUG_ASSERT(trace_object->trace_started());
2490
2491 trace_object->add("type", "index_group").add("index", index_info->name);
2492
2493 if (min_max_arg_part)
2494 trace_object->add("min_max_arg", min_max_arg_part->field->field_name);
2495 else
2496 trace_object->add_null("min_max_arg");
2497
2498 trace_object->add("min_aggregate", have_min)
2499 .add("max_aggregate", have_max)
2500 .add("distinct_aggregate", have_agg_distinct)
2501 .add("rows", records)
2502 .add("cost", read_cost);
2503
2504 const KEY_PART_INFO *key_part= index_info->key_part;
2505 {
2506 Json_writer_array trace_keyparts(thd, "key_parts_used_for_access");
2507 for (uint partno= 0; partno < used_key_parts; partno++)
2508 {
2509 const KEY_PART_INFO *cur_key_part= key_part + partno;
2510 trace_keyparts.add(cur_key_part->field->field_name);
2511 }
2512 }
2513
2514 Json_writer_array trace_range(thd, "ranges");
2515
2516 // can have group quick without ranges
2517 if (index_tree)
2518 {
2519 trace_ranges(&trace_range, param, param_idx,
2520 index_tree, key_part);
2521 }
2522 }
2523
2524
2525 typedef struct st_index_scan_info
2526 {
2527 uint idx; /* # of used key in param->keys */
2528 uint keynr; /* # of used key in table */
2529 uint range_count;
2530 ha_rows records; /* estimate of # records this scan will return */
2531
2532 /* Set of intervals over key fields that will be used for row retrieval. */
2533 SEL_ARG *sel_arg;
2534
2535 KEY *key_info;
2536 uint used_key_parts;
2537
2538 /* Estimate of # records filtered out by intersection with cpk */
2539 ha_rows filtered_out;
2540 /* Bitmap of fields used in index intersection */
2541 MY_BITMAP used_fields;
2542
2543 /* Fields used in the query and covered by ROR scan. */
2544 MY_BITMAP covered_fields;
2545 uint used_fields_covered; /* # of set bits in covered_fields */
2546 int key_rec_length; /* length of key record (including rowid) */
2547
2548 /*
2549 Cost of reading all index records with values in sel_arg intervals set
2550 (assuming there is no need to access full table records)
2551 */
2552 double index_read_cost;
2553 uint first_uncovered_field; /* first unused bit in covered_fields */
2554 uint key_components; /* # of parts in the key */
2555 } INDEX_SCAN_INFO;
2556
2557 /*
2558 Fill param->needed_fields with bitmap of fields used in the query.
2559 SYNOPSIS
2560 fill_used_fields_bitmap()
2561 param Parameter from test_quick_select function.
2562
2563 NOTES
2564 Clustered PK members are not put into the bitmap as they are implicitly
2565 present in all keys (and it is impossible to avoid reading them).
2566 RETURN
2567 0 Ok
2568 1 Out of memory.
2569 */
2570
2571 static int fill_used_fields_bitmap(PARAM *param)
2572 {
2573 TABLE *table= param->table;
2574 my_bitmap_map *tmp;
2575 uint pk;
2576 param->tmp_covered_fields.bitmap= 0;
2577 param->fields_bitmap_size= table->s->column_bitmap_size;
2578 if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2579 param->fields_bitmap_size)) ||
2580 my_bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2581 return 1;
2582
2583 bitmap_copy(&param->needed_fields, table->read_set);
2584 bitmap_union(&param->needed_fields, table->write_set);
2585
2586 pk= param->table->s->primary_key;
2587 if (param->table->file->pk_is_clustering_key(pk))
2588 {
2589 /* The table uses clustered PK and it is not internally generated */
2590 KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2591 KEY_PART_INFO *key_part_end= key_part +
2592 param->table->key_info[pk].user_defined_key_parts;
2593 for (;key_part != key_part_end; ++key_part)
2594 bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1);
2595 }
2596 return 0;
2597 }
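/*
Illustration (not part of the server): the net effect of the function
above is needed_fields = (read_set | write_set) with the clustered-PK
columns removed. The same computation with std::bitset (hypothetical
helper, columns numbered 0..N-1):

#include <bitset>

template <size_t N>
static std::bitset<N>
needed_fields_sketch(std::bitset<N> read_set,
                     const std::bitset<N> &write_set,
                     const std::bitset<N> &clustered_pk_parts)
{
  read_set|= write_set;             // fields read or written by the query
  read_set&= ~clustered_pk_parts;   // PK parts come for free with any key
  return read_set;
}
*/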
2598
2599
2600 /*
2601 Test if a key can be used in different ranges
2602
2603 SYNOPSIS
2604 SQL_SELECT::test_quick_select()
2605 thd Current thread
2606 keys_to_use Keys to use for range retrieval
2607 prev_tables Tables assumed to be already read when the scan is
2608 performed (but not read at the moment of this call)
2609 limit Query limit
2610 force_quick_range Prefer to use range (instead of full table scan) even
2611 if it is more expensive.
2612 remove_false_parts_of_where Remove parts of OR-clauses for which range
2613 analysis produced SEL_TREE(IMPOSSIBLE)
2614 only_single_index_range_scan Evaluate only single index range scans
2615
2616 NOTES
2617 Updates the following in the select parameter:
2618 needed_reg - Bits for keys that may be used if all prev regs are read
2619 quick - Parameter to use when reading records.
2620
2621 In the table struct the following information is updated:
2622 quick_keys - Which keys can be used
2623 quick_rows - How many rows the key matches
2624 opt_range_condition_rows - E(# rows that will satisfy the table condition)
2625
2626 IMPLEMENTATION
2627 opt_range_condition_rows value is obtained as follows:
2628
2629 It is a minimum of E(#output rows) for all considered table access
2630 methods (range and index_merge accesses over various indexes).
2631
2632 The obtained value is not a true E(#rows that satisfy table condition)
2633 but rather a pessimistic estimate. To obtain a true E(#...) one would
2634 need to combine estimates of various access methods, taking into account
2635 correlations between sets of rows they will return.
2636
2637 For example, if values of tbl.key1 and tbl.key2 are independent (a reasonable
2638 assumption if we have no information about their correlation) then the
2639 correct estimate will be:
2640
2641 E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2642 = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2))
2643
2644 which is smaller than
2645
2646 MIN(E(#rows(tbl.key1 < c1)), E(#rows(tbl.key2 < c2)))
2647
2648 which is currently produced.
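
A worked example: with total_rows(tbl) = 1000,
E(#rows(tbl.key1 < c1)) = 100 and E(#rows(tbl.key2 < c2)) = 200, the
independence-based estimate is 100 / 1000 * 200 = 20 rows, while the
current MIN-based code returns 100 rows.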
2649
2650 TODO
2651 * Change the value returned in opt_range_condition_rows from a pessimistic
2652 estimate to true E(#rows that satisfy table condition).
2653 (we can re-use some of E(#rows) calculation code from
2654 index_merge/intersection for this)
2655
2656 * Check if this function really needs to modify keys_to_use, and change the
2657 code to pass it by reference if it doesn't.
2658
2659 * In addition to force_quick_range other means can be (and usually are) used
2660 to make this function prefer range over full table scan. Figure out if
2661 force_quick_range is really needed.
2662
2663 RETURN
2664 -1 if error or impossible select (i.e. certainly no rows will be selected)
2665 0 if can't use quick_select
2666 1 if found usable ranges and quick select has been successfully created.
2667 */
2668
2669 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
2670 table_map prev_tables,
2671 ha_rows limit, bool force_quick_range,
2672 bool ordered_output,
2673 bool remove_false_parts_of_where,
2674 bool only_single_index_range_scan)
2675 {
2676 uint idx;
2677 double scan_time;
2678 Item *notnull_cond= NULL;
2679 TABLE_READ_PLAN *best_trp= NULL;
2680 SEL_ARG **backup_keys= 0;
2681 DBUG_ENTER("SQL_SELECT::test_quick_select");
2682 DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
2683 (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2684 (ulong) const_tables));
2685 DBUG_PRINT("info", ("records: %lu", (ulong) head->stat_records()));
2686 delete quick;
2687 quick=0;
2688 needed_reg.clear_all();
2689 quick_keys.clear_all();
2690 head->with_impossible_ranges.clear_all();
2691 DBUG_ASSERT(!head->is_filled_at_execution());
2692 if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
2693 DBUG_RETURN(0);
2694 records= head->stat_records();
2695 notnull_cond= head->notnull_cond;
2696 if (!records)
2697 records++; /* purecov: inspected */
2698 if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
2699 only_single_index_range_scan= 1;
2700
2701 if (head->force_index || force_quick_range)
2702 scan_time= read_time= DBL_MAX;
2703 else
2704 {
2705 scan_time= rows2double(records) / TIME_FOR_COMPARE;
2706 /*
2707 The 2 is there to prefer range scans to full table scans.
2708 This is mainly to make the test suite happy as many tests have
2709 very few rows. In real life tables have more than a few rows and the
2710 +2 has no practical effect.
2711 */
2712 read_time= (double) head->file->scan_time() + scan_time + 2;
2713 if (limit < records && read_time < (double) records + scan_time + 1 )
2714 {
2715 read_time= (double) records + scan_time + 1; // Force to use index
2716 notnull_cond= NULL;
2717 }
2718 }
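/*
Arithmetic example for the heuristic above (assuming TIME_FOR_COMPARE
is 5): with records = 1000 and scan_time() = 80 we get
scan_time = 1000 / 5 = 200 and read_time = 80 + 200 + 2 = 282. If the
query has LIMIT 10, then limit < records and 282 < 1000 + 200 + 1, so
read_time is raised to 1201, which makes index access look cheaper
than the table scan.
*/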
2719
2720 possible_keys.clear_all();
2721
2722 DBUG_PRINT("info",("Time to scan table: %g", read_time));
2723
2724 Json_writer_object table_records(thd);
2725 table_records.add_table_name(head);
2726
2727 Json_writer_object trace_range(thd, "range_analysis");
2728 {
2729 Json_writer_object table_rec(thd, "table_scan");
2730 table_rec.add("rows", records).add("cost", read_time);
2731 }
2732
2733 keys_to_use.intersect(head->keys_in_use_for_query);
2734 if (!keys_to_use.is_clear_all())
2735 {
2736 uchar buff[STACK_BUFF_ALLOC];
2737 MEM_ROOT alloc;
2738 SEL_TREE *tree= NULL;
2739 SEL_TREE *notnull_cond_tree= NULL;
2740 KEY_PART *key_parts;
2741 KEY *key_info;
2742 PARAM param;
2743 bool force_group_by = false;
2744
2745 if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff))
2746 DBUG_RETURN(0); // Fatal error flag is set
2747
2748 /* set up parameter that is passed to all functions */
2749 param.thd= thd;
2750 param.baseflag= head->file->ha_table_flags();
2751 param.prev_tables=prev_tables | const_tables;
2752 param.read_tables=read_tables;
2753 param.current_table= head->map;
2754 param.table=head;
2755 param.keys=0;
2756 param.mem_root= &alloc;
2757 param.old_root= thd->mem_root;
2758 param.needed_reg= &needed_reg;
2759 param.imerge_cost_buff_size= 0;
2760 param.using_real_indexes= TRUE;
2761 param.remove_jump_scans= TRUE;
2762 param.max_key_parts= 0;
2763 param.remove_false_where_parts= remove_false_parts_of_where;
2764 param.force_default_mrr= ordered_output;
2765 param.possible_keys.clear_all();
2766
2767 thd->no_errors=1; // Don't warn about NULL
2768 init_sql_alloc(key_memory_quick_range_select_root, &alloc,
2769 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
2770 if (!(param.key_parts=
2771 (KEY_PART*) alloc_root(&alloc,
2772 sizeof(KEY_PART) *
2773 head->s->actual_n_key_parts(thd))) ||
2774 fill_used_fields_bitmap(&param))
2775 {
2776 thd->no_errors=0;
2777 free_root(&alloc,MYF(0)); // Return memory & allocator
2778 DBUG_RETURN(-1); // Error
2779 }
2780 key_parts= param.key_parts;
2781
2782 /*
2783 Make an array with description of all key parts of all table keys.
2784 This is used in get_mm_parts function.
2785 */
2786 key_info= head->key_info;
2787 uint max_key_len= 0;
2788
2789 Json_writer_array trace_idx(thd, "potential_range_indexes");
2790
2791 for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
2792 {
2793 Json_writer_object trace_idx_details(thd);
2794 trace_idx_details.add("index", key_info->name);
2795 KEY_PART_INFO *key_part_info;
2796 uint n_key_parts= head->actual_n_key_parts(key_info);
2797
2798 if (!keys_to_use.is_set(idx))
2799 {
2800 trace_idx_details.add("usable", false)
2801 .add("cause", "not applicable");
2802 continue;
2803 }
2804 if (key_info->flags & HA_FULLTEXT)
2805 {
2806 trace_idx_details.add("usable", false).add("cause", "fulltext");
2807 continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
2808 }
2809
2810 trace_idx_details.add("usable", true);
2811 param.key[param.keys]=key_parts;
2812 key_part_info= key_info->key_part;
2813 uint cur_key_len= 0;
2814 Json_writer_array trace_keypart(thd, "key_parts");
2815 for (uint part= 0 ; part < n_key_parts ;
2816 part++, key_parts++, key_part_info++)
2817 {
2818 key_parts->key= param.keys;
2819 key_parts->part= part;
2820 key_parts->length= key_part_info->length;
2821 key_parts->store_length= key_part_info->store_length;
2822 cur_key_len += key_part_info->store_length;
2823 key_parts->field= key_part_info->field;
2824 key_parts->null_bit= key_part_info->null_bit;
2825 key_parts->image_type =
2826 (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2827 /* Only HA_PART_KEY_SEG is used */
2828 key_parts->flag= (uint8) key_part_info->key_part_flag;
2829 trace_keypart.add(key_parts->field->field_name);
2830 }
2831 param.real_keynr[param.keys++]=idx;
2832 if (cur_key_len > max_key_len)
2833 max_key_len= cur_key_len;
2834 }
2835 trace_idx.end();
2836
2837 param.key_parts_end=key_parts;
2838 param.alloced_sel_args= 0;
2839
2840 max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
2841 if (!(param.min_key= (uchar*)alloc_root(&alloc,max_key_len)) ||
2842 !(param.max_key= (uchar*)alloc_root(&alloc,max_key_len)))
2843 {
2844 thd->no_errors=0;
2845 free_root(&alloc,MYF(0)); // Return memory & allocator
2846 DBUG_RETURN(-1); // Error
2847 }
2848
2849 thd->mem_root= &alloc;
2850 /* Calculate cost of full index read for the shortest covering index */
2851 if (!force_quick_range && !head->covering_keys.is_clear_all())
2852 {
2853 int key_for_use= find_shortest_key(head, &head->covering_keys);
2854 double key_read_time= (head->file->key_scan_time(key_for_use) +
2855 rows2double(records) / TIME_FOR_COMPARE);
2856 DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
2857 "read time %g", key_for_use, key_read_time));
2858
2859 Json_writer_object trace_cov(thd, "best_covering_index_scan");
2860 bool chosen= FALSE;
2861 if (key_read_time < read_time)
2862 {
2863 read_time= key_read_time;
2864 chosen= TRUE;
2865 }
2866 trace_cov.add("index", head->key_info[key_for_use].name)
2867 .add("cost", key_read_time).add("chosen", chosen);
2868 if (!chosen)
2869 trace_cov.add("cause", "cost");
2870 }
2871
2872 double best_read_time= read_time;
2873
2874 if (notnull_cond)
2875 notnull_cond_tree= notnull_cond->get_mm_tree(&param, &notnull_cond);
2876
2877 if (cond || notnull_cond_tree)
2878 {
2879 {
2880 Json_writer_array trace_range_summary(thd,
2881 "setup_range_conditions");
2882 if (cond)
2883 tree= cond->get_mm_tree(&param, &cond);
2884 if (notnull_cond_tree)
2885 tree= tree_and(&param, tree, notnull_cond_tree);
2886 }
2887 if (tree)
2888 {
2889 if (tree->type == SEL_TREE::IMPOSSIBLE)
2890 {
2891 records=0L; /* Return -1 from this function. */
2892 read_time= (double) HA_POS_ERROR;
2893 trace_range.add("impossible_range", true);
2894 goto free_mem;
2895 }
2896 /*
2897 If the tree can't be used for range scans, proceed anyway, as we
2898 can construct a group-min-max quick select
2899 */
2900 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
2901 {
2902 trace_range.add("range_scan_possible", false);
2903 tree= NULL;
2904 }
2905 }
2906 else if (thd->is_error())
2907 {
2908 thd->no_errors=0;
2909 thd->mem_root= param.old_root;
2910 free_root(&alloc, MYF(0));
2911 DBUG_RETURN(-1);
2912 }
2913 }
2914
2915 if (tree)
2916 {
2917 /*
2918 It is possible to use a range-based quick select (but it might be
2919 slower than 'all' table scan).
2920 */
2921 TRP_ROR_INTERSECT *rori_trp;
2922 TRP_INDEX_INTERSECT *intersect_trp;
2923 bool can_build_covering= FALSE;
2924 Json_writer_object trace_range(thd, "analyzing_range_alternatives");
2925
2926 backup_keys= (SEL_ARG**) alloca(sizeof(backup_keys[0])*param.keys);
2927 memcpy(&backup_keys[0], &tree->keys[0],
2928 sizeof(backup_keys[0])*param.keys);
2929
2930 remove_nonrange_trees(&param, tree);
2931
2932 /* Get best 'range' plan and prepare data for making other plans */
2933 if (auto range_trp= get_key_scans_params(&param, tree,
2934 only_single_index_range_scan,
2935 true, best_read_time))
2936 {
2937 best_trp= range_trp;
2938 best_read_time= best_trp->read_cost;
2939 }
2940
2941 /*
2942 Simultaneous key scans and row deletes on several handler
2943 objects are not allowed so don't use ROR-intersection for
2944 table deletes.
2945 */
2946 if ((thd->lex->sql_command != SQLCOM_DELETE) &&
2947 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2948 !only_single_index_range_scan)
2949 {
2950 /*
2951 Get best non-covering ROR-intersection plan and prepare data for
2952 building covering ROR-intersection.
2953 */
2954 if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
2955 &can_build_covering)))
2956 {
2957 best_trp= rori_trp;
2958 best_read_time= best_trp->read_cost;
2959 /*
2960 Try constructing covering ROR-intersect only if it looks possible
2961 and worth doing.
2962 */
2963 if (!rori_trp->is_covering && can_build_covering &&
2964 (rori_trp= get_best_covering_ror_intersect(&param, tree,
2965 best_read_time)))
2966 best_trp= rori_trp;
2967 }
2968 }
2969 /*
2970 Do not look for an index intersection plan if there is a covering
2971 index. The scan by this covering index will always be cheaper than
2972 any index intersection.
2973 */
2974 if (param.table->covering_keys.is_clear_all() &&
2975 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2976 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT) &&
2977 !only_single_index_range_scan)
2978 {
2979 if ((intersect_trp= get_best_index_intersect(&param, tree,
2980 best_read_time)))
2981 {
2982 best_trp= intersect_trp;
2983 best_read_time= best_trp->read_cost;
2984 set_if_smaller(param.table->opt_range_condition_rows,
2985 intersect_trp->records);
2986 }
2987 }
2988
2989 if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2990 head->stat_records() != 0 && !only_single_index_range_scan)
2991 {
2992 /* Try creating index_merge/ROR-union scan. */
2993 SEL_IMERGE *imerge;
2994 TABLE_READ_PLAN *best_conj_trp= NULL,
2995 *UNINIT_VAR(new_conj_trp); /* no empty index_merge lists possible */
2996 DBUG_PRINT("info",("No range reads possible,"
2997 " trying to construct index_merge"));
2998 List_iterator_fast<SEL_IMERGE> it(tree->merges);
2999 Json_writer_array trace_idx_merge(thd, "analyzing_index_merge_union");
3000 while ((imerge= it++))
3001 {
3002 new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
3003 if (new_conj_trp)
3004 set_if_smaller(param.table->opt_range_condition_rows,
3005 new_conj_trp->records);
3006 if (new_conj_trp &&
3007 (!best_conj_trp ||
3008 new_conj_trp->read_cost < best_conj_trp->read_cost))
3009 {
3010 best_conj_trp= new_conj_trp;
3011 best_read_time= best_conj_trp->read_cost;
3012 }
3013 }
3014 if (best_conj_trp)
3015 best_trp= best_conj_trp;
3016 }
3017 }
3018
3019 /*
3020 Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
3021 Notice that it can be constructed whether or not there is a range tree.
3022 */
3023 DBUG_EXECUTE_IF("force_group_by", force_group_by = true; );
3024 if (!only_single_index_range_scan)
3025 {
3026 TRP_GROUP_MIN_MAX *group_trp;
3027 if (tree)
3028 restore_nonrange_trees(&param, tree, backup_keys);
3029 if ((group_trp= get_best_group_min_max(&param, tree, read_time)))
3030 {
3031 param.table->opt_range_condition_rows= MY_MIN(group_trp->records,
3032 head->stat_records());
3033 Json_writer_object grp_summary(thd, "best_group_range_summary");
3034
3035 if (unlikely(thd->trace_started()))
3036 group_trp->trace_basic_info(&param, &grp_summary);
3037
3038 if (group_trp->read_cost < best_read_time || force_group_by)
3039 {
3040 grp_summary.add("chosen", true);
3041 best_trp= group_trp;
3042 best_read_time= best_trp->read_cost;
3043 }
3044 else
3045 grp_summary.add("chosen", false).add("cause", "cost");
3046 }
3047 if (tree)
3048 remove_nonrange_trees(&param, tree);
3049 }
3050
3051 thd->mem_root= param.old_root;
3052
3053 /* If we got a read plan, create a quick select from it. */
3054 if (best_trp)
3055 {
3056 records= best_trp->records;
3057 if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
3058 {
3059 delete quick;
3060 quick= NULL;
3061 }
3062 }
3063 possible_keys= param.possible_keys;
3064
3065 free_mem:
3066 if (unlikely(quick && best_trp && thd->trace_started()))
3067 {
3068 Json_writer_object trace_range_summary(thd,
3069 "chosen_range_access_summary");
3070 {
3071 Json_writer_object trace_range_plan(thd, "range_access_plan");
3072 best_trp->trace_basic_info(&param, &trace_range_plan);
3073 }
3074 trace_range_summary.add("rows_for_plan", quick->records)
3075 .add("cost_for_plan", quick->read_time)
3076 .add("chosen", true);
3077 }
3078
3079 free_root(&alloc,MYF(0)); // Return memory & allocator
3080 thd->mem_root= param.old_root;
3081 thd->no_errors=0;
3082 }
3083
3084 DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
3085
3086 /*
3087 Assume that if the user is using 'limit' we will only need to scan
3088 limit rows if we are using a key
3089 */
3090 DBUG_RETURN(records ? MY_TEST(quick) : -1);
3091 }
3092
3093 /****************************************************************************
3094 * Condition selectivity module
3095 ****************************************************************************/
3096
3097
3098 /*
3099 Build descriptors of pseudo-indexes over columns to perform range analysis
3100
3101 SYNOPSIS
3102 create_key_parts_for_pseudo_indexes()
3103 param IN/OUT data structure for the descriptors to be built
3104 used_fields bitmap of columns for which the descriptors are to be built
3105
3106 DESCRIPTION
3107 For each column marked in the bitmap used_fields the function builds
3108 a descriptor of a single-component pseudo-index over this column that
3109 can be used for the range analysis of the predicates over this column.
3110 The descriptors are created in the memory of param->mem_root.
3111
3112 RETURN
3113 FALSE in the case of success
3114 TRUE otherwise
3115 */
3116
3117 static
3118 bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
3119 MY_BITMAP *used_fields)
3120 {
3121 Field **field_ptr;
3122 TABLE *table= param->table;
3123 uint parts= 0;
3124
3125 for (field_ptr= table->field; *field_ptr; field_ptr++)
3126 {
3127 Field *field= *field_ptr;
3128 if (bitmap_is_set(used_fields, field->field_index) &&
3129 is_eits_usable(field))
3130 parts++;
3131 }
3132
3133 KEY_PART *key_part;
3134 uint keys= 0;
3135
3136 if (!parts)
3137 return TRUE;
3138
3139 if (!(key_part= (KEY_PART *) alloc_root(param->mem_root,
3140 sizeof(KEY_PART) * parts)))
3141 return TRUE;
3142
3143 param->key_parts= key_part;
3144 uint max_key_len= 0;
3145 for (field_ptr= table->field; *field_ptr; field_ptr++)
3146 {
3147 Field *field= *field_ptr;
3148 if (bitmap_is_set(used_fields, field->field_index))
3149 {
3150 if (!is_eits_usable(field))
3151 continue;
3152
3153 uint16 store_length;
3154 uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
3155 key_part->key= keys;
3156 key_part->part= 0;
3157 if (field->flags & BLOB_FLAG)
3158 key_part->length= max_key_part_length;
3159 else
3160 {
3161 key_part->length= (uint16) field->key_length();
3162 set_if_smaller(key_part->length, max_key_part_length);
3163 }
3164 store_length= key_part->length;
3165 if (field->real_maybe_null())
3166 store_length+= HA_KEY_NULL_LENGTH;
3167 if (field->real_type() == MYSQL_TYPE_VARCHAR)
3168 store_length+= HA_KEY_BLOB_LENGTH;
3169 if (max_key_len < store_length)
3170 max_key_len= store_length;
3171 key_part->store_length= store_length;
3172 key_part->field= field;
3173 key_part->image_type= Field::itRAW;
3174 key_part->flag= 0;
3175 param->key[keys]= key_part;
3176 keys++;
3177 key_part++;
3178 }
3179 }
3180
3181 max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
3182 if (!(param->min_key= (uchar*)alloc_root(param->mem_root, max_key_len)) ||
3183 !(param->max_key= (uchar*)alloc_root(param->mem_root, max_key_len)))
3184 {
3185 return true;
3186 }
3187 param->keys= keys;
3188 param->key_parts_end= key_part;
3189
3190 return FALSE;
3191 }
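/*
Example of the store_length computation above: a nullable VARCHAR
column with key_length() = 30 gets store_length = 30
+ HA_KEY_NULL_LENGTH (1 byte for the NULL indicator)
+ HA_KEY_BLOB_LENGTH (2 bytes for the value length) = 33 bytes.
*/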
3192
3193
3194 /*
3195 Estimate the number of rows in all ranges built for a column
3196 by the range optimizer
3197
3198 SYNOPSIS
3199 records_in_column_ranges()
3200 param the data structure to access descriptors of pseudo indexes
3201 built over columns used in the condition of the processed query
3202 idx the index of the descriptor of interest in param
3203 tree the tree representing ranges built for the interesting column
3204
3205 DESCRIPTION
3206 This function retrieves the ranges represented by the SEL_ARG 'tree' and
3207 for each such range r it calls the function get_column_range_cardinality()
3208 that estimates the number of expected rows in r. It is assumed that param
3209 is the data structure containing the descriptors of pseudo-indexes that
3210 have been built to perform range analysis of the range conditions imposed
3211 on the columns used in the processed query, while idx is the index of the
3212 descriptor created in 'param' exactly for the column for which 'tree'
3213 has been built by the range optimizer.
3214
3215 RETURN
3216 the number of rows in the retrieved ranges
3217 */
3218
3219 static
3220 double records_in_column_ranges(PARAM *param, uint idx,
3221 SEL_ARG *tree)
3222 {
3223 THD *thd= param->thd;
3224 SEL_ARG_RANGE_SEQ seq;
3225 KEY_MULTI_RANGE range;
3226 range_seq_t seq_it;
3227 double rows;
3228 Field *field;
3229 uint flags= 0;
3230 double total_rows= 0;
3231 RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
3232 sel_arg_range_seq_next, 0, 0};
3233
3234 /* Handle cases when we don't have a valid non-empty list of ranges */
3235 if (!tree)
3236 return DBL_MAX;
3237 if (tree->type == SEL_ARG::IMPOSSIBLE)
3238 return (0L);
3239
3240 field= tree->field;
3241
3242 seq.keyno= idx;
3243 seq.real_keyno= MAX_KEY;
3244 seq.param= param;
3245 seq.start= tree;
3246 seq.is_ror_scan= FALSE;
3247
3248 seq_it= seq_if.init((void *) &seq, 0, flags);
3249
3250 Json_writer_array range_trace(thd, "ranges");
3251
3252 while (!seq_if.next(seq_it, &range))
3253 {
3254 key_range *min_endp, *max_endp;
3255 min_endp= range.start_key.length? &range.start_key : NULL;
3256 max_endp= range.end_key.length? &range.end_key : NULL;
3257 int range_flag= range.range_flag;
3258
3259 if (!range.start_key.length)
3260 range_flag |= NO_MIN_RANGE;
3261 if (!range.end_key.length)
3262 range_flag |= NO_MAX_RANGE;
3263 if (range.start_key.flag == HA_READ_AFTER_KEY)
3264 range_flag |= NEAR_MIN;
3265 if (range.end_key.flag == HA_READ_BEFORE_KEY)
3266 range_flag |= NEAR_MAX;
3267
3268 if (unlikely(thd->trace_started()))
3269 {
3270 StringBuffer<128> range_info(system_charset_info);
3271 print_range_for_non_indexed_field(&range_info, field, &range);
3272 range_trace.add(range_info.c_ptr_safe(), range_info.length());
3273 }
3274
3275 rows= get_column_range_cardinality(field, min_endp, max_endp, range_flag);
3276 if (DBL_MAX == rows)
3277 {
3278 total_rows= DBL_MAX;
3279 break;
3280 }
3281 total_rows += rows;
3282 }
3283 return total_rows;
3284 }
3285
3286
3287 /*
3288 Compare quick select ranges according to the number of found rows.
3289 If the amounts of rows are equal, prefer the longer key prefix.
3290 The idea is that if we have keys (a),(a,b) and (a,b,c) and we have
3291 a query like WHERE a=1 and b=1 and c=1,
3292 it is better to use key (a,b,c) than (a) as it will ensure we don't also
3293 use histograms for columns b and c
3294 */
3295
3296 static
3297 int cmp_quick_ranges(TABLE *table, uint *a, uint *b)
3298 {
3299 int tmp= CMP_NUM(table->opt_range[*a].rows, table->opt_range[*b].rows);
3300 if (tmp)
3301 return tmp;
3302 return -CMP_NUM(table->opt_range[*a].key_parts, table->opt_range[*b].key_parts);
3303 }
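/*
Illustration (not part of the server): the comparator above produces a
two-level ordering - fewest estimated rows first, and on a tie the scan
with more key parts first. An equivalent standalone sketch (hypothetical
Scan struct):

#include <algorithm>
#include <vector>

struct Scan { unsigned long rows; unsigned key_parts; };

static void order_scans_sketch(std::vector<Scan> &scans)
{
  std::sort(scans.begin(), scans.end(),
            [](const Scan &a, const Scan &b)
            {
              if (a.rows != b.rows)
                return a.rows < b.rows;          // fewer rows first
              return a.key_parts > b.key_parts;  // longer prefix first
            });
}
*/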
3304
3305
3306 /*
3307 Calculate the selectivity of the condition imposed on the rows of a table
3308
3309 SYNOPSIS
3310 calculate_cond_selectivity_for_table()
3311 thd the context handle
3312 table the table of interest
3313 cond conditions imposed on the rows of the table
3314
3315 DESCRIPTION
3316 This function calculates the selectivity of range conditions cond imposed
3317 on the rows of 'table' in the processed query.
3318 The calculated selectivity is assigned to the field table->cond_selectivity.
3319
3320 Selectivity is calculated as a product of selectivities imposed by:
3321
3322 1. possible range accesses. (if multiple range accesses use the same
3323 restrictions on the same field, we make adjustments for that)
3324 2. Sargable conditions on fields for which we have column statistics (if
3325 a field is used in a possible range access, we assume that selectivity
3326 is already provided by the range access' estimates)
3327 3. Reading a few records from the table pages and checking the condition
3328 selectivity (this is used for conditions like "column LIKE '%val%'"
3329 where approaches #1 and #2 do not provide selectivity data).
3330
3331 NOTE
3332 Currently the selectivities of range conditions over different columns are
3333 considered independent.
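
EXAMPLE
Suppose the table has 1000 rows, a range access on an index over col1
is estimated to read 100 rows, and a histogram on col2 gives
selectivity 0.5 for its condition. Under the independence assumption
above, cond_selectivity = (100 / 1000) * 0.5 = 0.05.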
3334
3335 RETURN
3336 FALSE on success
3337 TRUE otherwise
3338 */
3339
3340 bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
3341 {
3342 uint keynr, range_index, ranges;
3343 MY_BITMAP *used_fields= &table->cond_set;
3344 double table_records= (double)table->stat_records();
3345 uint optimal_key_order[MAX_KEY];
3346 DBUG_ENTER("calculate_cond_selectivity_for_table");
3347
3348 table->cond_selectivity= 1.0;
3349
3350 if (table_records == 0)
3351 DBUG_RETURN(FALSE);
3352
3353 QUICK_SELECT_I *quick;
3354 if ((quick=table->reginfo.join_tab->quick) &&
3355 quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
3356 {
3357 table->cond_selectivity*= (quick->records/table_records);
3358 DBUG_RETURN(FALSE);
3359 }
3360
3361 if (!*cond)
3362 DBUG_RETURN(FALSE);
3363
3364 if (table->pos_in_table_list->schema_table)
3365 DBUG_RETURN(FALSE);
3366
3367 MY_BITMAP handled_columns;
3368 my_bitmap_map* buf;
3369 if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size)))
3370 DBUG_RETURN(TRUE);
3371 my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE);
3372
3373 /*
3374 Calculate the selectivity of the range conditions supported by indexes.
3375
3376 First, take into account possible range accesses.
3377 Range access estimates are the most precise, so we prefer them to any
3378 other estimate source.
3379 */
3380
3381 Json_writer_object trace_wrapper(thd);
3382 Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes");
3383
3384 /*
3385 Walk through all quick ranges in order of fewest found rows.
3386 */
3387 for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++)
3388 if (table->opt_range_keys.is_set(keynr))
3389 optimal_key_order[ranges++]= keynr;
3390
3391 my_qsort2(optimal_key_order, ranges,
3392 sizeof(optimal_key_order[0]),
3393 (qsort2_cmp) cmp_quick_ranges, table);
3394
3395 for (range_index= 0 ; range_index < ranges ; range_index++)
3396 {
3397 uint keynr= optimal_key_order[range_index];
3398 {
3399 {
3400 uint i;
3401 uint used_key_parts= table->opt_range[keynr].key_parts;
3402 double quick_cond_selectivity= (table->opt_range[keynr].rows /
3403 table_records);
3404 KEY *key_info= table->key_info + keynr;
3405 KEY_PART_INFO* key_part= key_info->key_part;
3406 /*
3407 Suppose, there are range conditions on two keys
3408 KEY1 (col1, col2)
3409 KEY2 (col3, col2)
3410
3411 we don't want to count selectivity of condition on col2 twice.
3412
3413 First, find the longest key prefix that's made of columns whose
3414 selectivity wasn't already accounted for.
3415 */
3416 for (i= 0; i < used_key_parts; i++, key_part++)
3417 {
3418 if (bitmap_is_set(&handled_columns, key_part->fieldnr-1))
3419 break;
3420 bitmap_set_bit(&handled_columns, key_part->fieldnr-1);
3421 }
3422 if (i)
3423 {
3424 double UNINIT_VAR(selectivity_mult);
3425
3426 /*
3427 There is at least 1-column prefix of columns whose selectivity has
3428 not yet been accounted for.
3429 */
3430 table->cond_selectivity*= quick_cond_selectivity;
3431 Json_writer_object selectivity_for_index(thd);
3432 selectivity_for_index.add("index_name", key_info->name)
3433 .add("selectivity_from_index",
3434 quick_cond_selectivity);
3435 if (i != used_key_parts)
3436 {
3437 /*
3438 Range access got us estimate for #used_key_parts.
3439 We need estimate for #(i-1) key parts.
3440 */
3441 double f1= key_info->actual_rec_per_key(i-1);
3442 double f2= key_info->actual_rec_per_key(i);
3443 if (f1 > 0 && f2 > 0)
3444 selectivity_mult= f1 / f2;
3445 else
3446 {
3447 /*
3448 No statistics available, assume the selectivity is proportional
3449 to the number of key parts.
3450 (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1)
3451 */
3452 selectivity_mult= ((double)(i+1)) / i;
3453 }
3454 table->cond_selectivity*= selectivity_mult;
3455 selectivity_for_index.add("selectivity_multiplier",
3456 selectivity_mult);
3457 }
3458 /*
3459 We need to set selectivity for fields supported by indexes.
3460 For single-component indexes and for some first components
3461 of other indexes we do it here. For the remaining fields
3462 we do it later in this function, in the same way as for the
3463 fields not used in any indexes.
3464 */
3465 if (i == 1)
3466 {
3467 uint fieldnr= key_info->key_part[0].fieldnr;
3468 table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity;
3469 if (i != used_key_parts)
3470 table->field[fieldnr-1]->cond_selectivity*= selectivity_mult;
3471 bitmap_clear_bit(used_fields, fieldnr-1);
3472 }
3473 }
3474 }
3475 }
3476 }
3477 selectivity_for_indexes.end();
3478
3479 /*
3480 Second step: calculate the selectivity of the range conditions not
3481 supported by any index and selectivity of the range condition
3482 over the fields whose selectivity has not been set yet.
3483 */
3484 Json_writer_array selectivity_for_columns(thd, "selectivity_for_columns");
3485
3486 if (thd->variables.optimizer_use_condition_selectivity > 2 &&
3487 !bitmap_is_clear_all(used_fields) &&
3488 thd->variables.use_stat_tables > 0 && table->stats_is_read)
3489 {
3490 PARAM param;
3491 MEM_ROOT alloc;
3492 SEL_TREE *tree;
3493 double rows;
3494
3495 init_sql_alloc(key_memory_quick_range_select_root, &alloc,
3496 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
3497 param.thd= thd;
3498 param.mem_root= &alloc;
3499 param.old_root= thd->mem_root;
3500 param.table= table;
3501 param.remove_false_where_parts= true;
3502
3503 if (create_key_parts_for_pseudo_indexes(&param, used_fields))
3504 goto free_alloc;
3505
3506 param.prev_tables= param.read_tables= 0;
3507 param.current_table= table->map;
3508 param.using_real_indexes= FALSE;
3509 param.real_keynr[0]= 0;
3510 param.alloced_sel_args= 0;
3511 param.max_key_parts= 0;
3512
3513 thd->no_errors=1;
3514
3515 tree= cond[0]->get_mm_tree(&param, cond);
3516
3517 if (!tree)
3518 goto free_alloc;
3519
3520 table->reginfo.impossible_range= 0;
3521 if (tree->type == SEL_TREE::IMPOSSIBLE)
3522 {
3523 rows= 0;
3524 table->reginfo.impossible_range= 1;
3525 goto free_alloc;
3526 }
3527 else if (tree->type == SEL_TREE::ALWAYS)
3528 {
3529 rows= table_records;
3530 goto free_alloc;
3531 }
3532 else if (tree->type == SEL_TREE::MAYBE)
3533 {
3534 rows= table_records;
3535 goto free_alloc;
3536 }
3537
3538 for (uint idx= 0; idx < param.keys; idx++)
3539 {
3540 SEL_ARG *key= tree->keys[idx];
3541 if (key)
3542 {
3543 Json_writer_object selectivity_for_column(thd);
3544 selectivity_for_column.add("column_name", key->field->field_name);
3545 if (key->type == SEL_ARG::IMPOSSIBLE)
3546 {
3547 rows= 0;
3548 table->reginfo.impossible_range= 1;
3549 selectivity_for_column.add("selectivity_from_histogram", rows);
3550 selectivity_for_column.add("cause", "impossible range");
3551 goto free_alloc;
3552 }
3553 else
3554 {
3555 rows= records_in_column_ranges(&param, idx, key);
3556 if (rows != DBL_MAX)
3557 {
3558 key->field->cond_selectivity= rows/table_records;
3559 selectivity_for_column.add("selectivity_from_histogram",
3560 key->field->cond_selectivity);
3561 }
3562 }
3563 }
3564 }
3565
3566 for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
3567 {
3568 Field *table_field= *field_ptr;
3569 if (bitmap_is_set(used_fields, table_field->field_index) &&
3570 table_field->cond_selectivity < 1.0)
3571 {
3572 if (!bitmap_is_set(&handled_columns, table_field->field_index))
3573 table->cond_selectivity*= table_field->cond_selectivity;
3574 }
3575 }
3576
3577 free_alloc:
3578 thd->no_errors= 0;
3579 thd->mem_root= param.old_root;
3580 free_root(&alloc, MYF(0));
3581
3582 }
3583 selectivity_for_columns.end();
3584
3585 if (quick && (quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
3586 quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
3587 {
3588 table->cond_selectivity*= (quick->records/table_records);
3589 }
3590
3591 bitmap_union(used_fields, &handled_columns);
3592
3593 /* Check if we can improve selectivity estimates by using sampling */
3594 ulong check_rows=
3595 MY_MIN(thd->variables.optimizer_selectivity_sampling_limit,
3596 (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
3597 if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD &&
3598 thd->variables.optimizer_use_condition_selectivity > 4)
3599 {
3600 find_selective_predicates_list_processor_data *dt=
3601 (find_selective_predicates_list_processor_data *)
3602 alloc_root(thd->mem_root,
3603 sizeof(find_selective_predicates_list_processor_data));
3604 if (!dt)
3605 DBUG_RETURN(TRUE);
3606 dt->list.empty();
3607 dt->table= table;
3608 if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, dt))
3609 DBUG_RETURN(TRUE);
3610 if (dt->list.elements > 0)
3611 {
3612 check_rows= check_selectivity(thd, check_rows, table, &dt->list);
3613 if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD)
3614 {
3615 COND_STATISTIC *stat;
3616 List_iterator_fast<COND_STATISTIC> it(dt->list);
3617 double examined_rows= check_rows;
3618 while ((stat= it++))
3619 {
3620 if (!stat->positive)
3621 {
3622 DBUG_PRINT("info", ("To avoid 0 assigned 1 to the counter"));
3623 stat->positive= 1; // avoid 0
3624 }
3625 DBUG_PRINT("info", ("The predicate selectivity : %g",
3626 (double)stat->positive / examined_rows));
3627 double selectivity= ((double)stat->positive) / examined_rows;
3628 table->cond_selectivity*= selectivity;
3629 /*
3630 If a field is involved then we register its selectivity in case
3631 there is an equality with the field.
3632 For example, in the case
3633 t1.a LIKE "%bla%" AND t1.a = t2.b
3634 the selectivity we have found can also be used for t2.
3635 */
3636 if (stat->field_arg)
3637 {
3638 stat->field_arg->cond_selectivity*= selectivity;
3639
3640 if (stat->field_arg->next_equal_field)
3641 {
3642 for (Field *next_field= stat->field_arg->next_equal_field;
3643 next_field != stat->field_arg;
3644 next_field= next_field->next_equal_field)
3645 {
3646 next_field->cond_selectivity*= selectivity;
3647 next_field->table->cond_selectivity*= selectivity;
3648 }
3649 }
3650 }
3651 }
3652
3653 }
3654 /* This list and its elements are allocated on mem_root and must not be freed */
3655 table->cond_selectivity_sampling_explain= &dt->list;
3656 }
3657 }
3658 trace_wrapper.add("cond_selectivity", table->cond_selectivity);
3659 DBUG_RETURN(FALSE);
3660 }
3661
3662 /****************************************************************************
3663 * Condition selectivity code ends
3664 ****************************************************************************/
3665
3666 /****************************************************************************
3667 * Partition pruning module
3668 ****************************************************************************/
3669
3670 /*
3671 Store field key image to table record
3672
3673 SYNOPSIS
3674 store_key_image_to_rec()
3675 field Field which key image should be stored
3676 ptr Field value in key format
3677 len Length of the value, in bytes
3678
3679 ATTENTION
3680 len is the length of the value not counting the NULL-byte (at the same
3681 time, ptr points to the key image, which starts with NULL-byte for
3682 nullable columns)
3683
3684 DESCRIPTION
3685 Copy the field value from its key image to the table record. The source
3686 is the value in key image format, occupying len bytes in buffer pointed
3687 by ptr. The destination is table record, in "field value in table record"
3688 format.
3689 */
3690
3691 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3692 {
3693 /* Do the same as print_key() does */
3694
3695 if (field->real_maybe_null())
3696 {
3697 if (*ptr)
3698 {
3699 field->set_null();
3700 return;
3701 }
3702 field->set_notnull();
3703 ptr++;
3704 }
3705 MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
3706 &field->table->write_set);
3707 field->set_key_image(ptr, len);
3708 dbug_tmp_restore_column_map(&field->table->write_set, old_map);
3709 }
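/*
  Illustration of the assumed key image layout (see ATTENTION above): for
  a nullable INT key part the image is one NULL-indicator byte followed by
  four value bytes, and len excludes the NULL byte:

    uchar image[5];
    image[0]= 0;                               // 0 => value is not NULL
    int4store(image + 1, 42);                  // keypart value bytes
    store_key_image_to_rec(field, image, 4);   // 'field' is hypothetical

  For a NULL value image[0] would be 1, and the function calls
  field->set_null() without touching the value bytes.
*/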
3710
3711 #ifdef WITH_PARTITION_STORAGE_ENGINE
3712
3713 /*
3714 PartitionPruningModule
3715
3716 This part of the code does partition pruning. Partition pruning solves the
3717 following problem: given a query over partitioned tables, find partitions
3718 that we will not need to access (i.e. partitions that we can assume to be
3719 empty) when executing the query.
3720 The set of partitions to prune doesn't depend on which query execution
3721 plan will be used to execute the query.
3722
3723 HOW IT WORKS
3724
3725 Partition pruning module makes use of RangeAnalysisModule. The following
3726 examples show how the problem of partition pruning can be reduced to the
3727 range analysis problem:
3728
3729 EXAMPLE 1
3730 Consider a query:
3731
3732 SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3733
3734 where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
3735 way to find the used (i.e. not pruned away) partitions is as follows:
3736
3737 1. analyze the WHERE clause and extract the list of intervals over t1.a
3738 for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3739
3740 2. for each interval I
3741 {
3742 find partitions that have non-empty intersection with I;
3743 mark them as used;
3744 }
3745
3746 EXAMPLE 2
3747 Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3748 we need to:
3749
3750 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3751 The list of intervals we'll obtain will look like this:
3752 ((t1.a, t1.b) = (1,'foo')),
3753 ((t1.a, t1.b) = (2,'bar')),
3754 ((t1.a, t1.b) > (10,'zz'))
3755
3756 2. for each interval I
3757 {
3758 if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3759 {
3760 calculate HASH(part_func(t1.a, t1.b));
3761 find which partition has records with this hash value and mark
3762 it as used;
3763 }
3764 else
3765 {
3766 mark all partitions as used;
3767 break;
3768 }
3769 }
3770
3771 For both examples the step #1 is exactly what RangeAnalysisModule could
3772 be used to do, if it was provided with appropriate index description
3773 (array of KEY_PART structures).
3774 In example #1, we need to provide it with description of index(t1.a),
3775 in example #2, we need to provide it with description of index(t1.a, t1.b).
3776
3777 These index descriptions are further called "partitioning index
3778 descriptions". Note that it doesn't matter if such indexes really exist,
3779 as range analysis module only uses the description.
3780
3781 Putting it all together, partitioning module works as follows:
3782
3783 prune_partitions() {
3784 call create_partition_index_description();
3785
3786 call get_mm_tree(); // invoke the RangeAnalysisModule
3787
3788 // analyze the obtained interval list and get used partitions
3789 call find_used_partitions();
3790 }
3791
3792 */
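/*
  A worked example of the above (illustrative): for

    CREATE TABLE t1 (a INT, b CHAR(10))
    PARTITION BY RANGE(a)
     (PARTITION p0 VALUES LESS THAN (5),
      PARTITION p1 VALUES LESS THAN (10),
      PARTITION p2 VALUES LESS THAN (MAXVALUE));

  the condition "(t1.a < 5 OR t1.a = 10) AND t1.a > 3" yields the interval
  list {(3 < a < 5), (a=10)}. Interval #1 intersects only p0 and interval
  #2 only p2, so p1 is pruned away and read_partitions= {p0, p2}.
*/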
3793
3794 struct st_part_prune_param;
3795 struct st_part_opt_info;
3796
3797 typedef void (*mark_full_part_func)(partition_info*, uint32);
3798
3799 /*
3800 Partition pruning operation context
3801 */
3802 typedef struct st_part_prune_param
3803 {
3804 RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3805
3806 /***************************************************************
3807 Following fields are filled in based solely on partitioning
3808 definition and not modified after that:
3809 **************************************************************/
3810 partition_info *part_info; /* Copy of table->part_info */
3811 /* Function to get partition id from partitioning fields only */
3812 get_part_id_func get_top_partition_id_func;
3813 /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3814 mark_full_part_func mark_full_partition_used;
3815
3816 /* Partitioning 'index' description, array of key parts */
3817 KEY_PART *key;
3818
3819 /*
3820 Number of fields in partitioning 'index' definition created for
3821 partitioning (0 if partitioning 'index' doesn't include partitioning
3822 fields)
3823 */
3824 uint part_fields;
3825 uint subpart_fields; /* Same as above for subpartitioning */
3826
3827 /*
3828 Number of the last partitioning field keypart in the index, or -1 if
3829 partitioning index definition doesn't include partitioning fields.
3830 */
3831 int last_part_partno;
3832 int last_subpart_partno; /* Same as above for subpartitioning */
3833
3834 /*
3835 is_part_keypart[i] == MY_TEST(keypart #i of the partitioning index is a
3836 field used in the partitioning function)
3837 Used to maintain current values of cur_part_fields and cur_subpart_fields
3838 */
3839 my_bool *is_part_keypart;
3840 /* Same as above for subpartitioning */
3841 my_bool *is_subpart_keypart;
3842
3843 my_bool ignore_part_fields; /* Ignore rest of partitioning fields */
3844
3845 /***************************************************************
3846 Following fields form find_used_partitions() recursion context:
3847 **************************************************************/
3848 SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */
3849 SEL_ARG **arg_stack_end; /* Top of the stack */
3850 /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3851 uint cur_part_fields;
3852 /* Same as cur_part_fields, but for subpartitioning */
3853 uint cur_subpart_fields;
3854
3855 /* Iterator to be used to obtain the "current" set of used partitions */
3856 PARTITION_ITERATOR part_iter;
3857
3858 /* Initialized bitmap of num_subparts size */
3859 MY_BITMAP subparts_bitmap;
3860
3861 uchar *cur_min_key;
3862 uchar *cur_max_key;
3863
3864 uint cur_min_flag, cur_max_flag;
3865 } PART_PRUNE_PARAM;
3866
3867 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3868 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3869 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3870 SEL_IMERGE *imerge);
3871 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3872 List<SEL_IMERGE> &merges);
3873 static void mark_all_partitions_as_used(partition_info *part_info);
3874
3875 #ifndef DBUG_OFF
3876 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3877 static void dbug_print_field(Field *field);
3878 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3879 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3880 #endif
3881
3882
3883 /**
3884 Perform partition pruning for a given table and condition.
3885
3886 @param thd Thread handle
3887 @param table Table to perform partition pruning for
3888 @param pprune_cond Condition to use for partition pruning
3889
3890 @note This function assumes that lock_partitions are set up when it
3891 is invoked. The function analyzes the condition, finds partitions that
3892 need to be used to retrieve the records that match the condition, and
3893 marks them as used by setting the appropriate bits in
3894 part_info->read_partitions. In the worst case all partitions are marked
3895 as used. If the table is not yet locked, it will also unset bits in
3896 part_info->lock_partitions that are not set in read_partitions.
3897
3898 This function returns promptly if called for a non-partitioned table.
3899
3900 @return Operation status
3901 @retval true Failure
3902 @retval false Success
3903 */
3904
3905 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3906 {
3907 bool retval= FALSE;
3908 partition_info *part_info = table->part_info;
3909 DBUG_ENTER("prune_partitions");
3910
3911 if (!part_info)
3912 DBUG_RETURN(FALSE); /* not a partitioned table */
3913
3914 if (!pprune_cond)
3915 {
3916 mark_all_partitions_as_used(part_info);
3917 DBUG_RETURN(FALSE);
3918 }
3919
3920 PART_PRUNE_PARAM prune_param;
3921 MEM_ROOT alloc;
3922 RANGE_OPT_PARAM *range_par= &prune_param.range_param;
3923 MY_BITMAP *old_sets[2];
3924
3925 prune_param.part_info= part_info;
3926 init_sql_alloc(key_memory_quick_range_select_root, &alloc,
3927 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
3928 range_par->mem_root= &alloc;
3929 range_par->old_root= thd->mem_root;
3930
3931 if (create_partition_index_description(&prune_param))
3932 {
3933 mark_all_partitions_as_used(part_info);
3934 free_root(&alloc,MYF(0)); // Return memory & allocator
3935 DBUG_RETURN(FALSE);
3936 }
3937
3938 dbug_tmp_use_all_columns(table, old_sets,
3939 &table->read_set, &table->write_set);
3940 range_par->thd= thd;
3941 range_par->table= table;
3942 /* range_par->cond doesn't need initialization */
3943 range_par->prev_tables= range_par->read_tables= 0;
3944 range_par->current_table= table->map;
3945 /* It should be possible to switch the following ON: */
3946 range_par->remove_false_where_parts= false;
3947
3948 range_par->keys= 1; // one index
3949 range_par->using_real_indexes= FALSE;
3950 range_par->remove_jump_scans= FALSE;
3951 range_par->real_keynr[0]= 0;
3952 range_par->alloced_sel_args= 0;
3953
3954 thd->no_errors=1; // Don't warn about NULL
3955 thd->mem_root=&alloc;
3956
3957 bitmap_clear_all(&part_info->read_partitions);
3958
3959 prune_param.key= prune_param.range_param.key_parts;
3960 SEL_TREE *tree;
3961 int res;
3962
3963 tree= pprune_cond->get_mm_tree(range_par, &pprune_cond);
3964 if (!tree)
3965 goto all_used;
3966
3967 if (tree->type == SEL_TREE::IMPOSSIBLE)
3968 {
3969 retval= TRUE;
3970 goto end;
3971 }
3972
3973 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3974 goto all_used;
3975
3976 if (tree->merges.is_empty())
3977 {
3978 /* Range analysis has produced a single list of intervals. */
3979 prune_param.arg_stack_end= prune_param.arg_stack;
3980 prune_param.cur_part_fields= 0;
3981 prune_param.cur_subpart_fields= 0;
3982
3983 prune_param.cur_min_key= prune_param.range_param.min_key;
3984 prune_param.cur_max_key= prune_param.range_param.max_key;
3985 prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3986
3987 init_all_partitions_iterator(part_info, &prune_param.part_iter);
3988 if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3989 tree->keys[0]))))
3990 goto all_used;
3991 }
3992 else
3993 {
3994 if (tree->merges.elements == 1)
3995 {
3996 /*
3997 Range analysis has produced a "merge" of several intervals lists, a
3998 SEL_TREE that represents an expression in form
3999 sel_imerge = (tree1 OR tree2 OR ... OR treeN)
4000 that cannot be reduced to one tree. This can only happen when
4001 partitioning index has several keyparts and the condition is OR of
4002 conditions that refer to different key parts. For example, we'll get
4003 here for "partitioning_field=const1 OR subpartitioning_field=const2"
4004 */
4005 if (-1 == (res= find_used_partitions_imerge(&prune_param,
4006 tree->merges.head())))
4007 goto all_used;
4008 }
4009 else
4010 {
4011 /*
4012 Range analysis has produced a list of several imerges, i.e. a
4013 structure that represents a condition in form
4014 imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
4015 This is produced for complicated WHERE clauses that range analyzer
4016 can't really analyze properly.
4017 */
4018 if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
4019 tree->merges)))
4020 goto all_used;
4021 }
4022 }
4023
4024 /*
4025 res == 0 => no used partitions => retval=TRUE
4026 res == 1 => some used partitions => retval=FALSE
4027 res == -1 - we jump over this line to all_used:
4028 */
4029 retval= MY_TEST(!res);
4030 goto end;
4031
4032 all_used:
4033 retval= FALSE; // some partitions are used
4034 mark_all_partitions_as_used(prune_param.part_info);
4035 end:
4036 dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
4037 thd->no_errors=0;
4038 thd->mem_root= range_par->old_root;
4039 free_root(&alloc,MYF(0)); // Return memory & allocator
4040 /*
4041 Must be a subset of the locked partitions.
4042 lock_partitions contains the partitions marked by explicit partition
4043 selection (... t PARTITION (pX) ...) and we must only use partitions
4044 within that set.
4045 */
4046 bitmap_intersect(&prune_param.part_info->read_partitions,
4047 &prune_param.part_info->lock_partitions);
4048 /*
4049 If not yet locked, also prune partitions to lock if not UPDATEing
4050 partition key fields. This will also prune lock_partitions if we are under
4051 LOCK TABLES, so prune away calls to start_stmt().
4052 TODO: enhance this prune locking to also allow pruning of
4053 'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
4054 a lock for part_key partition.
4055 */
4056 if (table->file->get_lock_type() == F_UNLCK &&
4057 !partition_key_modified(table, table->write_set))
4058 {
4059 bitmap_copy(&prune_param.part_info->lock_partitions,
4060 &prune_param.part_info->read_partitions);
4061 }
4062 if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
4063 {
4064 table->all_partitions_pruned_away= true;
4065 retval= TRUE;
4066 }
4067 DBUG_RETURN(retval);
4068 }
4069
4070
4071 /*
4072 For SEL_ARG* array, store sel_arg->min values into table record buffer
4073
4074 SYNOPSIS
4075 store_selargs_to_rec()
4076 ppar Partition pruning context
4077 start Array of SEL_ARG* for which the minimum values should be stored
4078 num Number of elements in the array
4079
4080 DESCRIPTION
4081 For each SEL_ARG* interval in the specified array, store the left edge
4082 field value (sel_arg->min, key image format) into the table record.
4083 */
4084
4085 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
4086 int num)
4087 {
4088 KEY_PART *parts= ppar->range_param.key_parts;
4089 for (SEL_ARG **end= start + num; start != end; start++)
4090 {
4091 SEL_ARG *sel_arg= (*start);
4092 store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
4093 parts[sel_arg->part].length);
4094 }
4095 }
4096
4097
4098 /* Mark a partition as used in the case when there are no subpartitions */
4099 static void mark_full_partition_used_no_parts(partition_info* part_info,
4100 uint32 part_id)
4101 {
4102 DBUG_ENTER("mark_full_partition_used_no_parts");
4103 DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
4104 bitmap_set_bit(&part_info->read_partitions, part_id);
4105 DBUG_VOID_RETURN;
4106 }
4107
4108
4109 /* Mark a partition as used in the case when there are subpartitions */
4110 static void mark_full_partition_used_with_parts(partition_info *part_info,
4111 uint32 part_id)
4112 {
4113 uint32 start= part_id * part_info->num_subparts;
4114 uint32 end= start + part_info->num_subparts;
4115 DBUG_ENTER("mark_full_partition_used_with_parts");
4116
4117 for (; start != end; start++)
4118 {
4119 DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
4120 bitmap_set_bit(&part_info->read_partitions, start);
4121 }
4122 DBUG_VOID_RETURN;
4123 }
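/*
  Example of the read_partitions numbering used above: with 4 partitions
  and num_subparts=3 the bitmap has 12 bits, and marking partition #2 sets
  bits 2*3 .. 2*3+2, i.e. bits 6, 7 and 8 (one bit per subpartition).
*/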
4124
4125 /*
4126 Find the set of used partitions for List<SEL_IMERGE>
4127 SYNOPSIS
4128 find_used_partitions_imerge_list
4129 ppar Partition pruning context.
4130 key_tree Intervals tree to perform pruning for.
4131
4132 DESCRIPTION
4133 List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
4134 The set of used partitions is an intersection of used partitions sets
4135 for imerge_{i}.
4136 We accumulate this intersection in a separate bitmap.
4137
4138 RETURN
4139 See find_used_partitions()
4140 */
4141
4142 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
4143 List<SEL_IMERGE> &merges)
4144 {
4145 MY_BITMAP all_merges;
4146 uint bitmap_bytes;
4147 my_bitmap_map *bitmap_buf;
4148 uint n_bits= ppar->part_info->read_partitions.n_bits;
4149 bitmap_bytes= bitmap_buffer_size(n_bits);
4150 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
4151 bitmap_bytes)))
4152 {
4153 /*
4154 Fallback, process just the first SEL_IMERGE. This can leave us with more
4155 partitions marked as used than actually needed.
4156 */
4157 return find_used_partitions_imerge(ppar, merges.head());
4158 }
4159 my_bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
4160 bitmap_set_prefix(&all_merges, n_bits);
4161
4162 List_iterator<SEL_IMERGE> it(merges);
4163 SEL_IMERGE *imerge;
4164 while ((imerge=it++))
4165 {
4166 int res= find_used_partitions_imerge(ppar, imerge);
4167 if (!res)
4168 {
4169 /* no used partitions on one ANDed imerge => no used partitions at all */
4170 return 0;
4171 }
4172
4173 if (res != -1)
4174 bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
4175
4176
4177 if (bitmap_is_clear_all(&all_merges))
4178 return 0;
4179
4180 bitmap_clear_all(&ppar->part_info->read_partitions);
4181 }
4182 memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
4183 bitmap_bytes);
4184 return 1;
4185 }
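/*
  Example: if imerge1 marks partitions {p0,p1} and imerge2 marks {p1,p2},
  the intersection accumulated in all_merges is {p0,p1} & {p1,p2} = {p1}.
  An empty intersection lets us return 0 ("no used partitions") early.
*/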
4186
4187
4188 /*
4189 Find the set of used partitions for SEL_IMERGE structure
4190 SYNOPSIS
4191 find_used_partitions_imerge()
4192 ppar Partition pruning context.
4193 key_tree Intervals tree to perform pruning for.
4194
4195 DESCRIPTION
4196 SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
4197 trivial - just mark used partitions for each tree and bail out early
4198 if for some tree_{i} all partitions are used.
4199
4200 RETURN
4201 See find_used_partitions().
4202 */
4203
4204 static
4205 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
4206 {
4207 int res= 0;
4208 for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
4209 {
4210 ppar->arg_stack_end= ppar->arg_stack;
4211 ppar->cur_part_fields= 0;
4212 ppar->cur_subpart_fields= 0;
4213
4214 ppar->cur_min_key= ppar->range_param.min_key;
4215 ppar->cur_max_key= ppar->range_param.max_key;
4216 ppar->cur_min_flag= ppar->cur_max_flag= 0;
4217
4218 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4219 SEL_ARG *key_tree= (*ptree)->keys[0];
4220 if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
4221 return -1;
4222 }
4223 return res;
4224 }
4225
4226
4227 /*
4228 Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
4229
4230 SYNOPSIS
4231 find_used_partitions()
4232 ppar Partition pruning context.
4233 key_tree SEL_ARG range tree to perform pruning for
4234
4235 DESCRIPTION
4236 This function
4237 * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
4238 * finds the partitions one needs to use to get rows in these intervals
4239 * marks these partitions as used.
4240 The next section describes the process in greater detail.
4241
4242 IMPLEMENTATION
4243 TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
4244 We can find out which [sub]partitions to use if we obtain restrictions on
4245 [sub]partitioning fields in the following form:
4246 1. "partition_field1=const1 AND ... AND partition_fieldN=constN"
4247 1.1 Same as (1) but for subpartition fields
4248
4249 If partitioning supports interval analysis (i.e. partitioning is a
4250 function of a single table field, and partition_info::
4251 get_part_iter_for_interval != NULL), then we can also use condition in
4252 this form:
4253 2. "const1 <=? partition_field <=? const2"
4254 2.1 Same as (2) but for subpartition_field
4255
4256 INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
4257
4258 The below is an example of what SEL_ARG tree may represent:
4259
4260 (start)
4261 | $
4262 | Partitioning keyparts $ subpartitioning keyparts
4263 | $
4264 | ... ... $
4265 | | | $
4266 | +---------+ +---------+ $ +-----------+ +-----------+
4267 \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
4268 +---------+ +---------+ $ +-----------+ +-----------+
4269 | $ | |
4270 | $ | +-----------+
4271 | $ | | subpar2=c6|
4272 | $ | +-----------+
4273 | $ |
4274 | $ +-----------+ +-----------+
4275 | $ | subpar1=c4|--| subpar2=c8|
4276 | $ +-----------+ +-----------+
4277 | $
4278 | $
4279 +---------+ $ +------------+ +------------+
4280 | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
4281 +---------+ $ +------------+ +------------+
4282 | $
4283 ... $
4284
4285 The up-down connections are connections via SEL_ARG::left and
4286 SEL_ARG::right. A horizontal connection to the right is the
4287 SEL_ARG::next_key_part connection.
4288
4289 find_used_partitions() traverses the entire tree via recursion on
4290 * SEL_ARG::next_key_part (from left to right on the picture)
4291 * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
4292 performed for each depth level.
4293
4294 Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
4295 ppar->arg_stack) constraints on partitioning and subpartitioning fields.
4296 For the example in the above picture, one of stack states is:
4297 in find_used_partitions(key_tree = "subpar2=c5") (***)
4298 in find_used_partitions(key_tree = "subpar1=c3")
4299 in find_used_partitions(key_tree = "par2=c2") (**)
4300 in find_used_partitions(key_tree = "par1=c1")
4301 in prune_partitions(...)
4302 We apply partitioning limits as soon as possible, e.g. when we reach the
4303 depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
4304 and save them in ppar->part_iter.
4305 When we reach the depth (***), we find which subpartition(s) correspond to
4306 "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
4307 appropriate subpartitions as used.
4308
4309 It is possible that constraints on some partitioning fields are missing.
4310 For the above example, consider this stack state:
4311 in find_used_partitions(key_tree = "subpar2=c12") (***)
4312 in find_used_partitions(key_tree = "subpar1=c10")
4313 in find_used_partitions(key_tree = "par1=c2")
4314 in prune_partitions(...)
4315 Here we don't have constraints for all partitioning fields. Since we've
4316 never set the ppar->part_iter to contain used set of partitions, we use
4317 its default "all partitions" value. We get subpartition id for
4318 "subpar1=c3 AND subpar2=c5", and mark that subpartition as used in every
4319 partition.
4320
4321 The inverse is also possible: we may get constraints on partitioning
4322 fields, but not constraints on subpartitioning fields. In that case,
4323 calls to find_used_partitions() with depth below (**) will return -1,
4324 and we will mark the entire partition as used.
4325
4326 TODO
4327 Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
4328
4329 RETURN
4330 1 OK, one or more [sub]partitions are marked as used.
4331 0 The passed condition doesn't match any partitions
4332 -1 Couldn't infer any partition pruning "intervals" from the passed
4333 SEL_ARG* tree (which means that all partitions should be marked as
4334 used). Marking partitions as used is the responsibility of the caller.
4335 */
4336
4337 static
4338 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
4339 {
4340 int res, left_res=0, right_res=0;
4341 int key_tree_part= (int)key_tree->part;
4342 bool set_full_part_if_bad_ret= FALSE;
4343 bool ignore_part_fields= ppar->ignore_part_fields;
4344 bool did_set_ignore_part_fields= FALSE;
4345 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4346
4347 if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
4348 return -1;
4349
4350 if (key_tree->left != &null_element)
4351 {
4352 if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
4353 return -1;
4354 }
4355
4356 /* Push SEL_ARG's to stack to enable looking backwards as well */
4357 ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
4358 ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
4359 *(ppar->arg_stack_end++)= key_tree;
4360
4361 if (ignore_part_fields)
4362 {
4363 /*
4364 We come here when a condition on the first partitioning
4365 fields led to evaluating the partitioning condition
4366 (due to finding a condition of the type a < const or
4367 b > const). Thus we must ignore the rest of the
4368 partitioning fields but we still want to analyse the
4369 subpartitioning fields.
4370 */
4371 if (key_tree->next_key_part)
4372 res= find_used_partitions(ppar, key_tree->next_key_part);
4373 else
4374 res= -1;
4375 goto pop_and_go_right;
4376 }
4377
4378 if (key_tree->type == SEL_ARG::KEY_RANGE)
4379 {
4380 if (ppar->part_info->get_part_iter_for_interval &&
4381 key_tree->part <= ppar->last_part_partno)
4382 {
4383 /* Collect left and right bound, their lengths and flags */
4384 uchar *min_key= ppar->cur_min_key;
4385 uchar *max_key= ppar->cur_max_key;
4386 uchar *tmp_min_key= min_key;
4387 uchar *tmp_max_key= max_key;
4388 key_tree->store_min(ppar->key[key_tree->part].store_length,
4389 &tmp_min_key, ppar->cur_min_flag);
4390 key_tree->store_max(ppar->key[key_tree->part].store_length,
4391 &tmp_max_key, ppar->cur_max_flag);
4392 uint flag;
4393 if (key_tree->next_key_part &&
4394 key_tree->next_key_part->part == key_tree->part+1 &&
4395 key_tree->next_key_part->part <= ppar->last_part_partno &&
4396 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
4397 {
4398 /*
4399 There are more key parts for partition pruning to handle
4400 This mainly happens when the condition is an equality
4401 condition.
4402 */
4403 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
4404 (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
4405 !key_tree->min_flag && !key_tree->max_flag)
4406 {
4407 /* Set 'parameters' */
4408 ppar->cur_min_key= tmp_min_key;
4409 ppar->cur_max_key= tmp_max_key;
4410 uint save_min_flag= ppar->cur_min_flag;
4411 uint save_max_flag= ppar->cur_max_flag;
4412
4413 ppar->cur_min_flag|= key_tree->min_flag;
4414 ppar->cur_max_flag|= key_tree->max_flag;
4415
4416 res= find_used_partitions(ppar, key_tree->next_key_part);
4417
4418 /* Restore 'parameters' back */
4419 ppar->cur_min_key= min_key;
4420 ppar->cur_max_key= max_key;
4421
4422 ppar->cur_min_flag= save_min_flag;
4423 ppar->cur_max_flag= save_max_flag;
4424 goto pop_and_go_right;
4425 }
4426 /* We have arrived at the last field in the partition pruning */
4427 uint tmp_min_flag= key_tree->min_flag,
4428 tmp_max_flag= key_tree->max_flag;
4429 if (!tmp_min_flag)
4430 key_tree->next_key_part->store_min_key(ppar->key,
4431 &tmp_min_key,
4432 &tmp_min_flag,
4433 ppar->last_part_partno);
4434 if (!tmp_max_flag)
4435 key_tree->next_key_part->store_max_key(ppar->key,
4436 &tmp_max_key,
4437 &tmp_max_flag,
4438 ppar->last_part_partno);
4439 flag= tmp_min_flag | tmp_max_flag;
4440 }
4441 else
4442 flag= key_tree->min_flag | key_tree->max_flag;
4443
4444 if (tmp_min_key != range_par->min_key)
4445 flag&= ~NO_MIN_RANGE;
4446 else
4447 flag|= NO_MIN_RANGE;
4448 if (tmp_max_key != range_par->max_key)
4449 flag&= ~NO_MAX_RANGE;
4450 else
4451 flag|= NO_MAX_RANGE;
4452
4453 /*
4454 We need to call the interval mapper if we have a condition which
4455 makes sense to prune on. In the example of COLUMNS on a and
4456 b it makes sense if we have a condition on a, or conditions on
4457 both a and b. If we only have conditions on b it might make sense
4458 but this is a harder case we will solve later. For the harder case
4459 this clause then turns into use of all partitions and thus we
4460 simply set res= -1 as if the mapper had returned that.
4461 TODO: What to do here is defined in WL#4065.
4462 */
4463 if (ppar->arg_stack[0]->part == 0 || ppar->part_info->part_type == VERSIONING_PARTITION)
4464 {
4465 uint32 i;
4466 uint32 store_length_array[MAX_KEY];
4467 uint32 num_keys= ppar->part_fields;
4468
4469 for (i= 0; i < num_keys; i++)
4470 store_length_array[i]= ppar->key[i].store_length;
4471 res= ppar->part_info->
4472 get_part_iter_for_interval(ppar->part_info,
4473 FALSE,
4474 store_length_array,
4475 range_par->min_key,
4476 range_par->max_key,
4477 (uint)(tmp_min_key - range_par->min_key),
4478 (uint)(tmp_max_key - range_par->max_key),
4479 flag,
4480 &ppar->part_iter);
4481 if (!res)
4482 goto pop_and_go_right; /* res==0 --> no satisfying partitions */
4483 }
4484 else
4485 res= -1;
4486
4487 if (res == -1)
4488 {
4489 /* get a full range iterator */
4490 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4491 }
4492 /*
4493 Save our intent to mark full partition as used if we will not be able
4494 to obtain further limits on subpartitions
4495 */
4496 if (key_tree_part < ppar->last_part_partno)
4497 {
4498 /*
4499 We need to ignore the rest of the partitioning fields in all
4500 evaluations after this
4501 */
4502 did_set_ignore_part_fields= TRUE;
4503 ppar->ignore_part_fields= TRUE;
4504 }
4505 set_full_part_if_bad_ret= TRUE;
4506 goto process_next_key_part;
4507 }
4508
4509 if (key_tree_part == ppar->last_subpart_partno &&
4510 (NULL != ppar->part_info->get_subpart_iter_for_interval))
4511 {
4512 PARTITION_ITERATOR subpart_iter;
4513 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
4514 range_par->key_parts););
4515 res= ppar->part_info->
4516 get_subpart_iter_for_interval(ppar->part_info,
4517 TRUE,
4518 NULL, /* Currently not used here */
4519 key_tree->min_value,
4520 key_tree->max_value,
4521 0, 0, /* Those are ignored here */
4522 key_tree->min_flag |
4523 key_tree->max_flag,
4524 &subpart_iter);
4525 if (res == 0)
4526 {
4527 /*
4528 The only case where we can get "no satisfying subpartitions"
4529 returned from the above call is when an error has occurred.
4530 */
4531 DBUG_ASSERT(range_par->thd->is_error());
4532 return 0;
4533 }
4534
4535 if (res == -1)
4536 goto pop_and_go_right; /* all subpartitions satisfy */
4537
4538 uint32 subpart_id;
4539 bitmap_clear_all(&ppar->subparts_bitmap);
4540 while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
4541 NOT_A_PARTITION_ID)
4542 bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
4543
4544 /* Mark the found subpartitions as used in each qualifying partition. */
4545 uint32 part_id;
4546 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4547 NOT_A_PARTITION_ID)
4548 {
4549 for (uint i= 0; i < ppar->part_info->num_subparts; i++)
4550 if (bitmap_is_set(&ppar->subparts_bitmap, i))
4551 bitmap_set_bit(&ppar->part_info->read_partitions,
4552 part_id * ppar->part_info->num_subparts + i);
4553 }
4554 goto pop_and_go_right;
4555 }
4556
4557 if (key_tree->is_singlepoint())
4558 {
4559 if (key_tree_part == ppar->last_part_partno &&
4560 ppar->cur_part_fields == ppar->part_fields &&
4561 ppar->part_info->get_part_iter_for_interval == NULL)
4562 {
4563 /*
4564 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
4565 fields. Save all constN constants into table record buffer.
4566 */
4567 store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
4568 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
4569 ppar->part_fields););
4570 uint32 part_id;
4571 longlong func_value;
4572 /* Find in which partition the {const1, ...,constN} tuple goes */
4573 if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
4574 &func_value))
4575 {
4576 res= 0; /* No satisfying partitions */
4577 goto pop_and_go_right;
4578 }
4579 /* Remember the limit we got - single partition #part_id */
4580 init_single_partition_iterator(part_id, &ppar->part_iter);
4581
4582 /*
4583 If there are no subpartitions/we fail to get any limit for them,
4584 then we'll mark full partition as used.
4585 */
4586 set_full_part_if_bad_ret= TRUE;
4587 goto process_next_key_part;
4588 }
4589
4590 if (key_tree_part == ppar->last_subpart_partno &&
4591 ppar->cur_subpart_fields == ppar->subpart_fields)
4592 {
4593 /*
4594 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
4595 fields. Save all constN constants into table record buffer.
4596 */
4597 store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
4598 ppar->subpart_fields);
4599 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
4600 ppar->subpart_fields,
4601 ppar->subpart_fields););
4602 /* Find the subpartition (it's HASH/KEY so we always have one) */
4603 partition_info *part_info= ppar->part_info;
4604 uint32 part_id, subpart_id;
4605
4606 if (part_info->get_subpartition_id(part_info, &subpart_id))
4607 return 0;
4608
4609 /* Mark this subpartition as used in each qualifying partition. */
4610 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4611 NOT_A_PARTITION_ID)
4612 {
4613 bitmap_set_bit(&part_info->read_partitions,
4614 part_id * part_info->num_subparts + subpart_id);
4615 }
4616 res= 1; /* Some partitions were marked as used */
4617 goto pop_and_go_right;
4618 }
4619 }
4620 else
4621 {
4622 /*
4623 Can't handle a condition on the current key part. If we're deep enough
4624 to be processing subpartitioning's key parts, this means we won't be
4625 able to infer any suitable condition, so bail out.
4626 */
4627 if (key_tree_part >= ppar->last_part_partno)
4628 {
4629 res= -1;
4630 goto pop_and_go_right;
4631 }
4632 /*
4633 No meaning in continuing with rest of partitioning key parts.
4634 Will try to continue with subpartitioning key parts.
4635 */
4636 ppar->ignore_part_fields= true;
4637 did_set_ignore_part_fields= true;
4638 goto process_next_key_part;
4639 }
4640 }
4641
4642 process_next_key_part:
4643 if (key_tree->next_key_part)
4644 res= find_used_partitions(ppar, key_tree->next_key_part);
4645 else
4646 res= -1;
4647
4648 if (did_set_ignore_part_fields)
4649 {
4650 /*
4651 We have returned from processing all key trees linked to our next
4652 key part. We are ready to be moving down (using right pointers) and
4653 this tree is a new evaluation requiring its own decision on whether
4654 to ignore partitioning fields.
4655 */
4656 ppar->ignore_part_fields= FALSE;
4657 }
4658 if (set_full_part_if_bad_ret)
4659 {
4660 if (res == -1)
4661 {
4662 /* Got "full range" for subpartitioning fields */
4663 uint32 part_id;
4664 bool found= FALSE;
4665 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4666 NOT_A_PARTITION_ID)
4667 {
4668 ppar->mark_full_partition_used(ppar->part_info, part_id);
4669 found= TRUE;
4670 }
4671 res= MY_TEST(found);
4672 }
4673 /*
4674 Restore the "used partitions iterator" to the default setting that
4675 specifies iteration over all partitions.
4676 */
4677 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4678 }
4679
4680 pop_and_go_right:
4681 /* Pop this key part info off the "stack" */
4682 ppar->arg_stack_end--;
4683 ppar->cur_part_fields-= ppar->is_part_keypart[key_tree_part];
4684 ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4685
4686 if (res == -1)
4687 return -1;
4688 if (key_tree->right != &null_element)
4689 {
4690 if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4691 return -1;
4692 }
4693 return (left_res || right_res || res);
4694 }
4695
4696
4697 static void mark_all_partitions_as_used(partition_info *part_info)
4698 {
4699 bitmap_copy(&(part_info->read_partitions),
4700 &(part_info->lock_partitions));
4701 }
4702
4703
4704 /*
4705 Check if field types allow to construct partitioning index description
4706
4707 SYNOPSIS
4708 fields_ok_for_partition_index()
4709 pfield NULL-terminated array of pointers to fields.
4710
4711 DESCRIPTION
4712 For an array of fields, check if we can use all of the fields to create
4713 partitioning index description.
4714
4715 We can't process GEOMETRY fields - for these fields singlepoint intervals
4716 can't be generated, and non-singlepoint intervals are "special" kinds of
4717 intervals to which our processing logic can't be applied.
4718
4719 It is not known if we could process ENUM fields, so they are disabled to be
4720 on the safe side.
4721
4722 RETURN
4723 TRUE Yes, fields can be used in partitioning index
4724 FALSE Otherwise
4725 */
4726
4727 static bool fields_ok_for_partition_index(Field **pfield)
4728 {
4729 if (!pfield)
4730 return FALSE;
4731 for (; (*pfield); pfield++)
4732 {
4733 enum_field_types ftype= (*pfield)->real_type();
4734 if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4735 return FALSE;
4736 }
4737 return TRUE;
4738 }
4739
4740
4741 /*
4742 Create partition index description and fill related info in the context
4743 struct
4744
4745 SYNOPSIS
4746 create_partition_index_description()
4747 prune_par INOUT Partition pruning context
4748
4749 DESCRIPTION
4750 Create partition index description. Partition index description is:
4751
4752 part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4753
4754 If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4755 corresponding fields_list(...) is not included into index description
4756 and we don't perform partition pruning for partitions/subpartitions.
4757
4758 RETURN
4759 TRUE Out of memory or can't do partition pruning at all
4760 FALSE OK
4761 */
4762
4763 static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
4764 {
4765 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4766 partition_info *part_info= ppar->part_info;
4767 uint used_part_fields, used_subpart_fields;
4768
4769 used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
4770 part_info->num_part_fields : 0;
4771 used_subpart_fields=
4772 fields_ok_for_partition_index(part_info->subpart_field_array)?
4773 part_info->num_subpart_fields : 0;
4774
4775 uint total_parts= used_part_fields + used_subpart_fields;
4776
4777 ppar->ignore_part_fields= FALSE;
4778 ppar->part_fields= used_part_fields;
4779 ppar->last_part_partno= (int)used_part_fields - 1;
4780
4781 ppar->subpart_fields= used_subpart_fields;
4782 ppar->last_subpart_partno=
4783 used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;
4784
4785 if (part_info->is_sub_partitioned())
4786 {
4787 ppar->mark_full_partition_used= mark_full_partition_used_with_parts;
4788 ppar->get_top_partition_id_func= part_info->get_part_partition_id;
4789 }
4790 else
4791 {
4792 ppar->mark_full_partition_used= mark_full_partition_used_no_parts;
4793 ppar->get_top_partition_id_func= part_info->get_partition_id;
4794 }
4795
4796 KEY_PART *key_part;
4797 MEM_ROOT *alloc= range_par->mem_root;
4798 if (!total_parts ||
4799 !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
4800 total_parts)) ||
4801 !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
4802 total_parts)) ||
4803 !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4804 total_parts)) ||
4805 !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4806 total_parts)))
4807 return TRUE;
4808
4809 if (ppar->subpart_fields)
4810 {
4811 my_bitmap_map *buf;
4812 uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
4813 if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
4814 return TRUE;
4815 my_bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
4816 FALSE);
4817 }
4818 range_par->key_parts= key_part;
4819 Field **field= (ppar->part_fields)? part_info->part_field_array :
4820 part_info->subpart_field_array;
4821 bool in_subpart_fields= FALSE;
4822 uint total_key_len= 0;
4823 for (uint part= 0; part < total_parts; part++, key_part++)
4824 {
4825 key_part->key= 0;
4826 key_part->part= part;
4827 key_part->length= (uint16)(*field)->key_length();
4828 key_part->store_length= (uint16)get_partition_field_store_length(*field);
4829 total_key_len += key_part->store_length;
4830
4831 DBUG_PRINT("info", ("part %u length %u store_length %u", part,
4832 key_part->length, key_part->store_length));
4833
4834 key_part->field= (*field);
4835 key_part->image_type = Field::itRAW;
4836 /*
4837 We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
4838 in the RangeAnalysisModule.
4839 */
4840 key_part->flag= 0;
4841 /* We don't set key_parts->null_bit as it will not be used */
4842
4843 ppar->is_part_keypart[part]= !in_subpart_fields;
4844 ppar->is_subpart_keypart[part]= in_subpart_fields;
4845
4846 /*
4847 Check if this was the last field in this array; in that case we
4848 switch to the subpartitioning fields. (This only happens if
4849 there are subpartitioning fields to cater for.)
4850 */
4851 if (!*(++field))
4852 {
4853 field= part_info->subpart_field_array;
4854 in_subpart_fields= TRUE;
4855 }
4856 }
4857 range_par->key_parts_end= key_part;
4858
4859 total_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
4860 if (!(range_par->min_key= (uchar*)alloc_root(alloc,total_key_len)) ||
4861 !(range_par->max_key= (uchar*)alloc_root(alloc,total_key_len)))
4862 {
4863 return true;
4864 }
4865
4866 DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
4867 range_par->key_parts_end););
4868 return FALSE;
4869 }
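/*
  Example (illustrative): for a table with
    PARTITION BY RANGE(a) SUBPARTITION BY HASH(b)
  the constructed partitioning 'index' description is (a, b):
  key_part[0] covers field a (is_part_keypart[0]=1), key_part[1] covers
  field b (is_subpart_keypart[1]=1), last_part_partno=0 and
  last_subpart_partno=1.
*/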
4870
4871
4872 #ifndef DBUG_OFF
4873
4874 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4875 {
4876 DBUG_ENTER("print_partitioning_index");
4877 DBUG_LOCK_FILE;
4878 fprintf(DBUG_FILE, "partitioning INDEX(");
4879 for (KEY_PART *p=parts; p != parts_end; p++)
4880 {
4881 fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name.str);
4882 }
4883 fputs(");\n", DBUG_FILE);
4884 DBUG_UNLOCK_FILE;
4885 DBUG_VOID_RETURN;
4886 }
4887
4888 /* Print field value into debug trace, in NULL-aware way. */
4889 static void dbug_print_field(Field *field)
4890 {
4891 if (field->is_real_null())
4892 fprintf(DBUG_FILE, "NULL");
4893 else
4894 {
4895 char buf[256];
4896 String str(buf, sizeof(buf), &my_charset_bin);
4897 str.length(0);
4898 String *pstr;
4899 pstr= field->val_str(&str);
4900 fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
4901 }
4902 }
4903
4904
4905 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
4906 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4907 {
4908 DBUG_ENTER("dbug_print_segment_range");
4909 DBUG_LOCK_FILE;
4910 if (!(arg->min_flag & NO_MIN_RANGE))
4911 {
4912 store_key_image_to_rec(part->field, arg->min_value, part->length);
4913 dbug_print_field(part->field);
4914 if (arg->min_flag & NEAR_MIN)
4915 fputs(" < ", DBUG_FILE);
4916 else
4917 fputs(" <= ", DBUG_FILE);
4918 }
4919
4920 fprintf(DBUG_FILE, "%s", part->field->field_name.str);
4921
4922 if (!(arg->max_flag & NO_MAX_RANGE))
4923 {
4924 if (arg->max_flag & NEAR_MAX)
4925 fputs(" < ", DBUG_FILE);
4926 else
4927 fputs(" <= ", DBUG_FILE);
4928 store_key_image_to_rec(part->field, arg->max_value, part->length);
4929 dbug_print_field(part->field);
4930 }
4931 fputs("\n", DBUG_FILE);
4932 DBUG_UNLOCK_FILE;
4933 DBUG_VOID_RETURN;
4934 }
4935
4936
4937 /*
4938 Print a singlepoint multi-keypart range interval to debug trace
4939
4940 SYNOPSIS
4941 dbug_print_singlepoint_range()
4942 start Array of SEL_ARG* ptrs representing conditions on key parts
4943 num Number of elements in the array.
4944
4945 DESCRIPTION
4946 This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4947 interval to debug trace.
4948 */
4949
4950 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4951 {
4952 DBUG_ENTER("dbug_print_singlepoint_range");
4953 DBUG_LOCK_FILE;
4954 SEL_ARG **end= start + num;
4955
4956 for (SEL_ARG **arg= start; arg != end; arg++)
4957 {
4958 Field *field= (*arg)->field;
4959 fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name.str);
4960 dbug_print_field(field);
4961 }
4962 fputs("\n", DBUG_FILE);
4963 DBUG_UNLOCK_FILE;
4964 DBUG_VOID_RETURN;
4965 }
4966 #endif
4967
4968 /****************************************************************************
4969 * Partition pruning code ends
4970 ****************************************************************************/
4971 #endif
4972
4973
4974 /*
4975 Get the cost of 'sweep' full record retrieval.
4976 SYNOPSIS
4977 get_sweep_read_cost()
4978 param Parameter from test_quick_select
4979 records # of records to be retrieved
4980 RETURN
4981 cost of sweep
4982 */
4983
4984 double get_sweep_read_cost(const PARAM *param, ha_rows records)
4985 {
4986 double result;
4987 uint pk= param->table->s->primary_key;
4988 DBUG_ENTER("get_sweep_read_cost");
4989 if (param->table->file->pk_is_clustering_key(pk) ||
4990 param->table->file->stats.block_size == 0 /* HEAP */)
4991 {
4992 /*
4993 We are using the primary key to find the rows.
4994 Calculate the cost for this.
4995 */
4996 result= param->table->file->read_time(pk, (uint)records, records);
4997 }
4998 else
4999 {
5000 /*
5001 Rows will be retrieved with rnd_pos(). Calculate the expected
5002 cost for this.
5003 */
5004 double n_blocks=
5005 ceil(ulonglong2double(param->table->file->stats.data_file_length) /
5006 IO_SIZE);
5007 double busy_blocks=
5008 n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
5009 if (busy_blocks < 1.0)
5010 busy_blocks= 1.0;
5011 DBUG_PRINT("info",("sweep: nblocks: %g, busy_blocks: %g", n_blocks,
5012 busy_blocks));
5013 /*
5014 Disabled: Bail out if # of blocks to read is bigger than # of blocks in
5015 table data file.
5016 if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks)
5017 return 1;
5018 */
5019 JOIN *join= param->thd->lex->first_select_lex()->join;
5020 if (!join || join->table_count == 1)
5021 {
5022 /* No join, assume reading is done in one 'sweep' */
5023 result= busy_blocks*(DISK_SEEK_BASE_COST +
5024 DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
5025 }
5026 else
5027 {
5028 /*
5029 Possibly this is a join with source table being non-last table, so
5030 assume that disk seeks are random here.
5031 */
5032 result= busy_blocks;
5033 }
5034 }
5035 DBUG_PRINT("return",("cost: %g", result));
5036 DBUG_RETURN(result);
5037 }
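
/*
  Illustrative sketch (not server code): the busy-blocks model above can
  be reproduced in isolation. The helper below mirrors the non-clustered
  branch of get_sweep_read_cost(); the seek-cost constants (1.0 and 0.01)
  and all input numbers are made-up assumptions, not the server's actual
  DISK_SEEK_* values.

    #include <cmath>

    double sweep_cost_sketch(double data_file_length, double io_size,
                             double records)
    {
      double n_blocks= std::ceil(data_file_length / io_size);
      // P(a given block holds at least one wanted row)
      double busy_blocks=
        n_blocks * (1.0 - std::pow(1.0 - 1.0 / n_blocks, records));
      if (busy_blocks < 1.0)
        busy_blocks= 1.0;
      // one-sweep read: assumed base seek cost 1.0 plus a 0.01
      // proportional part per skipped-over block
      return busy_blocks * (1.0 + 0.01 * n_blocks / busy_blocks);
    }

  For example, a 16MB data file with IO_SIZE=4096 gives n_blocks=4096;
  retrieving 1000 rows is expected to touch about
  4096*(1-(1-1/4096)^1000) ~= 887 blocks.
*/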
5038
5039
5040 /*
5041 Get best plan for a SEL_IMERGE disjunctive expression.
5042 SYNOPSIS
5043 get_best_disjunct_quick()
5044 param Parameter from check_quick_select function
5045 imerge Expression to use
5046 read_time Don't create scans with cost > read_time
5047
5048 NOTES
5049 index_merge cost is calculated as follows:
5050 index_merge_cost =
5051 cost(index_reads) + (see #1)
5052 cost(rowid_to_row_scan) + (see #2)
5053 cost(unique_use) (see #3)
5054
5055 1. cost(index_reads) = SUM_i(cost(index_read_i))
5056 For non-CPK scans,
5057 cost(index_read_i) = {cost of ordinary 'index only' scan}
5058 For CPK scan,
5059 cost(index_read_i) = {cost of non-'index only' scan}
5060
5061 2. cost(rowid_to_row_scan)
5062 If table PK is clustered then
5063 cost(rowid_to_row_scan) =
5064 {cost of ordinary clustered PK scan with n_ranges=n_rows}
5065
5066 Otherwise, we use the following model to calculate costs:
5067 We need to retrieve n_rows rows from file that occupies n_blocks blocks.
5068 We assume that offsets of rows we need are independent variates with
5069 uniform distribution in [0..max_file_offset] range.
5070
5071 We'll denote block as "busy" if it contains row(s) we need to retrieve
5072 and "empty" if doesn't contain rows we need.
5073
5074 Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
5075 applies to any block in file). Let x_i be a variate taking value 1 if
5076 block #i is empty and 0 otherwise.
5077
5078 Then E(x_i) = (1 - 1/n_blocks)^n_rows;
5079
5080 E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
5081 = n_blocks * ((1 - 1/n_blocks)^n_rows) =
5082 ~= n_blocks * exp(-n_rows/n_blocks).
5083
5084 E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
5085 ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
5086
5087 Average size of "hole" between neighbor non-empty blocks is
5088 E(hole_size) = n_blocks/E(n_busy_blocks).
5089
5090 The total cost of reading all needed blocks in one "sweep" is:
5091
5092 E(n_busy_blocks)*
5093 (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
5094
5095 3. Cost of Unique use is calculated in Unique::get_use_cost function.
5096
5097 ROR-union cost is calculated in the same way as index_merge cost, but
5098 a priority queue is used instead of Unique.
5099
5100 RETURN
5101 Created read plan
5102 NULL - Out of memory or no read scan could be built.
5103 */
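
/*
  A quick numeric check of the model above (numbers are illustrative
  only): with n_blocks = 1000 and n_rows = 1000,
    E(n_busy_blocks) ~= 1000 * (1 - exp(-1)) ~= 632,
  i.e. roughly 63% of the blocks are expected to contain at least one of
  the needed rows, even though n_rows equals n_blocks.
*/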
5104
5105 static
5106 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
5107 double read_time, bool named_trace)
5108 {
5109 SEL_TREE **ptree;
5110 TRP_INDEX_MERGE *imerge_trp= NULL;
5111 TRP_RANGE **range_scans;
5112 TRP_RANGE **cur_child;
5113 TRP_RANGE **cpk_scan= NULL;
5114 bool imerge_too_expensive= FALSE;
5115 double imerge_cost= 0.0;
5116 ha_rows cpk_scan_records= 0;
5117 ha_rows non_cpk_scan_records= 0;
5118 bool all_scans_ror_able= TRUE;
5119 bool all_scans_rors= TRUE;
5120 uint unique_calc_buff_size;
5121 TABLE_READ_PLAN **roru_read_plans;
5122 TABLE_READ_PLAN **cur_roru_plan;
5123 double roru_index_costs;
5124 ha_rows roru_total_records;
5125 double roru_intersect_part= 1.0;
5126 size_t n_child_scans;
5127 double limit_read_time= read_time;
5128 THD *thd= param->thd;
5129 DBUG_ENTER("get_best_disjunct_quick");
5130 DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
5131
5132 /*
5133 In every tree of imerge remove SEL_ARG trees that do not make ranges.
5134 If after this removal some SEL_ARG tree becomes empty, discard the imerge.
5135 */
5136 for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++)
5137 {
5138 if (remove_nonrange_trees(param, *ptree))
5139 {
5140 imerge->trees_next= imerge->trees;
5141 break;
5142 }
5143 }
5144
5145 n_child_scans= imerge->trees_next - imerge->trees;
5146
5147 if (!n_child_scans)
5148 DBUG_RETURN(NULL);
5149
5150 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
5151 sizeof(TRP_RANGE*)*
5152 n_child_scans)))
5153 DBUG_RETURN(NULL);
5154
5155 const char* trace_best_disjunct_obj_name= named_trace ? "best_disjunct_quick" : nullptr;
5156 Json_writer_object trace_best_disjunct(thd, trace_best_disjunct_obj_name);
5157 Json_writer_array to_merge(thd, "indexes_to_merge");
5158 /*
5159 Collect best 'range' scan for each of disjuncts, and, while doing so,
5160 analyze possibility of ROR scans. Also calculate some values needed by
5161 other parts of the code.
5162 */
5163 for (ptree= imerge->trees, cur_child= range_scans;
5164 ptree != imerge->trees_next;
5165 ptree++, cur_child++)
5166 {
5167 DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
5168 "tree in SEL_IMERGE"););
5169 Json_writer_object trace_idx(thd);
5170 if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE,
5171 read_time)))
5172 {
5173 /*
5174 One of the index scans in this index_merge is more expensive than a
5175 full table read or another already available option. The entire
5176 index_merge (and any possible ROR-union) will then be more expensive,
5177 too. We continue here only to update SQL_SELECT members.
5178 */
5179 imerge_too_expensive= TRUE;
5180 }
5181 if (imerge_too_expensive)
5182 {
5183 trace_idx.add("chosen", false).add("cause", "cost");
5184 continue;
5185 }
5186 const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
5187 imerge_cost += (*cur_child)->read_cost;
5188 all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
5189 all_scans_rors &= (*cur_child)->is_ror;
5190 if (param->table->file->is_clustering_key(param->real_keynr[(*cur_child)->key_idx]))
5191 {
5192 cpk_scan= cur_child;
5193 cpk_scan_records= (*cur_child)->records;
5194 }
5195 else
5196 non_cpk_scan_records += (*cur_child)->records;
5197 trace_idx.add("index_to_merge",
5198 param->table->key_info[keynr_in_table].name)
5199 .add("cumulated_cost", imerge_cost);
5200 }
5201
5202 to_merge.end();
5203
5204 DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
5205 trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
5206
5207 if (imerge_too_expensive || (imerge_cost > read_time) ||
5208 ((non_cpk_scan_records+cpk_scan_records >=
5209 param->table->stat_records()) &&
5210 read_time != DBL_MAX))
5211 {
5212 /*
5213 Bail out if it is obvious that both index_merge and ROR-union will be
5214 more expensive
5215 */
5216 DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
5217 "full table scan, bailing out"));
5218 trace_best_disjunct.add("chosen", false).add("cause", "cost");
5219 DBUG_RETURN(NULL);
5220 }
5221
5222 /*
5223 If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
5224 guaranteed to be cheaper than non-ROR union), unless ROR-unions are
5225 disabled in @@optimizer_switch
5226 */
5227 if (all_scans_rors &&
5228 optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
5229 {
5230 roru_read_plans= (TABLE_READ_PLAN**)range_scans;
5231 trace_best_disjunct.add("use_roworder_union", true)
5232 .add("cause",
5233 "always cheaper than non roworder retrieval");
5234 goto skip_to_ror_scan;
5235 }
5236
5237 if (cpk_scan)
5238 {
5239 /*
5240 Add one ROWID comparison for each row retrieved on a non-CPK scan
5241 (this is done in QUICK_RANGE_SELECT::row_in_ranges).
5242 */
5243 double rid_comp_cost= (rows2double(non_cpk_scan_records) /
5244 TIME_FOR_COMPARE_ROWID);
5245 imerge_cost+= rid_comp_cost;
5246 trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
5247 rid_comp_cost);
5248 }
5249
5250 /* Calculate cost(rowid_to_row_scan) */
5251 {
5252 double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records);
5253 imerge_cost+= sweep_cost;
5254 trace_best_disjunct.add("cost_sort_rowid_and_read_disk", sweep_cost);
5255 }
5256 DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
5257 imerge_cost));
5258 if (imerge_cost > read_time ||
5259 !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
5260 {
5261 trace_best_disjunct.add("use_roworder_index_merge", true);
5262 trace_best_disjunct.add("cause", "cost");
5263 goto build_ror_index_merge;
5264 }
5265
5266 /* Add Unique operations cost */
5267 unique_calc_buff_size=
5268 Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
5269 param->table->file->ref_length,
5270 (size_t)param->thd->variables.sortbuff_size);
5271 if (param->imerge_cost_buff_size < unique_calc_buff_size)
5272 {
5273 if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
5274 unique_calc_buff_size)))
5275 DBUG_RETURN(NULL);
5276 param->imerge_cost_buff_size= unique_calc_buff_size;
5277 }
5278
5279 {
5280 const double dup_removal_cost= Unique::get_use_cost(
5281 param->imerge_cost_buff, (uint)non_cpk_scan_records,
5282 param->table->file->ref_length,
5283 (size_t)param->thd->variables.sortbuff_size,
5284 TIME_FOR_COMPARE_ROWID,
5285 FALSE, NULL);
5286 imerge_cost+= dup_removal_cost;
5287 trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost)
5288 .add("total_cost", imerge_cost);
5289 }
5290
5291 DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
5292 imerge_cost, read_time));
5293 if (imerge_cost < read_time)
5294 {
5295 if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
5296 {
5297 imerge_trp->read_cost= imerge_cost;
5298 imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
5299 imerge_trp->records= MY_MIN(imerge_trp->records,
5300 param->table->stat_records());
5301 imerge_trp->range_scans= range_scans;
5302 imerge_trp->range_scans_end= range_scans + n_child_scans;
5303 read_time= imerge_cost;
5304 }
5305 if (imerge_trp)
5306 {
5307 TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp,
5308 limit_read_time);
5309 if (trp != imerge_trp)
5310 DBUG_RETURN(trp);
5311 }
5312 }
5313
5314 build_ror_index_merge:
5315 if (!all_scans_ror_able ||
5316 param->thd->lex->sql_command == SQLCOM_DELETE ||
5317 !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
5318 DBUG_RETURN(imerge_trp);
5319
5320 /* Ok, it is possible to build a ROR-union, try it. */
5321 bool dummy;
5322 if (!(roru_read_plans=
5323 (TABLE_READ_PLAN**)alloc_root(param->mem_root,
5324 sizeof(TABLE_READ_PLAN*)*
5325 n_child_scans)))
5326 DBUG_RETURN(imerge_trp);
5327
5328 skip_to_ror_scan:
5329 roru_index_costs= 0.0;
5330 roru_total_records= 0;
5331 cur_roru_plan= roru_read_plans;
5332
5333 Json_writer_array trace_analyze_ror(thd, "analyzing_roworder_scans");
5334
5335 /* Find 'best' ROR scan for each of trees in disjunction */
5336 for (ptree= imerge->trees, cur_child= range_scans;
5337 ptree != imerge->trees_next;
5338 ptree++, cur_child++, cur_roru_plan++)
5339 {
5340 Json_writer_object trp_info(thd);
5341 if (unlikely(thd->trace_started()))
5342 (*cur_child)->trace_basic_info(param, &trp_info);
5343 /*
5344 Assume the best ROR scan is the one that has cheapest full-row-retrieval
5345 scan cost.
5346 Also accumulate index_only scan costs as we'll need them to calculate
5347 overall index_intersection cost.
5348 */
5349 double cost;
5350 if ((*cur_child)->is_ror)
5351 {
5352 /* Ok, we have index_only cost, now get full rows scan cost */
5353 cost= param->table->file->
5354 read_time(param->real_keynr[(*cur_child)->key_idx], 1,
5355 (*cur_child)->records) +
5356 rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
5357 }
5358 else
5359 cost= read_time;
5360
5361 TABLE_READ_PLAN *prev_plan= *cur_child;
5362 if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost,
5363 &dummy)))
5364 {
5365 if (prev_plan->is_ror)
5366 *cur_roru_plan= prev_plan;
5367 else
5368 DBUG_RETURN(imerge_trp);
5369 roru_index_costs += (*cur_roru_plan)->read_cost;
5370 }
5371 else
5372 roru_index_costs +=
5373 ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
5374 roru_total_records += (*cur_roru_plan)->records;
5375 roru_intersect_part *= (*cur_roru_plan)->records /
5376 param->table->stat_records();
5377 }
5378 trace_analyze_ror.end();
5379 /*
5380 rows to retrieve=
5381 SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
5382 This is valid because index_merge construction guarantees that conditions
5383 in disjunction do not share key parts.
5384 */
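/*
Illustrative numbers: two disjunct scans of 100 and 200 rows over a
1000-row table give 300 - 1000*(0.1*0.2) = 280 expected distinct rows.
*/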
5385 roru_total_records -= (ha_rows)(roru_intersect_part*
5386 param->table->stat_records());
5387 /* ok, got a ROR read plan for each of the disjuncts
5388 Calculate cost:
5389 cost(index_union_scan(scan_1, ... scan_n)) =
5390 SUM_i(cost_of_index_only_scan(scan_i)) +
5391 queue_use_cost(rowid_len, n) +
5392 cost_of_row_retrieval
5393 See get_merge_buffers_cost function for queue_use_cost formula derivation.
5394 */
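/*
The log(n_child_scans) / (TIME_FOR_COMPARE_ROWID * M_LN2) factor in the
formula below is the per-row priority queue maintenance cost: merging
n_child_scans sorted rowid streams costs about log2(n_child_scans)
rowid comparisons per retrieved row.
*/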
5395
5396 double roru_total_cost;
5397 roru_total_cost= roru_index_costs +
5398 rows2double(roru_total_records)*log((double)n_child_scans) /
5399 (TIME_FOR_COMPARE_ROWID * M_LN2) +
5400 get_sweep_read_cost(param, roru_total_records);
5401
5402 DBUG_PRINT("info", ("ROR-union: cost %g, %zu members",
5403 roru_total_cost, n_child_scans));
5404 trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost)
5405 .add("members", n_child_scans);
5406 TRP_ROR_UNION* roru;
5407 if (roru_total_cost < read_time)
5408 {
5409 if ((roru= new (param->mem_root) TRP_ROR_UNION))
5410 {
5411 trace_best_disjunct.add("chosen", true);
5412 roru->first_ror= roru_read_plans;
5413 roru->last_ror= roru_read_plans + n_child_scans;
5414 roru->read_cost= roru_total_cost;
5415 roru->records= roru_total_records;
5416 DBUG_RETURN(roru);
5417 }
5418 }
5419 else
5420 trace_best_disjunct.add("chosen", false);
5421 DBUG_RETURN(imerge_trp);
5422 }
5423
5424
5425 /*
5426 Merge index scans for the same indexes in an index merge plan
5427
5428 SYNOPSIS
5429 merge_same_index_scans()
5430 param Context info for the operation
5431 imerge IN/OUT SEL_IMERGE from which imerge_trp has been extracted
5432 imerge_trp The index merge plan where index scans for the same
5433 indexes are to be merged
5434 read_time The upper bound for the cost of the plan to be evaluated
5435
5436 DESCRIPTION
5437 For the given index merge plan imerge_trp extracted from the SEL_IMERGE
5438 imerge the function looks for range scans with the same indexes and merges
5439 them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function
5440 creates a range tree rt_i that contains only r_i. All rt_i are joined
5441 into one index merge that replaces the original index merge imerge.
5442 The function calls get_best_disjunct_quick for the new index merge to
5443 get a new index merge plan that contains index scans only for different
5444 indexes.
5445 If there are no index scans for the same index in the original index
5446 merge plan the function does not change the original imerge and returns
5447 imerge_trp as its result.
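
    Hypothetical example (for illustration only): for
      SELECT * FROM t WHERE key1 < 10 OR key1 > 100 OR key2 = 5
    the initial index merge contains two range scans over key1 and one
    over key2. The two key1 scans are OR-ed into a single SEL_ARG tree
    and get_best_disjunct_quick is called again for the resulting
    two-scan merge over {key1, key2}.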
5448
5449 RETURN
5450 The original or an improved index merge plan
5451 */
5452
5453 static
5454 TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
5455 TRP_INDEX_MERGE *imerge_trp,
5456 double read_time)
5457 {
5458 uint16 first_scan_tree_idx[MAX_KEY];
5459 SEL_TREE **tree;
5460 TRP_RANGE **cur_child;
5461 uint removed_cnt= 0;
5462
5463 DBUG_ENTER("merge_same_index_scans");
5464
5465 bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys);
5466
5467 for (tree= imerge->trees, cur_child= imerge_trp->range_scans;
5468 tree != imerge->trees_next;
5469 tree++, cur_child++)
5470 {
5471 DBUG_ASSERT(tree);
5472 uint key_idx= (*cur_child)->key_idx;
5473 uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx];
5474 if (!*tree_idx_ptr)
5475 *tree_idx_ptr= (uint16) (tree-imerge->trees+1);
5476 else
5477 {
5478 SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1);
5479 SEL_ARG *key= (*changed_tree)->keys[key_idx];
5480 for (uint i= 0; i < param->keys; i++)
5481 (*changed_tree)->keys[i]= NULL;
5482 (*changed_tree)->keys_map.clear_all();
5483 if (key)
5484 key->incr_refs();
5485 if ((*tree)->keys[key_idx])
5486 (*tree)->keys[key_idx]->incr_refs();
5487 if (((*changed_tree)->keys[key_idx]=
5488 key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx])))
5489 (*changed_tree)->keys_map.set_bit(key_idx);
5490 *tree= NULL;
5491 removed_cnt++;
5492 }
5493 }
5494 if (!removed_cnt)
5495 DBUG_RETURN(imerge_trp);
5496
5497 TABLE_READ_PLAN *trp= NULL;
5498 SEL_TREE **new_trees_next= imerge->trees;
5499 for (tree= new_trees_next; tree != imerge->trees_next; tree++)
5500 {
5501 if (!*tree)
5502 continue;
5503 if (tree > new_trees_next)
5504 *new_trees_next= *tree;
5505 new_trees_next++;
5506 }
5507 imerge->trees_next= new_trees_next;
5508
5509 DBUG_ASSERT(imerge->trees_next>imerge->trees);
5510
5511 if (imerge->trees_next-imerge->trees > 1)
5512 trp= get_best_disjunct_quick(param, imerge, read_time, true);
5513 else
5514 {
5515 /*
5516 This alternative theoretically can be reached when the cost
5517 of the index merge for such a formula as
5518 (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR
5519 (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3
5520 is estimated as being cheaper than the cost of index scan for
5521 the formula
5522 (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4)
5523
5524 In the current code this may happen for two reasons:
5525 1. for a single index range scan data records are accessed in
5526 a random order
5527 2. the functions that estimate the cost of a range scan and an
5528 index merge retrievals are not well calibrated
5529
5530 As the best range access has already been chosen it does not
5531 make sense to evaluate the one obtained from a degenerate
5532 index merge.
5533 */
5534 trp= 0;
5535 }
5536
5537 DBUG_RETURN(trp);
5538 }
5539
5540
5541 /*
5542 This structure contains the info common for all steps of a partial
5543 index intersection plan. Moreover, it also contains the info common
5544 for index intersect plans. This info is filled in by the function
5545 prepare_search_best_index_intersect just before searching for the best
5546 index intersection plan.
5547 */
5548
5549 typedef struct st_common_index_intersect_info
5550 {
5551 PARAM *param; /* context info for range optimizations */
5552 uint key_size; /* size of a ROWID element stored in Unique object */
5553 double compare_factor; /* 1/compare - cost to compare two ROWIDs */
5554 size_t max_memory_size; /* maximum space allowed for Unique objects */
5555 ha_rows table_cardinality; /* estimate of the number of records in table */
5556 double cutoff_cost; /* discard index intersects with greater costs */
5557 INDEX_SCAN_INFO *cpk_scan; /* clustered primary key used in intersection */
5558
5559 bool in_memory; /* unique object for intersection is completely in memory */
5560
5561 INDEX_SCAN_INFO **search_scans; /* scans possibly included in intersect */
5562 uint n_search_scans; /* number of elements in search_scans */
5563
5564 bool best_uses_cpk; /* current best intersect uses clustered primary key */
5565 double best_cost; /* cost of the current best index intersection */
5566 /* estimate of the number of records in the current best intersection */
5567 ha_rows best_records;
5568 uint best_length; /* number of indexes in the current best intersection */
5569 INDEX_SCAN_INFO **best_intersect; /* the current best index intersection */
5570 /* scans from the best intersect to be filtered by cpk conditions */
5571 key_map filtered_scans;
5572
5573 uint *buff_elems; /* buffer to calculate cost of index intersection */
5574
5575 } COMMON_INDEX_INTERSECT_INFO;
5576
5577
5578 /*
5579 This structure contains the info specific for one step of an index
5580 intersection plan. The structure is filled in by the function
5581 check_index_intersect_extension.
5582 */
5583
5584 typedef struct st_partial_index_intersect_info
5585 {
5586 COMMON_INDEX_INTERSECT_INFO *common_info; /* shared by index intersects */
5587 uint length; /* number of index scans in the partial intersection */
5588 ha_rows records; /* estimate of the number of records in intersection */
5589 double cost; /* cost of the partial index intersection */
5590
5591 /* estimate of total number of records of all scans of the partial index
5592 intersect sent to the Unique object used for the intersection */
5593 ha_rows records_sent_to_unique;
5594
5595 /* total cost of the scans of indexes from the partial index intersection */
5596 double index_read_cost;
5597
5598 bool use_cpk_filter; /* cpk filter is to be used for this scan */
5599 bool in_memory; /* uses unique object in memory */
5600 double in_memory_cost; /* cost of using unique object in memory */
5601
5602 key_map filtered_scans; /* scans to be filtered by cpk conditions */
5603
5604 MY_BITMAP *intersect_fields; /* bitmap of fields used in intersection */
5605
5606 void init()
5607 {
5608 common_info= NULL;
5609 intersect_fields= NULL;
5610 records_sent_to_unique= records= length= in_memory= use_cpk_filter= 0;
5611 cost= index_read_cost= in_memory_cost= 0.0;
5612 filtered_scans.clear_all();
5613 }
5614 } PARTIAL_INDEX_INTERSECT_INFO;
5615
5616
5617 /* Check whether two indexes have the same first n components */
5618
5619 static
5620 bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts)
5621 {
5622 KEY_PART_INFO *part1= key1->key_part;
5623 KEY_PART_INFO *part2= key2->key_part;
5624 for(uint i= 0; i < used_parts; i++, part1++, part2++)
5625 {
5626 if (part1->fieldnr != part2->fieldnr)
5627 return FALSE;
5628 }
5629 return TRUE;
5630 }
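
/*
  For example, idx1(dept, manager) and idx2(dept, building) from the
  estimation discussion further below share the same 1-component prefix
  (dept), so same_index_prefix(idx1, idx2, 1) holds while
  same_index_prefix(idx1, idx2, 2) does not. (Hypothetical indexes, for
  illustration only.)
*/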
5631
5632
5633 /* Create a bitmap for all fields of a table */
5634
5635 static
5636 bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap)
5637 {
5638 my_bitmap_map *bitmap_buf;
5639
5640 if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root,
5641 param->fields_bitmap_size)))
5642 return TRUE;
5643 if (my_bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields, FALSE))
5644 return TRUE;
5645
5646 return FALSE;
5647 }
5648
5649 /* Compare two index scans to sort them before the search for the best intersection */
5650
5651 static
5652 int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b)
5653 {
5654 return (*a)->records < (*b)->records ?
5655 -1 : (*a)->records == (*b)->records ? 0 : 1;
5656 }
5657
5658
5659 static inline
5660 void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap,
5661 KEY_PART_INFO *key_part,
5662 uint used_key_parts)
5663 {
5664 bitmap_clear_all(field_bitmap);
5665 for (KEY_PART_INFO *key_part_end= key_part+used_key_parts;
5666 key_part < key_part_end; key_part++)
5667 {
5668 bitmap_set_bit(field_bitmap, key_part->fieldnr-1);
5669 }
5670 }
5671
5672
5673 /*
5674 Round up table cardinality read from statistics provided by engine.
5675 This function should go away once the mysql test suite can handle
5676 deviations of InnoDB statistical data more or less easily.
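For example, a stat_records() value of 3456 is rounded to 3000 and
8765 to 9000 (one significant digit).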
5678 */
5679
5680 static inline
5681 ha_rows get_table_cardinality_for_index_intersect(TABLE *table)
5682 {
5683 if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)
5684 return table->stat_records();
5685 else
5686 {
5687 ha_rows d;
5688 double q;
5689 for (q= (double)table->stat_records(), d= 1 ; q >= 10; q/= 10, d*= 10 ) ;
5690 return (ha_rows) (floor(q+0.5) * d);
5691 }
5692 }
5693
5694 static
5695 void print_keyparts(THD *thd, KEY *key, uint key_parts)
5696 {
5697 DBUG_ASSERT(thd->trace_started());
5698
5699 KEY_PART_INFO *part= key->key_part;
5700 Json_writer_array keyparts(thd, "keyparts");
5701 for(uint i= 0; i < key_parts; i++, part++)
5702 keyparts.add(part->field->field_name);
5703 }
5704
5705
5706 static
5707 ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
5708 INDEX_SCAN_INFO *ext_index_scan);
5709
5710 /*
5711 Prepare to search for the best index intersection
5712
5713 SYNOPSIS
5714 prepare_search_best_index_intersect()
5715 param common info about index ranges
5716 tree tree of ranges for indexes that can be intersected
5717 common OUT info needed for search to be filled by the function
5718 init OUT info for an initial pseudo step of the intersection plans
5719 cutoff_cost cut off cost of the interesting index intersection
5720
5721 DESCRIPTION
5722 The function initializes all fields of the structure 'common' to be used
5723 when searching for the best intersection plan. It also allocates
5724 memory to store the cheapest index intersection.
5725
5726 NOTES
5727 When selecting candidates for index intersection we always take only
5728 one representative out of any set of indexes that share the same range
5729 conditions. These indexes always have the same prefixes and the
5730 components of these prefixes are exactly those used in these range
5731 conditions.
5732 Range conditions over the clustered primary key (cpk) are always used
5733 only as a condition that filters out some rowids retrieved by the scans
5734 for secondary indexes. The cpk index is handled in a special way by
5735 the function that searches for the best index intersection.
5736
5737 RETURN
5738 FALSE in the case of success
5739 TRUE otherwise
5740 */
5741
5742 static
5743 bool prepare_search_best_index_intersect(PARAM *param,
5744 SEL_TREE *tree,
5745 COMMON_INDEX_INTERSECT_INFO *common,
5746 PARTIAL_INDEX_INTERSECT_INFO *init,
5747 double cutoff_cost)
5748 {
5749 uint i;
5750 uint n_search_scans;
5751 double cost;
5752 INDEX_SCAN_INFO **index_scan;
5753 INDEX_SCAN_INFO **scan_ptr;
5754 INDEX_SCAN_INFO *cpk_scan= NULL;
5755 TABLE *table= param->table;
5756 uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans);
5757 THD *thd= param->thd;
5758
5759 if (n_index_scans <= 1)
5760 return 1;
5761
5762 init->init();
5763 init->common_info= common;
5764 init->cost= cutoff_cost;
5765
5766 common->param= param;
5767 common->key_size= table->file->ref_length;
5768 common->compare_factor= TIME_FOR_COMPARE_ROWID;
5769 common->max_memory_size= (size_t)param->thd->variables.sortbuff_size;
5770 common->cutoff_cost= cutoff_cost;
5771 common->cpk_scan= NULL;
5772 common->table_cardinality=
5773 get_table_cardinality_for_index_intersect(table);
5774
5775 if (table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
5776 {
5777 INDEX_SCAN_INFO **index_scan_end;
5778 index_scan= tree->index_scans;
5779 index_scan_end= index_scan+n_index_scans;
5780 for ( ; index_scan < index_scan_end; index_scan++)
5781 {
5782 if (table->file->is_clustering_key((*index_scan)->keynr))
5783 {
5784 common->cpk_scan= cpk_scan= *index_scan;
5785 break;
5786 }
5787 }
5788 }
5789
5790 i= n_index_scans - MY_TEST(cpk_scan != NULL) + 1;
5791
5792 if (!(common->search_scans =
5793 (INDEX_SCAN_INFO **) alloc_root (param->mem_root,
5794 sizeof(INDEX_SCAN_INFO *) * i)))
5795 return TRUE;
5796 bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i);
5797
5798 INDEX_SCAN_INFO **selected_index_scans= common->search_scans;
5799 Json_writer_array potential_idx_scans(thd, "potential_index_scans");
5800 for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++)
5801 {
5802 Json_writer_object idx_scan(thd);
5803 uint used_key_parts= (*index_scan)->used_key_parts;
5804 KEY *key_info= (*index_scan)->key_info;
5805 idx_scan.add("index", key_info->name);
5806
5807 if (*index_scan == cpk_scan)
5808 {
5809 idx_scan.add("chosen", "false")
5810 .add("cause", "clustered index used for filtering");
5811 continue;
5812 }
5813 if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts &&
5814 same_index_prefix(cpk_scan->key_info, key_info, used_key_parts))
5815 {
5816 idx_scan.add("chosen", "false")
5817 .add("cause", "clustered index used for filtering");
5818 continue;
5819 }
5820
5821 cost= table->opt_range[(*index_scan)->keynr].index_only_cost;
5822
5823 idx_scan.add("cost", cost);
5824
5825 if (cost >= cutoff_cost)
5826 {
5827 idx_scan.add("chosen", false);
5828 idx_scan.add("cause", "cost");
5829 continue;
5830 }
5831
5832 for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++)
5833 {
5834 /*
5835 When we have range conditions for two different indexes with the same
5836 beginning it does not make sense to consider both of them for index
5837 intersection if the range conditions are covered by common initial
5838 components of the indexes. Actually in this case the indexes are
5839 guaranteed to have the same range conditions.
5840 */
5841 if ((*scan_ptr)->used_key_parts == used_key_parts &&
5842 same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts))
5843 break;
5844 }
5845 if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost)
5846 {
5847 idx_scan.add("chosen", true);
5848 if (!*scan_ptr)
5849 idx_scan.add("cause", "first occurrence of index prefix");
5850 else
5851 idx_scan.add("cause", "better cost for same idx prefix");
5852 *scan_ptr= *index_scan;
5853 (*scan_ptr)->index_read_cost= cost;
5854 }
5855 else
5856 {
5857 idx_scan.add("chosen", false).add("cause", "cost");
5858 }
5859 }
5860 potential_idx_scans.end();
5861
5862 ha_rows records_in_scans= 0;
5863
5864 for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++)
5865 {
5866 if (create_fields_bitmap(param, &(*scan_ptr)->used_fields))
5867 return TRUE;
5868 records_in_scans+= (*scan_ptr)->records;
5869 }
5870
5871 n_search_scans= i;
5872
5873 if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields))
5874 return TRUE;
5875
5876 if (!(common->n_search_scans= n_search_scans))
5877 return TRUE;
5878
5879 common->best_uses_cpk= FALSE;
5880 common->best_cost= cutoff_cost + COST_EPS;
5881 common->best_length= 0;
5882
5883 if (!(common->best_intersect=
5884 (INDEX_SCAN_INFO **) alloc_root (param->mem_root,
5885 sizeof(INDEX_SCAN_INFO *) *
5886 (i + MY_TEST(cpk_scan != NULL)))))
5887 return TRUE;
5888
5889 size_t calc_cost_buff_size=
5890 Unique::get_cost_calc_buff_size((size_t)records_in_scans,
5891 common->key_size,
5892 common->max_memory_size);
5893 if (!(common->buff_elems= (uint *) alloc_root(param->mem_root,
5894 calc_cost_buff_size)))
5895 return TRUE;
5896
5897 my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *),
5898 (qsort_cmp) cmp_intersect_index_scan);
5899
5900 Json_writer_array selected_idx_scans(thd, "selected_index_scans");
5901 if (cpk_scan)
5902 {
5903 PARTIAL_INDEX_INTERSECT_INFO curr;
5904 set_field_bitmap_for_index_prefix(&cpk_scan->used_fields,
5905 cpk_scan->key_info->key_part,
5906 cpk_scan->used_key_parts);
5907 curr.common_info= common;
5908 curr.intersect_fields= &cpk_scan->used_fields;
5909 curr.records= cpk_scan->records;
5910 curr.length= 1;
5911 for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
5912 {
5913 KEY *key_info= (*scan_ptr)->key_info;
5914 ha_rows scan_records= (*scan_ptr)->records;
5915 ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr);
5916 (*scan_ptr)->filtered_out= records >= scan_records ?
5917 0 : scan_records-records;
5918 if (thd->trace_started())
5919 {
5920 Json_writer_object selected_idx(thd);
5921 selected_idx.add("index", key_info->name);
5922 print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
5923 selected_idx.add("records", (*scan_ptr)->records)
5924 .add("filtered_records", (*scan_ptr)->filtered_out);
5925 }
5926 }
5927 }
5928 else
5929 {
5930 for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
5931 {
5932 KEY *key_info= (*scan_ptr)->key_info;
5933 (*scan_ptr)->filtered_out= 0;
5934 if (thd->trace_started())
5935 {
5936 Json_writer_object selected_idx(thd);
5937 selected_idx.add("index", key_info->name);
5938 print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
5939 selected_idx.add("records", (*scan_ptr)->records)
5940 .add("filtered_records", (*scan_ptr)->filtered_out);
5941 }
5942 }
5943 }
5944
5945 return FALSE;
5946 }
5947
5948
5949 /*
5950 On Estimation of the Number of Records in an Index Intersection
5951 ===============================================================
5952
5953 Consider query Q over table t. Let C be the WHERE condition of this query,
5954 and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined
5955 on table t.
5956 Let rt1 and rt2 be the range trees extracted by the range optimizer from C
5957 for idx1 and idx2 respectively.
5958 Let #t be the estimate of the number of records in table t provided for the
5959 optimizer.
5960 Let #r1 and #r2 be the estimates of the number of records in the range trees
5961 rt1 and rt2, respectively, obtained by the range optimizer.
5962
5963 We need to get an estimate for the number of records in the index
5964 intersection of rt1 and rt2. In other words, we need to estimate the
5965 cardinality of the set of records that are in both trees. Let's designate
5966 this number by #r.
5967
5968 If we do not make any assumptions then we can only state that
5969 #r<=MY_MIN(#r1,#r2).
5970 With this estimate we can't say that the index intersection scan will be
5971 cheaper than the cheapest index scan.
5972
5973 Let Rt1 and Rt2 be AND/OR conditions representing rt1 and rt2 respectively.
5974 The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t.
5975 The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t.
5976
5977 If we assume that the values in columns of idx1 and idx2 are independent
5978 then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t).
5979 So in this case we have: #r=#r1*#r2/#t.
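
For instance, with #t = 10^6, #r1 = 10^4 and #r2 = 10^5, the
independence assumption gives #r = 10^4 * 10^5 / 10^6 = 10^3.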
5980
5981 The above assumption of independence of the columns in idx1 and idx2 means
5982 that:
5983 - all columns are different
5984 - values from one column do not correlate with values from any other column.
5985
5986 We can't help with the case when columns correlate with each other.
5987 Yet, if they are assumed to be uncorrelated the value of #r theoretically can
5988 be evaluated. Unfortunately this evaluation, in general, is rather complex.
5989
5990 Let's consider two indexes idx1:(dept, manager), idx2:(dept, building)
5991 over table 'employee' and two range conditions over these indexes:
5992 Rt1: dept=10 AND manager LIKE 'S%'
5993 Rt2: dept=10 AND building LIKE 'L%'.
5994 We can state that:
5995 sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%')
5996 =sel(Rt1)*sel(Rt2)/sel(dept=10).
5997 sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #r1_0 of
5998 the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality
5999 #rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2.
6000 The current code does not make an estimate either for #rt1_0, or for #rt2_0,
6001 but it can be adjusted to provide those numbers.
6002 Alternatively, MY_MIN(rec_per_key) for (dept) could be used to get an upper
6003 bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
6004 now.
6005
6006 Let's consider two other indexes idx1:(dept, last_name),
6007 idx2:(first_name, last_name) and two range conditions over these indexes:
6008 Rt1: dept=5 AND last_name='Sm%'
6009 Rt2: first_name='Robert' AND last_name='Sm%'.
6010
6011 sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm5')*sel(first_name='Robert')
6012 =sel(Rt2)*sel(dept=5)
6013 Here MY_MAX(rec_per_key) for (dept) could be used to get an upper bound for
6014 the value of sel(Rt1&Rt2).
6015
6016 When the intersected indexes have different major columns, but some
6017 minor columns are common, the picture may be more complicated.
6018
6019 Let's consider the following range conditions for the same indexes as in
6020 the previous example:
6021 Rt1: (Rt11: dept=5 AND last_name='So%')
6022 OR
6023 (Rt12: dept=7 AND last_name='Saw%')
6024 Rt2: (Rt21: first_name='Robert' AND last_name='Saw%')
6025 OR
6026 (Rt22: first_name='Bob' AND last_name='So%')
6027 Here we have:
6028 sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) +
6029 sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22)
6030 Now consider the range condition:
6031 Rt1_0: (dept=5 OR dept=7)
6032 For this condition we can state that:
6033 sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))=
6034 sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+
6035 sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)=
6036 sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+
6037 sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) >
6038 sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+
6039 sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) >
6040 sel(Rt1 & Rt2)
6041
6042 We've just demonstrated on an example what is intuitively almost obvious
6043 in general: we can remove the ending parts from range trees, getting less
6044 selective range conditions for sub-indexes.
6045 So if a non-major component number k of an index idx does not occur
6046 in the index with which we intersect, we can use the sub-index
6047 idx_k-1 that includes the components of idx up to the (k-1)-th
6048 component, together with the range tree for idx_k-1, to make an upper
6049 bound estimate for the number of records in the index intersection.
6050 The range tree for idx_k-1 we use here is the subtree of the original
6051 range tree for idx that contains only parts from the first k-1 components.
6052
6053 As it was mentioned above the range optimizer currently does not provide
6054 an estimate for the number of records in the ranges for sub-indexes.
6055 However, some reasonable upper bound estimate can be obtained.
6056
6057 Let's consider the following range tree:
6058 Rt: (first_name='Robert' AND last_name='Saw%')
6059 OR
6060 (first_name='Bob' AND last_name='So%')
6061 Let #r be the number of records in Rt. Let f_1 be the fan-out of column
6062 last_name:
6063 f_1 = rec_per_key[first_name]/rec_per_key[last_name].
6064 Then the number of records in the range tree:
6065 Rt_0: (first_name='Robert' OR first_name='Bob')
6066 for the sub-index (first_name) is not greater than MY_MAX(#r*f_1, #t).
6067 Strictly speaking, we can state only that it's not greater than
6068 MY_MAX(#r*max_f_1, #t), where
6069 max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
6070 Yet, if #r/#t is big enough (and this is the case of an index intersection,
6071 because using this index range with a single index scan is cheaper than
6072 the cost of the intersection when #r/#t is small) then almost safely we
6073 can use here f_1 instead of max_f_1.
6074
6075 The above considerations can be used in future development. Now, they are
6076 used partly in the function that provides a rough upper bound estimate for
6077 the number of records in an index intersection that follows below.
6078 */
6079
6080 /*
6081 Estimate the number of records selected by an extension of a partial intersection
6082
6083 SYNOPSIS
6084 records_in_index_intersect_extension()
6085 curr partial intersection plan to be extended
6086 ext_index_scan the evaluated extension of this partial plan
6087
6088 DESCRIPTION
6089 The function provides an estimate for the number of records in the
6090 intersection of the partial index intersection curr with the index
6091 ext_index_scan. If the intersected indexes do not have common columns
6092 then the function returns an exact estimate (assuming there are no
6093 correlations between values in the columns). If the intersected indexes
6094 have common columns the function returns an upper bound for the number
6095 of records in the intersection, provided that the intersection of curr
6096 with ext_index_scan is expected to have fewer records than the expected
6097 number of records in the partial intersection curr. In this case the
6098 function also assigns the bitmap of the columns in the extended
6099 intersection to ext_index_scan->used_fields.
6100 If the function cannot expect that the number of records in the extended
6101 intersection is less than the expected number of records #r in curr then
6102 the function returns a number bigger than #r.
6103
6104 NOTES
6105 See the comment before the description of the function that explains the
6106 reasoning used by this function.
6107
6108 RETURN
6109 The expected number of rows in the extended index intersection
6110 */
6111
6112 static
6113 ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
6114 INDEX_SCAN_INFO *ext_index_scan)
6115 {
6116 KEY *key_info= ext_index_scan->key_info;
6117 KEY_PART_INFO* key_part= key_info->key_part;
6118 uint used_key_parts= ext_index_scan->used_key_parts;
6119 MY_BITMAP *used_fields= &ext_index_scan->used_fields;
6120
6121 if (!curr->length)
6122 {
6123 /*
6124 If this is the first index in the intersection just mark the
6125 fields in the used_fields bitmap and return the expected
6126 number of records in the range scan for the index provided
6127 by the range optimizer.
6128 */
6129 set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts);
6130 return ext_index_scan->records;
6131 }
6132
6133 uint i;
6134 bool better_selectivity= FALSE;
6135 ha_rows records= curr->records;
6136 MY_BITMAP *curr_intersect_fields= curr->intersect_fields;
6137 for (i= 0; i < used_key_parts; i++, key_part++)
6138 {
6139 if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1))
6140 break;
6141 }
6142 if (i)
6143 {
6144 ha_rows table_cardinality= curr->common_info->table_cardinality;
6145 ha_rows ext_records= ext_index_scan->records;
6146 if (i < used_key_parts)
6147 {
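/*
The key parts of ext_index_scan starting from position i are already
present in the current intersection, so their selectivity must not be
counted twice: drop them by inflating the estimate with the fan-out
factor f1/f2 (see "On Estimation of the Number of Records in an Index
Intersection" above).
*/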
6148 double f1= key_info->actual_rec_per_key(i-1);
6149 double f2= key_info->actual_rec_per_key(i);
6150 ext_records= (ha_rows) ((double) ext_records / f2 * f1);
6151 }
6152 if (ext_records < table_cardinality)
6153 {
6154 better_selectivity= TRUE;
6155 records= (ha_rows) ((double) records / table_cardinality *
6156 ext_records);
6157 bitmap_copy(used_fields, curr_intersect_fields);
6158 key_part= key_info->key_part;
6159 for (uint j= 0; j < used_key_parts; j++, key_part++)
6160 bitmap_set_bit(used_fields, key_part->fieldnr-1);
6161 }
6162 }
6163 return !better_selectivity ? records+1 :
6164 !records ? 1 : records;
6165 }
6166
6167
6168 /*
6169 Estimate the cost of a binary search within disjoint cpk range intervals
6170
6171 Number of comparisons to check whether a cpk value satisfies
6172 the cpk range condition = log2(cpk_scan->range_count).
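
For example, with cpk_scan->range_count = 15 each checked rowid costs
about log2(16) = 4 comparisons.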
6173 */
6174
6175 static inline
6176 double get_cpk_filter_cost(ha_rows filtered_records,
6177 INDEX_SCAN_INFO *cpk_scan,
6178 double compare_factor)
6179 {
6180 return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) *
6181 filtered_records;
6182 }
6183
6184
6185 /*
6186 Check whether a partial index intersection plan can be extended
6187
6188 SYNOPSIS
6189 check_index_intersect_extension()
6190 curr partial intersection plan to be extended
6191 ext_index_scan a possible extension of this plan to be checked
6192 next OUT the structure to be filled for the extended plan
6193
6194 DESCRIPTION
6195 The function checks whether it makes sense to extend the index
6196 intersection plan by adding the index ext_index_scan, and, if this is
6197 the case, fills in the structure for the extended plan.
6198
6199 RETURN
6200 TRUE if it makes sense to extend the given plan
6201 FALSE otherwise
6202 */
6203
6204 static
6205 bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
6206 INDEX_SCAN_INFO *ext_index_scan,
6207 PARTIAL_INDEX_INTERSECT_INFO *next)
6208 {
6209 ha_rows records;
6210 ha_rows records_sent_to_unique;
6211 double cost;
6212 ha_rows ext_index_scan_records= ext_index_scan->records;
6213 ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out;
6214 COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
6215 double cutoff_cost= common_info->cutoff_cost;
6216 uint idx= curr->length;
6217 next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost;
6218 if (next->index_read_cost > cutoff_cost)
6219 return FALSE;
6220
6221 if ((next->in_memory= curr->in_memory))
6222 next->in_memory_cost= curr->in_memory_cost;
6223
6224 next->intersect_fields= &ext_index_scan->used_fields;
6225 next->filtered_scans= curr->filtered_scans;
6226
6227 records_sent_to_unique= curr->records_sent_to_unique;
6228
6229 next->use_cpk_filter= FALSE;
6230
6231 /* Calculate the cost of using a Unique object for index intersection */
6232 if (idx && next->in_memory)
6233 {
6234 /*
6235 All rowids received from the first scan are expected to be in one unique tree
6236 */
6237 ha_rows elems_in_tree= common_info->search_scans[0]->records-
6238 common_info->search_scans[0]->filtered_out ;
6239 next->in_memory_cost+= Unique::get_search_cost(elems_in_tree,
6240 common_info->compare_factor)*
6241 ext_index_scan_records;
6242 cost= next->in_memory_cost;
6243 }
6244 else
6245 {
6246 uint *buff_elems= common_info->buff_elems;
6247 uint key_size= common_info->key_size;
6248 double compare_factor= common_info->compare_factor;
6249 size_t max_memory_size= common_info->max_memory_size;
6250
6251 records_sent_to_unique+= ext_index_scan_records;
6252 cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size,
6253 max_memory_size, compare_factor, TRUE,
6254 &next->in_memory);
6255 if (records_filtered_out_by_cpk)
6256 {
6257 /* Check whether using cpk filter for this scan is beneficial */
6258
6259 double cost2;
6260 bool in_memory2;
6261 ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk;
6262 cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size,
6263 max_memory_size, compare_factor, TRUE,
6264 &in_memory2);
6265 cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan,
6266 compare_factor);
6267 if (cost > cost2 + COST_EPS)
6268 {
6269 cost= cost2;
6270 next->in_memory= in_memory2;
6271 next->use_cpk_filter= TRUE;
6272 records_sent_to_unique= records2;
6273 }
6274
6275 }
6276 if (next->in_memory)
6277 next->in_memory_cost= cost;
6278 }
6279
6280 if (next->use_cpk_filter)
6281 {
6282 next->filtered_scans.set_bit(ext_index_scan->keynr);
6283 bitmap_union(&ext_index_scan->used_fields,
6284 &common_info->cpk_scan->used_fields);
6285 }
6286 next->records_sent_to_unique= records_sent_to_unique;
6287
6288 records= records_in_index_intersect_extension(curr, ext_index_scan);
6289 if (idx && records > curr->records)
6290 return FALSE;
6291 if (next->use_cpk_filter && curr->filtered_scans.is_clear_all())
6292 records-= records_filtered_out_by_cpk;
6293 next->records= records;
6294
6295 cost+= next->index_read_cost;
6296 if (cost >= cutoff_cost)
6297 return FALSE;
6298
6299 cost+= get_sweep_read_cost(common_info->param, records);
6300
6301 next->cost= cost;
6302 next->length= curr->length+1;
6303
6304 return TRUE;
6305 }
6306
6307
6308 /*
6309 Search for the cheapest extensions of range scans used to access a table
6310
6311 SYNOPSIS
6312 find_index_intersect_best_extension()
6313 curr partial intersection to evaluate all possible extension for
6314
6315 DESCRIPTION
6316 The function tries to extend the partial plan curr in all possible ways
6317 to look for the cheapest index intersection whose cost is less than the
6318 cut off value set in curr->common_info.cutoff_cost.
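
The search works by swapping each remaining scan into position
curr->length of the common search_scans array, recursing, and swapping
it back, so all orderings of the selected scans are enumerated while
branches whose cost exceeds the current cutoff are pruned.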
6319 */
6320
6321 static
6322 void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
6323 {
6324 PARTIAL_INDEX_INTERSECT_INFO next;
6325 COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
6326 INDEX_SCAN_INFO **index_scans= common_info->search_scans;
6327 uint idx= curr->length;
6328 INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx];
6329 double cost= curr->cost;
6330
6331 if (cost + COST_EPS < common_info->best_cost)
6332 {
6333 common_info->best_cost= cost;
6334 common_info->best_length= curr->length;
6335 common_info->best_records= curr->records;
6336 common_info->filtered_scans= curr->filtered_scans;
6337 /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */
6338 common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all();
6339 uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length;
6340 memcpy(common_info->best_intersect, common_info->search_scans, sz);
6341 common_info->cutoff_cost= cost;
6342 }
6343
6344 if (!(*rem_first_index_scan_ptr))
6345 return;
6346
6347 next.common_info= common_info;
6348
6349 INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr;
6350 for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr;
6351 *index_scan_ptr; index_scan_ptr++)
6352 {
6353 *rem_first_index_scan_ptr= *index_scan_ptr;
6354 *index_scan_ptr= rem_first_index_scan;
6355 if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next))
6356 find_index_intersect_best_extension(&next);
6357 *index_scan_ptr= *rem_first_index_scan_ptr;
6358 *rem_first_index_scan_ptr= rem_first_index_scan;
6359 }
6360 }
6361
6362
6363 /*
6364 Get the plan of the best intersection of range scans used to access a table
6365
6366 SYNOPSIS
6367 get_best_index_intersect()
6368 param common info about index ranges
6369 tree tree of ranges for indexes that can be intersected
6370 read_time cut off value for the evaluated plans
6371
6372 DESCRIPTION
6373 The function looks for the cheapest index intersection of the range
6374 scans to access a table. The info about the ranges for all indexes
6375 is provided by the range optimizer and is passed through the
6376 parameters param and tree. Any plan whose cost is greater than read_time
6377 is rejected.
6378 After the best index intersection is found the function constructs
6379 the structure that manages the execution by the chosen plan.
6380
6381 RETURN
6382 Pointer to the generated execution structure on success,
6383 0 - otherwise.
6384 */
6385
6386 static
6387 TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
6388 double read_time)
6389 {
6390 uint i;
6391 uint count;
6392 TRP_RANGE **cur_range;
6393 TRP_RANGE **range_scans;
6394 INDEX_SCAN_INFO *index_scan;
6395 COMMON_INDEX_INTERSECT_INFO common;
6396 PARTIAL_INDEX_INTERSECT_INFO init;
6397 TRP_INDEX_INTERSECT *intersect_trp= NULL;
6398 TABLE *table= param->table;
6399 THD *thd= param->thd;
6400
6401 DBUG_ENTER("get_best_index_intersect");
6402
6403 Json_writer_object trace_idx_intersect(thd, "analyzing_sort_intersect");
6404
6405 if (prepare_search_best_index_intersect(param, tree, &common, &init,
6406 read_time))
6407 DBUG_RETURN(NULL);
6408
6409 find_index_intersect_best_extension(&init);
6410
6411 if (common.best_length <= 1 && !common.best_uses_cpk)
6412 DBUG_RETURN(NULL);
6413
6414 if (common.best_uses_cpk)
6415 {
6416 memmove((char *) (common.best_intersect+1), (char *) common.best_intersect,
6417 sizeof(INDEX_SCAN_INFO *) * common.best_length);
6418 common.best_intersect[0]= common.cpk_scan;
6419 common.best_length++;
6420 }
6421
6422 count= common.best_length;
6423
6424 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
6425 sizeof(TRP_RANGE *)*
6426 count)))
6427 DBUG_RETURN(NULL);
6428
6429 for (i= 0, cur_range= range_scans; i < count; i++)
6430 {
6431 index_scan= common.best_intersect[i];
6432 if ((*cur_range= new (param->mem_root) TRP_RANGE(index_scan->sel_arg,
6433 index_scan->idx, 0)))
6434 {
6435 TRP_RANGE *trp= *cur_range;
6436 trp->read_cost= index_scan->index_read_cost;
6437 trp->records= index_scan->records;
6438 trp->is_ror= FALSE;
6439 trp->mrr_buf_size= 0;
6440 table->intersect_keys.set_bit(index_scan->keynr);
6441 cur_range++;
6442 }
6443 }
6444
6445 count= (uint)(tree->index_scans_end - tree->index_scans);
6446 for (i= 0; i < count; i++)
6447 {
6448 index_scan= tree->index_scans[i];
6449 if (!table->intersect_keys.is_set(index_scan->keynr))
6450 {
6451 for (uint j= 0; j < common.best_length; j++)
6452 {
6453 INDEX_SCAN_INFO *scan= common.best_intersect[j];
6454 if (same_index_prefix(index_scan->key_info, scan->key_info,
6455 scan->used_key_parts))
6456 {
6457 table->intersect_keys.set_bit(index_scan->keynr);
6458 break;
6459 }
6460 }
6461 }
6462 }
6463
6464 if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT))
6465 {
6466
6467 intersect_trp->read_cost= common.best_cost;
6468 intersect_trp->records= common.best_records;
6469 intersect_trp->range_scans= range_scans;
6470 intersect_trp->range_scans_end= cur_range;
6471 intersect_trp->filtered_scans= common.filtered_scans;
6472 trace_idx_interect.add("rows", intersect_trp->records)
6473 .add("cost", intersect_trp->read_cost)
6474 .add("chosen",true);
6475 }
6476 DBUG_RETURN(intersect_trp);
6477 }
6478
6479
6480 typedef struct st_ror_scan_info : INDEX_SCAN_INFO
6481 {
6482 } ROR_SCAN_INFO;
6483
6484 void TRP_ROR_INTERSECT::trace_basic_info(PARAM *param,
6485 Json_writer_object *trace_object) const
6486 {
6487 THD *thd= param->thd;
6488 DBUG_ASSERT(trace_object->trace_started());
6489
6490 trace_object->add("type", "index_roworder_intersect");
6491 trace_object->add("rows", records);
6492 trace_object->add("cost", read_cost);
6493 trace_object->add("covering", is_covering);
6494 trace_object->add("clustered_pk_scan", cpk_scan != NULL);
6495
6496 Json_writer_array smth_trace(thd, "intersect_of");
6497 for (ROR_SCAN_INFO **cur_scan= first_scan; cur_scan != last_scan;
6498 cur_scan++)
6499 {
6500 const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
6501 const KEY_PART_INFO *key_part= cur_key.key_part;
6502
6503 Json_writer_object trace_isect_idx(thd);
6504 trace_isect_idx.add("type", "range_scan");
6505 trace_isect_idx.add("index", cur_key.name);
6506 trace_isect_idx.add("rows", (*cur_scan)->records);
6507
6508 Json_writer_array trace_range(thd, "ranges");
6509
6510 trace_ranges(&trace_range, param, (*cur_scan)->idx,
6511 (*cur_scan)->sel_arg, key_part);
6512 }
6513 }
6514
6515
6516 /*
6517 Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
6518 sel_arg set of intervals.
6519
6520 SYNOPSIS
6521 make_ror_scan()
6522 param Parameter from test_quick_select function
6523 idx Index of key in param->keys
6524 sel_arg Set of intervals for a given key
6525
6526 RETURN
6527 NULL - out of memory
6528 ROR scan structure containing a scan for {idx, sel_arg}
6529 */
6530
6531 static
6532 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
6533 {
6534 ROR_SCAN_INFO *ror_scan;
6535 my_bitmap_map *bitmap_buf;
6536 uint keynr;
6537 DBUG_ENTER("make_ror_scan");
6538
6539 if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
6540 sizeof(ROR_SCAN_INFO))))
6541 DBUG_RETURN(NULL);
6542
6543 ror_scan->idx= idx;
6544 ror_scan->keynr= keynr= param->real_keynr[idx];
6545 ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
6546 param->table->file->ref_length);
6547 ror_scan->sel_arg= sel_arg;
6548 ror_scan->records= param->quick_rows[keynr];
6549
6550 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root,
6551 param->fields_bitmap_size)))
6552 DBUG_RETURN(NULL);
6553
6554 if (my_bitmap_init(&ror_scan->covered_fields, bitmap_buf,
6555 param->table->s->fields, FALSE))
6556 DBUG_RETURN(NULL);
6557 bitmap_clear_all(&ror_scan->covered_fields);
6558
6559 KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
6560 KEY_PART_INFO *key_part_end= key_part +
6561 param->table->key_info[keynr].user_defined_key_parts;
6562 for (;key_part != key_part_end; ++key_part)
6563 {
6564 if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr-1))
6565 bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
6566 }
6567 ror_scan->index_read_cost=
6568 param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records);
6569 DBUG_RETURN(ror_scan);
6570 }
6571
6572
6573 /*
6574 Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length.
6575 SYNOPSIS
6576 cmp_ror_scan_info()
6577 a ptr to first compared value
6578 b ptr to second compared value
6579
6580 RETURN
6581 -1 a < b
6582 0 a = b
6583 1 a > b
6584 */
6585
6586 static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
6587 {
6588 double val1= rows2double((*a)->records) * (*a)->key_rec_length;
6589 double val2= rows2double((*b)->records) * (*b)->key_rec_length;
6590 return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
6591 }
6592
/*
  Compare two ROR_SCAN_INFO** by
   (#covered fields in F desc,
    #components asc,
    number of first not covered component asc)

  SYNOPSIS
    cmp_ror_scan_info_covering()
      a  ptr to first compared value
      b  ptr to second compared value

  RETURN
    -1  a < b
     0  a = b
     1  a > b
*/

static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
{
  if ((*a)->used_fields_covered > (*b)->used_fields_covered)
    return -1;
  if ((*a)->used_fields_covered < (*b)->used_fields_covered)
    return 1;
  if ((*a)->key_components < (*b)->key_components)
    return -1;
  if ((*a)->key_components > (*b)->key_components)
    return 1;
  if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
    return -1;
  if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
    return 1;
  return 0;
}


/* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Fraction of table records that satisfies conditions of all scans.
    This is the number of full records that will be retrieved if a
    non-index_only index intersection is employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records;   /* sum(#records to look in indexes) */
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  double total_cost;
} ROR_INTERSECT_INFO;


/*
  Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.

  SYNOPSIS
    ror_intersect_init()
      param  Parameter from test_quick_select

  RETURN
    allocated structure
    NULL on error
*/

static
ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
{
  ROR_INTERSECT_INFO *info;
  my_bitmap_map* buf;
  if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
                                              sizeof(ROR_INTERSECT_INFO))))
    return NULL;
  info->param= param;
  if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
                                         param->fields_bitmap_size)))
    return NULL;
  if (my_bitmap_init(&info->covered_fields, buf, param->table->s->fields,
                     FALSE))
    return NULL;
  info->is_covering= FALSE;
  info->index_scan_costs= 0.0;
  info->index_records= 0;
  info->out_rows= (double) param->table->stat_records();
  bitmap_clear_all(&info->covered_fields);
  return info;
}

void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
{
  dst->param= src->param;
  memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
         no_bytes_in_map(&src->covered_fields));
  dst->out_rows= src->out_rows;
  dst->is_covering= src->is_covering;
  dst->index_records= src->index_records;
  dst->index_scan_costs= src->index_scan_costs;
  dst->total_cost= src->total_cost;
}


/*
  Get selectivity of a ROR scan wrt ROR-intersection.

  SYNOPSIS
    ror_scan_selectivity()
      info  ROR-intersection
      scan  ROR scan

  NOTES
    Suppose we have a condition on several keys
    cond=k_11=c_11 AND k_12=c_12 AND ...     // parts of first key
         k_21=c_21 AND k_22=c_22 AND ...     // parts of second key
          ...
         k_n1=c_n1 AND k_n3=c_n3 AND ... (1) // parts of the key used by *scan

    where k_ij may be the same as any k_pq (i.e. keys may have common parts).

    A full row is retrieved if the entire condition holds.

    The recursive procedure for finding P(cond) is as follows:

    First step:
    Pick the 1st part of the 1st key and break conjunction (1) into two parts:
      cond= (k_11=c_11 AND R)

    Here R may still contain condition(s) equivalent to k_11=c_11.
    Nevertheless, the following holds:

      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).

    Mark k_11 as a fixed field (and satisfied condition) F, save P(F),
    save R to be cond and proceed to the recursion step.

    Recursion step:
    We have a set of fixed fields (and satisfied conditions) F, probability
    P(F), and remaining conjunction R.
    Pick the next key part on the current key and its condition "k_ij=c_ij".
    We will add "k_ij=c_ij" into F and update P(F).
    Let's denote k_ij as t, R = t AND R1, where R1 may still contain t. Then

      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)

    (here '|' means conditional probability, not "or")

    Consider the first multiplier in (2). One of the following holds:
    a) F contains a condition on the field used in t (i.e. t AND F = F).
      Then P(t|F) = 1

    b) F doesn't contain a condition on the field used in t. Then F and t
      are considered independent.

      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
             = P(t|fields_before_t_in_key).

      P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
                                    #records(fields_before_t_in_key, t)

    The second multiplier is calculated by applying this step recursively.

  IMPLEMENTATION
    This function calculates the result of applying the "recursion step"
    described above for all fixed key members of a single key, accumulating
    the set of covered fields, selectivity, etc.

    The calculation is conducted as follows:
    Let's denote #records(keypart1, ... keypartK) as n_k. We need to
    calculate

       n_{k1}      n_{k2}
      --------- * ---------  * ....  (3)
       n_{k1-1}    n_{k2-1}

    where k1,k2,... are key parts whose fields were not yet marked as fixed
    (this is the result of applying option b) of the recursion step for
    parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
    as fixed, we calculate (3) as

                                   n_{i1}      n_{i2}
      (3) = n_{max_key_part}  / ( --------- * ---------  * .... )
                                   n_{i1-1}    n_{i2-1}

    where i1,i2, .. are key parts that were already marked as fixed.

    In order to minimize the number of expensive records_in_range calls we
    group and reduce adjacent fractions.

  RETURN
    Selectivity of the given ROR scan.
*/
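/*
  A worked instance of formula (3) above, with made-up numbers: let *scan be
  on INDEX(a,b) with condition a=c_a AND b=c_b, where field 'a' is already
  covered by previously added scans and 'b' is not. If records_in_range
  estimates #records(a=c_a) = 100 and #records(a=c_a, b=c_b) = 10, the scan
  contributes the multiplier 10/100 = 0.1, i.e. intersecting with it is
  expected to keep 10% of the rows matched so far.
*/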

static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part;
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr - 1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  ha_rows prev_records= info->param->table->stat_records();
  DBUG_ENTER("ror_scan_selectivity");

  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr - 1));
    if (cur_covered != prev_covered)
    {
      /* create (part1val, ..., part{n-1}val) tuple. */
      ha_rows records;
      page_range pages;
      if (!tuple_arg)
      {
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part->store_length, &key_ptr, 0);
        keypart_map= 1;
      }
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        keypart_map= (keypart_map << 1) | 1;
      }
      min_range.length= max_range.length= (uint) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;
      records= (info->param->table->file->
                records_in_range(scan->keynr, &min_range, &max_range, &pages));
      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
    double tmp= rows2double(info->param->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}


/*
  Check if adding a ROR scan to a ROR-intersection reduces the cost of the
  ROR-intersection and if yes, update the parameters of the ROR-intersection,
  including its cost.

  SYNOPSIS
    ror_intersect_add()
      param        Parameter from test_quick_select
      info         ROR-intersection structure to add the scan to.
      ror_scan     ROR scan info to add.
      is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
                   from other parameters and is passed separately only to
                   avoid duplicating the inference code)

  NOTES
    Adding a ROR scan to a ROR-intersection "makes sense" iff the cost of the
    ROR-intersection decreases. The cost of the ROR-intersection is
    calculated as follows:

      cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval

    When we add a scan the first term increases and the second one decreases.

    cost_of_full_rows_retrieval=
      (union of indexes used covers all needed fields) ?
        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
        0

    E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
                           ror_scan_selectivity({scan1}, scan2) * ... *
                           ror_scan_selectivity({scan1,...}, scanN).
  RETURN
    TRUE   ROR scan added to ROR-intersection, cost updated.
    FALSE  It doesn't make sense to add this ROR scan to this
           ROR-intersection.
*/
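/*
  Illustrative numbers (made up) for the trade-off above: suppose two scans
  were added with index-only costs 10 and 8, so SUM_i(key_scan_cost_i)= 18,
  and their selectivities multiply out to
  E(rows_to_retrieve)= 10000 * 0.01 * 0.2 = 20 full-row fetches.
  Adding yet another scan pays off only if the resulting drop in
  cost_of_sweep_read exceeds that scan's own key_scan_cost.
*/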

static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
                              ROR_SCAN_INFO* ror_scan,
                              Json_writer_object *trace_costs,
                              bool is_cpk_scan)
{
  double selectivity_mult= 1.0;

  DBUG_ENTER("ror_intersect_add");
  DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
  DBUG_PRINT("info", ("Adding scan on %s",
                      info->param->table->key_info[ror_scan->keynr].name.str));
  DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));

  selectivity_mult= ror_scan_selectivity(info, ror_scan);
  if (selectivity_mult == 1.0)
  {
    /* Don't add this scan if it doesn't improve selectivity. */
    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
    DBUG_RETURN(FALSE);
  }

  info->out_rows *= selectivity_mult;

  if (is_cpk_scan)
  {
    /*
      CPK scan is used to filter out rows. We apply filtering for
      each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
      per check this gives us:
    */
    const double idx_cost= rows2double(info->index_records) /
                           TIME_FOR_COMPARE_ROWID;
    info->index_scan_costs+= idx_cost;
    trace_costs->add("index_scan_cost", idx_cost);
  }
  else
  {
    info->index_records+= info->param->quick_rows[ror_scan->keynr];
    info->index_scan_costs+= ror_scan->index_read_cost;
    trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
    if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                               &info->covered_fields))
    {
      DBUG_PRINT("info", ("ROR-intersect is covering now"));
      info->is_covering= TRUE;
    }
  }

  info->total_cost= info->index_scan_costs;
  trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
  DBUG_PRINT("info", ("info->total_cost: %g", info->total_cost));
  if (!info->is_covering)
  {
    double sweep_cost= get_sweep_read_cost(info->param,
                                           double2rows(info->out_rows));
    info->total_cost+= sweep_cost;
    trace_costs->add("disk_sweep_cost", sweep_cost);
    DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
  }
  else
    trace_costs->add("disk_sweep_cost", 0);

  DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
  DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
                      info->is_covering ? "" : "non-"));
  DBUG_RETURN(TRUE);
}


/*
  Get the best ROR-intersection plan using the non-covering ROR-intersection
  search algorithm. The returned plan may be covering.

  SYNOPSIS
    get_best_ror_intersect()
      param             Parameter from test_quick_select function.
      tree              Transformed restriction condition to be used to look
                        for ROR scans.
      read_time         Do not return read plans with cost > read_time.
      are_all_covering  [out] set to TRUE if union of all scans covers all
                        fields needed by the query (and it is possible to
                        build a covering ROR-intersection)

  NOTES
    get_key_scans_params must be called before this function can be called.

    When this function is called by the ROR-union construction algorithm it
    assumes it is building an uncovered ROR-intersection (and thus the # of
    full records to be retrieved is wrong here). This is a hack.

  IMPLEMENTATION
    The approximate best non-covering plan search algorithm is as follows:

    find_min_ror_intersection_scan()
    {
      R= select all ROR scans;
      order R by (E(#records_matched) * key_record_length).

      S= first(R); -- set of scans that will be used for ROR-intersection
      R= R - S;
      min_cost= cost(S);
      min_scan= make_scan(S);
      while (R is not empty)
      {
        firstR= first(R);
        R= R - firstR;
        if (!(selectivity(S + firstR) < selectivity(S)))
          continue;

        S= S + firstR;
        if (cost(S) < min_cost)
        {
          min_cost= cost(S);
          min_scan= make_scan(S);
        }
      }
      return min_scan;
    }

    See the ror_intersect_add function for ROR-intersection costs.

    Special handling for Clustered PK scans
    A clustered PK contains all table fields, so using it as a regular scan
    in an index intersection doesn't make sense: a range scan on the CPK will
    be less expensive in this case.
    A clustered PK scan has special handling in ROR-intersection: it is not
    used to retrieve rows, instead its condition is used to filter row
    references we get from scans on other keys.

  RETURN
    ROR-intersection table read plan
    NULL if out of memory or no suitable plan found.
*/

static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering)
{
  uint idx;
  double min_cost= DBL_MAX;
  DBUG_ENTER("get_best_ror_intersect");
  THD *thd= param->thd;
  Json_writer_object trace_ror(thd, "analyzing_roworder_intersect");

  if ((tree->n_ror_scans < 2) || !param->table->stat_records() ||
      !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
  {
    if (tree->n_ror_scans < 2)
      trace_ror.add("cause", "too few roworder scans");
    DBUG_RETURN(NULL);
  }

  /*
    Step1: Collect ROR-able SEL_ARGs and create a ROR_SCAN_INFO for each of
    them. Also find and save the clustered PK scan if there is one.
  */
  ROR_SCAN_INFO **cur_ror_scan;
  ROR_SCAN_INFO *cpk_scan= NULL;
  uint cpk_no;

  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     param->keys)))
    return NULL;
  cpk_no= (param->table->file->
           pk_is_clustering_key(param->table->s->primary_key) ?
           param->table->s->primary_key : MAX_KEY);

  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
  {
    ROR_SCAN_INFO *scan;
    uint key_no;
    if (!tree->ror_scans_map.is_set(idx))
      continue;
    key_no= param->real_keynr[idx];
    if (key_no != cpk_no &&
        param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
    {
      /* Ignore clustering keys */
      tree->n_ror_scans--;
      continue;
    }
    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
      return NULL;
    if (key_no == cpk_no)
    {
      cpk_scan= scan;
      tree->n_ror_scans--;
    }
    else
      *(cur_ror_scan++)= scan;
  }

  tree->ror_scans_end= cur_ror_scan;
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table, "original",
                                           tree->ror_scans,
                                           tree->ror_scans_end););
  /*
    Ok, [ror_scans, ror_scans_end) is an array of ptrs to initialized
    ROR_SCAN_INFO's.
    Step 2: Get the best ROR-intersection using an approximate algorithm.
  */
  my_qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
           (qsort_cmp)cmp_ror_scan_info);
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table, "ordered",
                                           tree->ror_scans,
                                           tree->ror_scans_end););

  ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
  ROR_SCAN_INFO **intersect_scans_end;
  if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     tree->n_ror_scans)))
    return NULL;
  intersect_scans_end= intersect_scans;

  /* Create and incrementally update ROR intersection. */
  ROR_INTERSECT_INFO *intersect, *intersect_best;
  if (!(intersect= ror_intersect_init(param)) ||
      !(intersect_best= ror_intersect_init(param)))
    return NULL;

  /* [intersect_scans,intersect_scans_best) will hold the best intersection */
  ROR_SCAN_INFO **intersect_scans_best;
  cur_ror_scan= tree->ror_scans;
  intersect_scans_best= intersect_scans;
  Json_writer_array trace_isect_idx(thd, "intersecting_indexes");
  while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
  {
    Json_writer_object trace_idx(thd);
    trace_idx.add("index",
                  param->table->key_info[(*cur_ror_scan)->keynr].name);

    /* S= S + first(R);  R= R - first(R); */
    if (!ror_intersect_add(intersect, *cur_ror_scan, &trace_idx, FALSE))
    {
      trace_idx.add("usable", false)
               .add("cause", "does not reduce cost of intersect");
      cur_ror_scan++;
      continue;
    }

    trace_idx.add("cumulative_total_cost", intersect->total_cost)
             .add("usable", true)
             .add("matching_rows_now", intersect->out_rows)
             .add("intersect_covering_with_this_index",
                  intersect->is_covering);

    *(intersect_scans_end++)= *(cur_ror_scan++);

    if (intersect->total_cost < min_cost)
    {
      /* Local minimum found, save it */
      ror_intersect_cpy(intersect_best, intersect);
      intersect_scans_best= intersect_scans_end;
      min_cost= intersect->total_cost;
      trace_idx.add("chosen", true);
    }
    else
    {
      trace_idx.add("chosen", false)
               .add("cause", "does not reduce cost");
    }
  }
  trace_isect_idx.end();

  if (intersect_scans_best == intersect_scans)
  {
    DBUG_PRINT("info", ("None of scans increase selectivity"));
    trace_ror.add("chosen", false)
             .add("cause", "does not increase selectivity");
    DBUG_RETURN(NULL);
  }

  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "best ROR-intersection",
                                           intersect_scans,
                                           intersect_scans_best););

  *are_all_covering= intersect->is_covering;
  uint best_num= (uint)(intersect_scans_best - intersect_scans);
  ror_intersect_cpy(intersect, intersect_best);

  /*
    Ok, found the best ROR-intersection of non-CPK key scans.
    Check if we should add a CPK scan. If the obtained ROR-intersection is
    covering, it doesn't make sense to add a CPK scan.
  */
  Json_writer_object trace_cpk(thd, "clustered_pk");
  if (cpk_scan && !intersect->is_covering)
  {
    if (ror_intersect_add(intersect, cpk_scan, &trace_cpk, TRUE) &&
        (intersect->total_cost < min_cost))
    {
      trace_cpk.add("clustered_pk_scan_added_to_intersect", true)
               .add("cumulated_cost", intersect->total_cost);
      intersect_best= intersect; // just set pointer here
    }
    else
    {
      trace_cpk.add("clustered_pk_added_to_intersect", false)
               .add("cause", "cost");
      cpk_scan= 0; // Don't use cpk_scan
    }
  }
  else
  {
    trace_cpk.add("clustered_pk_added_to_intersect", false)
             .add("cause", cpk_scan ? "roworder is covering"
                                    : "no clustered pk index");
    cpk_scan= 0; // Don't use cpk_scan
  }
  trace_cpk.end();

  /* Ok, return the ROR-intersect plan if we have found one */
  TRP_ROR_INTERSECT *trp= NULL;
  if (min_cost < read_time && (cpk_scan || best_num > 1))
  {
    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
      DBUG_RETURN(trp);
    if (!(trp->first_scan=
          (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                      sizeof(ROR_SCAN_INFO*)*best_num)))
      DBUG_RETURN(NULL);
    memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
    trp->last_scan= trp->first_scan + best_num;
    trp->is_covering= intersect_best->is_covering;
    trp->read_cost= intersect_best->total_cost;
    /* Prevent divisions by zero */
    ha_rows best_rows= double2rows(intersect_best->out_rows);
    if (!best_rows)
      best_rows= 1;
    set_if_smaller(param->table->opt_range_condition_rows, best_rows);
    trp->records= best_rows;
    trp->index_scan_costs= intersect_best->index_scan_costs;
    trp->cpk_scan= cpk_scan;
    DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                        "cost %g, records %lu",
                        trp->read_cost, (ulong) trp->records));
    trace_ror.add("rows", trp->records)
             .add("cost", trp->read_cost)
             .add("covering", trp->is_covering)
             .add("chosen", true);
  }
  else
  {
    trace_ror.add("chosen", false)
             .add("cause", (read_time > min_cost)
                           ? "too few indexes to merge"
                           : "cost");
  }
  DBUG_RETURN(trp);
}


/*
  Get the best covering ROR-intersection.
  SYNOPSIS
    get_best_covering_ror_intersect()
      param      Parameter from test_quick_select function.
      tree       SEL_TREE with sets of intervals for different keys.
      read_time  Don't return table read plans with cost > read_time.

  RETURN
    Best covering ROR-intersection plan
    NULL if no plan found.

  NOTES
    get_best_ror_intersect must be called for a tree before calling this
    function for it.
    This function invalidates tree->ror_scans member values.

    The following approximate algorithm is used:
      I=set of all covering indexes
      F=set of all fields to cover
      S={}

      do
      {
        Order I by (#covered fields in F desc,
                    #components asc,
                    number of first not covered component asc);
        F=F-covered by first(I);
        S=S+first(I);
        I=I-first(I);
      } while F is not empty.
*/
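/*
  Illustrative run of the loop above (hypothetical schema): to cover
  F={a,b,c} with I={INDEX i1(a,b), INDEX i2(a), INDEX i3(c,d)}, the first
  iteration picks i1 (covers two fields of F), leaving F={c}; the second
  iteration picks i3, after which F is empty and S={i1,i3}.
*/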

static
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
                                                   double read_time)
{
  ROR_SCAN_INFO **ror_scan_mark;
  ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;
  DBUG_ENTER("get_best_covering_ror_intersect");

  if (!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
    DBUG_RETURN(NULL);

  for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
    (*scan)->key_components=
      param->table->key_info[(*scan)->keynr].user_defined_key_parts;

  /*
    Run the covering-ROR-search algorithm.
    Assume set I is [ror_scan .. ror_scans_end)
  */

  /* I=set of all covering indexes */
  ror_scan_mark= tree->ror_scans;

  MY_BITMAP *covered_fields= &param->tmp_covered_fields;
  if (!covered_fields->bitmap)
    covered_fields->bitmap= (my_bitmap_map*)alloc_root(param->mem_root,
                                                       param->fields_bitmap_size);
  if (!covered_fields->bitmap ||
      my_bitmap_init(covered_fields, covered_fields->bitmap,
                     param->table->s->fields, FALSE))
    DBUG_RETURN(0);
  bitmap_clear_all(covered_fields);

  double total_cost= 0.0f;
  ha_rows records= 0;
  bool all_covered;

  DBUG_PRINT("info", ("Building covering ROR-intersection"));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "building covering ROR-I",
                                           ror_scan_mark, ror_scans_end););
  do
  {
    /*
      Update changed sorting info:
        #covered fields,
        number of first not covered component
      Calculate and save these values for each of the remaining scans.
    */
    for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
    {
      bitmap_subtract(&(*scan)->covered_fields, covered_fields);
      (*scan)->used_fields_covered=
        bitmap_bits_set(&(*scan)->covered_fields);
      (*scan)->first_uncovered_field=
        bitmap_get_first(&(*scan)->covered_fields);
    }

    my_qsort(ror_scan_mark, ror_scans_end-ror_scan_mark,
             sizeof(ROR_SCAN_INFO*), (qsort_cmp)cmp_ror_scan_info_covering);

    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "remaining scans",
                                             ror_scan_mark, ror_scans_end););

    /* I=I-first(I) */
    total_cost+= (*ror_scan_mark)->index_read_cost;
    records+= (*ror_scan_mark)->records;
    DBUG_PRINT("info", ("Adding scan on %s",
                        param->table->key_info[(*ror_scan_mark)->keynr].name.str));
    if (total_cost > read_time)
      DBUG_RETURN(NULL);
    /* F=F-covered by first(I) */
    bitmap_union(covered_fields, &(*ror_scan_mark)->covered_fields);
    all_covered= bitmap_is_subset(&param->needed_fields, covered_fields);
  } while ((++ror_scan_mark < ror_scans_end) && !all_covered);

  if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1)
    DBUG_RETURN(NULL);

  /*
    Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
    cost total_cost.
  */
  DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "creating covering ROR-intersect",
                                           tree->ror_scans, ror_scan_mark););

  /* Add priority queue use cost. */
  total_cost+= rows2double(records)*
               log((double)(ror_scan_mark - tree->ror_scans)) /
               (TIME_FOR_COMPARE_ROWID * M_LN2);
  DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));

  if (total_cost > read_time)
    DBUG_RETURN(NULL);

  TRP_ROR_INTERSECT *trp;
  if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
    DBUG_RETURN(trp);
  uint best_num= (uint)(ror_scan_mark - tree->ror_scans);
  if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     best_num)))
    DBUG_RETURN(NULL);
  memcpy(trp->first_scan, tree->ror_scans, best_num*sizeof(ROR_SCAN_INFO*));
  trp->last_scan= trp->first_scan + best_num;
  trp->is_covering= TRUE;
  trp->read_cost= total_cost;
  trp->records= records;
  trp->cpk_scan= NULL;
  set_if_smaller(param->table->opt_range_condition_rows, records);

  DBUG_PRINT("info",
             ("Returning covering ROR-intersect plan: cost %g, records %lu",
              trp->read_cost, (ulong) trp->records));
  DBUG_RETURN(trp);
}


/*
  Get the best "range" table read plan for a given SEL_TREE.
  Also update PARAM members and store ROR scans info in the SEL_TREE.
  SYNOPSIS
    get_key_scans_params
      param                    parameters from test_quick_select
      tree                     make range select for this SEL_TREE
      index_read_must_be_used  if TRUE, assume the 'index only' option will
                               be set (except for clustered PK indexes)
      for_range_access         if TRUE the function is called to get the best
                               range plan for range access, not for index
                               merge access
      read_time                don't create read plans with cost > read_time.
  RETURN
    Best range read plan
    NULL if no plan found or error occurred
*/

static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool for_range_access,
                                       double read_time)
{
  uint idx, UNINIT_VAR(best_idx);
  SEL_ARG *key_to_read= NULL;
  ha_rows UNINIT_VAR(best_records);         /* protected by key_to_read */
  uint UNINIT_VAR(best_mrr_flags),          /* protected by key_to_read */
       UNINIT_VAR(best_buf_size);           /* protected by key_to_read */
  TRP_RANGE* read_plan= NULL;
  DBUG_ENTER("get_key_scans_params");
  THD *thd= param->thd;
  /*
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. a tree for the expression "key1 is not null" where
    key1 is defined as "not null".
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
                                      "tree scans"););
  Json_writer_array range_scan_alt(thd, "range_scan_alternatives");

  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  tree->index_scans= 0;
  if (!tree->keys_map.is_clear_all())
  {
    tree->index_scans=
      (INDEX_SCAN_INFO **) alloc_root(param->mem_root,
                                      sizeof(INDEX_SCAN_INFO *) * param->keys);
  }
  tree->index_scans_end= tree->index_scans;

  for (idx= 0; idx < param->keys; idx++)
  {
    SEL_ARG *key= tree->keys[idx];
    if (key)
    {
      ha_rows found_records;
      Cost_estimate cost;
      double found_read_time;
      uint mrr_flags, buf_size;
      bool is_ror_scan= FALSE;
      INDEX_SCAN_INFO *index_scan;
      uint keynr= param->real_keynr[idx];
      if (key->type == SEL_ARG::MAYBE_KEY ||
          key->maybe_flag)
        param->needed_reg->set_bit(keynr);

      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->covering_keys.is_set(keynr);

      Json_writer_object trace_idx(thd);
      trace_idx.add("index", param->table->key_info[keynr].name);

      found_records= check_quick_select(param, idx, read_index_only, key,
                                        for_range_access, &mrr_flags,
                                        &buf_size, &cost, &is_ror_scan);

      if (!for_range_access && !is_ror_scan &&
          !optimizer_flag(param->thd,OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
      {
        /* The scan is not a ROR-scan, just skip it */
        continue;
      }

      if (found_records != HA_POS_ERROR && tree->index_scans &&
          (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root,
                                                     sizeof(INDEX_SCAN_INFO))))
      {
        Json_writer_array trace_range(thd, "ranges");

        const KEY &cur_key= param->table->key_info[keynr];
        const KEY_PART_INFO *key_part= cur_key.key_part;

        index_scan->idx= idx;
        index_scan->keynr= keynr;
        index_scan->key_info= &param->table->key_info[keynr];
        index_scan->used_key_parts= param->max_key_parts;
        index_scan->range_count= param->range_count;
        index_scan->records= found_records;
        index_scan->sel_arg= key;
        *tree->index_scans_end++= index_scan;

        if (unlikely(thd->trace_started()))
          trace_ranges(&trace_range, param, idx, key, key_part);
        trace_range.end();

        trace_idx.add("rowid_ordered", is_ror_scan)
                 .add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL))
                 .add("index_only", read_index_only)
                 .add("rows", found_records)
                 .add("cost", cost.total_cost());
      }
      if ((found_records != HA_POS_ERROR) && is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }
      if (found_records != HA_POS_ERROR &&
          read_time > (found_read_time= cost.total_cost()))
      {
        read_time= found_read_time;
        best_records= found_records;
        key_to_read= key;
        best_idx= idx;
        best_mrr_flags= mrr_flags;
        best_buf_size= buf_size;
        trace_idx.add("chosen", true);
      }
      else
      {
        trace_idx.add("chosen", false);
        if (found_records == HA_POS_ERROR)
        {
          if (key->type == SEL_ARG::Type::MAYBE_KEY)
            trace_idx.add("cause", "depends on unread values");
          else
            trace_idx.add("cause", "unknown");
        }
        else
          trace_idx.add("cause", "cost");
      }
    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  if (key_to_read)
  {
    if ((read_plan= new (param->mem_root) TRP_RANGE(key_to_read, best_idx,
                                                    best_mrr_flags)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(best_idx);
      read_plan->read_cost= read_time;
      read_plan->mrr_buf_size= best_buf_size;
      DBUG_PRINT("info",
                 ("Returning range plan for key %s, cost %g, records %lu",
                  param->table->key_info[param->real_keynr[best_idx]].name.str,
                  read_plan->read_cost, (ulong) read_plan->records));
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}


QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
                                            bool retrieve_full_rows,
                                            MEM_ROOT *parent_alloc)
{
  QUICK_INDEX_MERGE_SELECT *quick_imerge;
  QUICK_RANGE_SELECT *quick;
  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
  if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
    return NULL;

  quick_imerge->records= records;
  quick_imerge->read_time= read_cost;
  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
       range_scan++)
  {
    if (!(quick= (QUICK_RANGE_SELECT*)
          ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc))) ||
        quick_imerge->push_quick_back(quick))
    {
      delete quick;
      delete quick_imerge;
      return NULL;
    }
  }
  return quick_imerge;
}


QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param,
                                                bool retrieve_full_rows,
                                                MEM_ROOT *parent_alloc)
{
  QUICK_INDEX_INTERSECT_SELECT *quick_intersect;
  QUICK_RANGE_SELECT *quick;
  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
  if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd,
                                                          param->table)))
    return NULL;

  quick_intersect->records= records;
  quick_intersect->read_time= read_cost;
  quick_intersect->filtered_scans= filtered_scans;
  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
       range_scan++)
  {
    if (!(quick= (QUICK_RANGE_SELECT*)
          ((*range_scan)->make_quick(param, FALSE,
                                     &quick_intersect->alloc))) ||
        quick_intersect->push_quick_back(quick))
    {
      delete quick;
      delete quick_intersect;
      return NULL;
    }
  }
  return quick_intersect;
}


QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows ?
                                         (!is_covering) : FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    alloc= parent_alloc ? parent_alloc : &quick_intrsect->alloc;
    for (ROR_SCAN_INFO **curr_scan= first_scan; curr_scan != last_scan;
         ++curr_scan)
    {
      if (!(quick= get_quick_select(param, (*curr_scan)->idx,
                                    (*curr_scan)->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(alloc, quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}


QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
                                          bool retrieve_full_rows,
                                          MEM_ROOT *parent_alloc)
{
  QUICK_ROR_UNION_SELECT *quick_roru;
  TABLE_READ_PLAN **scan;
  QUICK_SELECT_I *quick;
  DBUG_ENTER("TRP_ROR_UNION::make_quick");
  /*
    It is impossible to construct a ROR-union that will not retrieve full
    rows, so ignore the retrieve_full_rows parameter.
  */
  if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
  {
    for (scan= first_ror; scan != last_ror; scan++)
    {
      if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
          quick_roru->push_quick_back(quick))
      {
        delete quick_roru;
        DBUG_RETURN(NULL);
      }
    }
    quick_roru->records= records;
    quick_roru->read_time= read_cost;
  }
  DBUG_RETURN(quick_roru);
}


/*
  Build a SEL_TREE for a <> or NOT BETWEEN predicate

  SYNOPSIS
    get_ne_mm_tree()
      param     PARAM from SQL_SELECT::test_quick_select
      field     field in the predicate
      lt_value  constant that the field should be smaller than
      gt_value  constant that the field should be greater than

  RETURN
    #  Pointer to the built tree
    0  on error
*/
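/*
  For example, for a predicate "f <> 10" this builds the interval list
  (-inf < f < 10) OR (10 < f < +inf); for "f NOT BETWEEN c1 AND c2" it is
  called with lt_value=c1 and gt_value=c2, giving (f < c1) OR (f > c2).
*/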

SEL_TREE *Item_bool_func::get_ne_mm_tree(RANGE_OPT_PARAM *param,
                                         Field *field,
                                         Item *lt_value, Item *gt_value)
{
  SEL_TREE *tree;
  tree= get_mm_parts(param, field, Item_func::LT_FUNC, lt_value);
  if (tree)
    tree= tree_or(param, tree, get_mm_parts(param, field, Item_func::GT_FUNC,
                                            gt_value));
  return tree;
}


SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
                                         Field *field, Item *value)
{
  DBUG_ENTER("Item_func_ne::get_func_mm_tree");
  /*
    If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
    do not construct a SEL_TREE from it. A condition that excludes just one
    row in the table is not selective (unless there are only a few rows)
  */
  if (is_field_an_unique_index(param, field))
    DBUG_RETURN(NULL);
  DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
}


SEL_TREE *Item_func_between::get_func_mm_tree(RANGE_OPT_PARAM *param,
                                              Field *field, Item *value)
{
  SEL_TREE *tree;
  DBUG_ENTER("Item_func_between::get_func_mm_tree");
  if (!value)
  {
    if (negated)
    {
      tree= get_ne_mm_tree(param, field, args[1], args[2]);
    }
    else
    {
      tree= get_mm_parts(param, field, Item_func::GE_FUNC, args[1]);
      if (tree)
      {
        tree= tree_and(param, tree, get_mm_parts(param, field,
                                                 Item_func::LE_FUNC,
                                                 args[2]));
      }
    }
  }
  else
  {
    tree= get_mm_parts(param, field,
                       (negated ?
                        (value == (Item*)1 ? Item_func::GT_FUNC :
                                             Item_func::LT_FUNC) :
                        (value == (Item*)1 ? Item_func::LE_FUNC :
                                             Item_func::GE_FUNC)),
                       args[0]);
  }
  DBUG_RETURN(tree);
}


SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
                                         Field *field, Item *value)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("Item_func_in::get_func_mm_tree");
  /*
    The array for IN() is constructed only when all values have the same
    result type. No tree is built for values with different result types,
    so we check it here to avoid unnecessary work.
  */
  if (!arg_types_compatible)
    DBUG_RETURN(0);

  if (negated)
  {
    if (array && array->type_handler()->result_type() != ROW_RESULT)
    {
      /*
        We get here for conditions in the form "t.key NOT IN (c1, c2, ...)",
        where c{i} are constants. Our goal is to produce a SEL_TREE that
        represents intervals:

        ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)

        where $MIN is either "-inf" or NULL.

        The most straightforward way to produce it is to convert NOT IN
        into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
        analyzer build a SEL_TREE from that. The problem is that the
        range analyzer will use O(N^2) memory (which is probably a bug),
        and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
        so it would run out of memory.

        Another problem with big lists like (*) is that a big list is
        unlikely to produce a good "range" access, while considering that
        range access will require expensive CPU calculations (and for
        MyISAM even index accesses). In short, big NOT IN lists are rarely
        worth analyzing.

        Considering the above, we'll handle NOT IN as follows:
        * if the number of entries in the NOT IN list is less than
          NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
        * Otherwise, don't produce a SEL_TREE.
      */
#define NOT_IN_IGNORE_THRESHOLD 1000
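      /*
        For example (hypothetical data), "t.key NOT IN (10, 20)" becomes
        the interval list (-inf < t.key < 10) OR (10 < t.key < 20) OR
        (20 < t.key < +inf), built below one constant at a time.
      */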
      MEM_ROOT *tmp_root= param->mem_root;
      param->thd->mem_root= param->old_root;
      /*
        Create one Item_type constant object. We'll need it as
        get_mm_parts only accepts constant values wrapped in Item_Type
        objects.
        We create the Item on param->mem_root which points to
        per-statement mem_root (while thd->mem_root is currently pointing
        to mem_root local to range optimizer).
      */
      Item *value_item= array->create_item(param->thd);
      param->thd->mem_root= tmp_root;

      if (array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
        DBUG_RETURN(0);

      /*
        If this is a "col1 NOT IN (...)", and there is a UNIQUE KEY(col1),
        do not construct a SEL_TREE from it. The rationale is as follows:
        - if there are only a few constants, this condition is not selective
          (unless the table is also very small in which case we won't gain
          anything)
        - if there are a lot of constants, the overhead of building and
          processing an enormous range list is not worth it.
      */
      if (is_field_an_unique_index(param, field))
        DBUG_RETURN(0);

      /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
      uint i=0;
      do
      {
        array->value_to_item(i, value_item);
        tree= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
        if (!tree)
          break;
        i++;
      } while (i < array->count && tree->type == SEL_TREE::IMPOSSIBLE);

      if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
      {
        /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
        DBUG_RETURN(NULL);
      }
      SEL_TREE *tree2;
      for (; i < array->used_count; i++)
      {
        if (array->compare_elems(i, i-1))
        {
          /* Get a SEL_TREE for "-inf < X < c_i" interval */
          array->value_to_item(i, value_item);
          tree2= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
          if (!tree2)
          {
            tree= NULL;
            break;
          }

          /* Change all intervals to be "c_{i-1} < X < c_i" */
          for (uint idx= 0; idx < param->keys; idx++)
          {
            SEL_ARG *new_interval, *last_val;
            if (((new_interval= tree2->keys[idx])) &&
                (tree->keys[idx]) &&
                ((last_val= tree->keys[idx]->last())))
            {
              new_interval->min_value= last_val->max_value;
              new_interval->min_flag= NEAR_MIN;

              /*
                If the interval is over a partial keypart, the
                interval must be "c_{i-1} <= X < c_i" instead of
                "c_{i-1} < X < c_i". Reason:

                Consider a table with a column "my_col VARCHAR(3)",
                and an index with definition
                "INDEX my_idx my_col(1)". If the table contains rows
                with my_col values "f" and "foo", the index will not
                distinguish the two rows.

                Note that tree_or() below will effectively merge
                this range with the range created for c_{i-1} and
                we'll eventually end up with only one range:
                "NULL < X".

                Partitioning indexes are never partial.
              */
              if (param->using_real_indexes)
              {
                const KEY key=
                  param->table->key_info[param->real_keynr[idx]];
                const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                if (kpi->key_part_flag & HA_PART_KEY_SEG)
                  new_interval->min_flag= 0;
              }
            }
          }
          /*
            The following doesn't try to allocate memory so no need to
            check for NULL.
          */
          tree= tree_or(param, tree, tree2);
        }
      }

      if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
      {
        /*
          Get the SEL_TREE for the last "c_last < X < +inf" interval
          (value_item contains c_last already)
        */
        tree2= get_mm_parts(param, field, Item_func::GT_FUNC, value_item);
        tree= tree_or(param, tree, tree2);
      }
    }
    else
    {
      tree= get_ne_mm_tree(param, field, args[1], args[1]);
      if (tree)
      {
        Item **arg, **end;
        for (arg= args + 2, end= arg + arg_count - 2; arg < end ; arg++)
        {
          tree= tree_and(param, tree, get_ne_mm_tree(param, field,
                                                     *arg, *arg));
        }
      }
    }
  }
  else
  {
    tree= get_mm_parts(param, field, Item_func::EQ_FUNC, args[1]);
    if (tree)
    {
      Item **arg, **end;
      for (arg= args + 2, end= arg + arg_count - 2;
           arg < end ; arg++)
      {
        tree= tree_or(param, tree, get_mm_parts(param, field,
                                                Item_func::EQ_FUNC, *arg));
      }
    }
  }
  DBUG_RETURN(tree);
}


/*
  The structure Key_col_info is purely auxiliary and is used
  only in the method Item_func_in::get_func_row_mm_tree
*/
struct Key_col_info {
  Field *field;          /* If != NULL the column can be used for keys */
  cmp_item *comparator;  /* If != 0 the column can be evaluated */
};

/**
  Build a SEL_TREE for an IN predicate whose arguments are rows

  @param param    PARAM from SQL_SELECT::test_quick_select
  @param key_row  First operand of the IN predicate

  @note
    The function builds a SEL_TREE for an IN predicate in the case
    when the predicate uses row arguments. First the function
    detects, among the components of the key_row (c[1],...,c[n]) taken
    from the left part of the predicate, those that can be usable
    for building a SEL_TREE (c[i1],...,c[ik]). They have to contain
    items whose real items are field items referring to the current
    table or equal to the items referring to the current table.
    For the remaining components of the row it checks whether they
    can be evaluated. The result of the analysis is put into the
    array of structures of the type Key_col_info.

    After this the function builds the SEL_TREE for the following
    formula that can be inferred from the given IN predicate:
      c[i11]=a[1][i11] AND ... AND c[i1k1]=a[1][i1k1]
        OR
      ...
        OR
      c[im1]=a[m][im1] AND ... AND c[imkm]=a[m][imkm].
    Here a[1],...,a[m] are all the arguments of the IN predicate from
    the right part and for each j, ij1,...,ijkj is a subset of
    i1,...,ik such that a[j][ij1],...,a[j][ijkj] can be evaluated.

    If for some j none of a[j][i1],...,a[j][ik] can be evaluated
    then no SEL_TREE can be built for this predicate and the
    function immediately returns 0.

    If for some j, by using the evaluated values of key_row, it can be
    proven that c[ij1]=a[j][ij1] AND ... AND c[ijkj]=a[j][ijkj]
    is always FALSE then this disjunct is omitted.

  @returns
    the built SEL_TREE if it can be constructed
    0 - otherwise.
*/
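/*
  For example (hypothetical schema): for "(t.a, t.b) IN ((1,2), (3,4))"
  with both columns usable for keys, the function builds the SEL_TREE for
  (t.a=1 AND t.b=2) OR (t.a=3 AND t.b=4).
*/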

SEL_TREE *Item_func_in::get_func_row_mm_tree(RANGE_OPT_PARAM *param,
                                             Item_row *key_row)
{
  DBUG_ENTER("Item_func_in::get_func_row_mm_tree");

  if (negated)
    DBUG_RETURN(0);

  SEL_TREE *res_tree= 0;
  uint used_key_cols= 0;
  uint col_comparators= 0;
  table_map param_comp= ~(param->prev_tables | param->read_tables |
                          param->current_table);
  uint row_cols= key_row->cols();
  Dynamic_array <Key_col_info> key_cols_info(row_cols);
  cmp_item_row *row_cmp_item;

  if (array)
  {
    in_row *row= static_cast<in_row*>(array);
    row_cmp_item= static_cast<cmp_item_row*>(row->get_cmp_item());
  }
  else
  {
    DBUG_ASSERT(get_comparator_type_handler(0) == &type_handler_row);
    row_cmp_item= static_cast<cmp_item_row*>(get_comparator_cmp_item(0));
  }
  DBUG_ASSERT(row_cmp_item);

  Item **key_col_ptr= key_row->addr(0);
  for (uint i= 0; i < row_cols; i++, key_col_ptr++)
  {
    Key_col_info key_col_info= {0, NULL};
    Item *key_col= *key_col_ptr;
    if (key_col->real_item()->type() == Item::FIELD_ITEM)
    {
      /*
        The i-th component of key_row can be used for key access if
        key_col->real_item() points to a field of the current table or
        if it is equal to a field item pointing to such a field.
      */
      Item_field *col_field_item= (Item_field *) (key_col->real_item());
      Field *key_col_field= col_field_item->field;
      if (key_col_field->table->map != param->current_table)
      {
        Item_equal *item_equal= col_field_item->item_equal;
        if (item_equal)
        {
          Item_equal_fields_iterator it(*item_equal);
          while (it++)
          {
            key_col_field= it.get_curr_field();
            if (key_col_field->table->map == param->current_table)
              break;
          }
        }
      }
      if (key_col_field->table->map == param->current_table)
      {
        key_col_info.field= key_col_field;
        used_key_cols++;
      }
    }
    else if (!(key_col->used_tables() & (param_comp | param->current_table))
             && !key_col->is_expensive())
    {
      /* The i-th component of key_row can be evaluated */

      /* See the comment in Item::get_mm_tree_for_const */
      MEM_ROOT *tmp_root= param->mem_root;
      param->thd->mem_root= param->old_root;

      key_col->bring_value();
      key_col_info.comparator= row_cmp_item->get_comparator(i);
      DBUG_ASSERT(key_col_info.comparator);
      key_col_info.comparator->store_value(key_col);
      col_comparators++;

      param->thd->mem_root= tmp_root;
    }
    key_cols_info.push(key_col_info);
  }

  if (!used_key_cols)
    DBUG_RETURN(0);

  uint omitted_tuples= 0;
  Item **arg_start= arguments() + 1;
  Item **arg_end= arg_start + argument_count() - 1;
  for (Item **arg= arg_start ; arg < arg_end; arg++)
  {
    uint i;

    /*
      First check whether the disjunct constructed for *arg
      is really needed
    */
    Item_row *arg_tuple= (Item_row *) (*arg);
    if (col_comparators)
    {
      MEM_ROOT *tmp_root= param->mem_root;
      param->thd->mem_root= param->old_root;
      for (i= 0; i < row_cols; i++)
      {
        Key_col_info *key_col_info= &key_cols_info.at(i);
        if (key_col_info->comparator)
        {
          Item *arg_col= arg_tuple->element_index(i);
          if (!(arg_col->used_tables() &
                (param_comp | param->current_table)) &&
              !arg_col->is_expensive() &&
              key_col_info->comparator->cmp(arg_col))
          {
            omitted_tuples++;
            break;
          }
        }
      }
      param->thd->mem_root= tmp_root;
      if (i < row_cols)
        continue;
    }

    /* The disjunct for *arg is needed: build it. */
    SEL_TREE *and_tree= 0;
    Item **arg_col_ptr= arg_tuple->addr(0);
    for (uint i= 0; i < row_cols; i++, arg_col_ptr++)
    {
      Key_col_info *key_col_info= &key_cols_info.at(i);
      if (!key_col_info->field)
        continue;
      Item *arg_col= *arg_col_ptr;
      if (!(arg_col->used_tables() & (param_comp | param->current_table)) &&
          !arg_col->is_expensive())
      {
        and_tree= tree_and(param, and_tree,
                           get_mm_parts(param,
                                        key_col_info->field,
                                        Item_func::EQ_FUNC,
                                        arg_col->real_item()));
      }
    }
    if (!and_tree)
    {
      res_tree= 0;
      break;
    }
    /* Join the disjunct to the OR tree that is being constructed */
    res_tree= !res_tree ? and_tree : tree_or(param, res_tree, and_tree);
  }
  if (omitted_tuples == argument_count() - 1)
  {
    /* It has turned out that all disjuncts are always FALSE */
    res_tree= new (param->mem_root) SEL_TREE(SEL_TREE::IMPOSSIBLE,
                                             param->mem_root, param->keys);
  }
  DBUG_RETURN(res_tree);
}


/*
  Build a conjunction of all SEL_TREEs for a simple predicate applying
  equalities

  SYNOPSIS
    get_full_func_mm_tree()
      param       PARAM from SQL_SELECT::test_quick_select
      field_item  field in the predicate
      value       constant in the predicate (or a field already read from
                  a table in the case of dynamic range access)
                  (for BETWEEN it contains the number of the field argument,
                  for IN it's always 0)

  DESCRIPTION
    For a simple SARGable predicate of the form (f op c), where f is a field
    and c is a constant, the function builds a conjunction of all SEL_TREEs
    that can be obtained by the substitution of f for all different fields
    equal to f.

  NOTES
    If the WHERE condition contains a predicate (fi op c),
    then not only the SEL_TREE for this predicate is built, but
    the trees for the results of the substitution of fi for
    each fj belonging to the same multiple equality as fi
    are built as well.
    E.g. for WHERE t1.a=t2.a AND t2.a > 10
    a SEL_TREE for t2.a > 10 will be built for quick select from t2
    and
    a SEL_TREE for t1.a > 10 will be built for quick select from t1.

    A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
    in a similar way: we build a conjunction of trees for the results
    of all substitutions of fi for equal fj.
    Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
    differently. It is considered as a conjunction of two SARGable
    predicates (f1i <= c) and (f2i <= c) and the function
    get_full_func_mm_tree is called for each of them separately, producing
    trees for
      AND j (f1j <= c) and AND j (f2j <= c)
    After this these two trees are united in one conjunctive tree.
    It's easy to see that the same tree is obtained for
      AND j,k (f1j <= c AND f2k <= c)
    which is equivalent to
      AND j,k (c BETWEEN f1j AND f2k).
    The validity of the processing of the predicate (c NOT BETWEEN f1i AND
    f2i), which is equivalent to (f1i > c OR f2i < c), is not so obvious.
    Here the function get_full_func_mm_tree is called for (f1i > c) and
    (f2i < c) producing trees for AND j (f1j > c) and AND j (f2j < c). Then
    these two trees are united in one OR-tree. The expression
      (AND j (f1j > c)) OR (AND j (f2j < c))
    is equivalent to the expression
      AND j,k (f1j > c OR f2k < c)
    which is just a translation of
      AND j,k (c NOT BETWEEN f1j AND f2k)

    In the cases when one of the items f1, f2 is a constant c1 we do not
    create a tree for it at all. It works for BETWEEN predicates but does
    not work for NOT BETWEEN predicates as we have to evaluate the
    expression with it. If it is TRUE then the other tree can be completely
    ignored. We do not do it now and no trees are built in these cases for
    NOT BETWEEN predicates.

    As to IN predicates, only those of the form (f IN (c1,...,cn)),
    where f is a field and c1,...,cn are constants, are considered
    SARGable. We never try to narrow the index scan using predicates of
    the form (c IN (c1,...,f,...,cn)).

  RETURN
    Pointer to the tree representing the built conjunction of SEL_TREEs
*/
8268
8269 SEL_TREE *Item_bool_func::get_full_func_mm_tree(RANGE_OPT_PARAM *param,
8270 Item_field *field_item,
8271 Item *value)
8272 {
8273 DBUG_ENTER("Item_bool_func::get_full_func_mm_tree");
8274 SEL_TREE *tree= 0;
8275 SEL_TREE *ftree= 0;
8276 table_map ref_tables= 0;
8277 table_map param_comp= ~(param->prev_tables | param->read_tables |
8278 param->current_table);
8279
8280 for (uint i= 0; i < arg_count; i++)
8281 {
8282 Item *arg= arguments()[i]->real_item();
8283 if (arg != field_item)
8284 ref_tables|= arg->used_tables();
8285 }
8286 Field *field= field_item->field;
8287 if (!((ref_tables | field->table->map) & param_comp))
8288 ftree= get_func_mm_tree(param, field, value);
8289 Item_equal *item_equal= field_item->item_equal;
8290 if (item_equal)
8291 {
8292 Item_equal_fields_iterator it(*item_equal);
8293 while (it++)
8294 {
8295 Field *f= it.get_curr_field();
8296 if (field->eq(f))
8297 continue;
8298 if (!((ref_tables | f->table->map) & param_comp))
8299 {
8300 tree= get_func_mm_tree(param, f, value);
8301 ftree= !ftree ? tree : tree_and(param, ftree, tree);
8302 }
8303 }
8304 }
8305
8306 DBUG_RETURN(ftree);
8307 }
8308
8309
8310 /*
8311 Make a SEL_TREE of all keys in the condition
8312
8313 @param param Context
8314 @param cond INOUT condition to perform range analysis on.
8315
8316 @detail
8317 Range analysis may infer that some conditions are never true.
8318 - If the condition is never true, SEL_TREE(type=IMPOSSIBLE) is returned
8319 - if parts of the condition are never true, the function may remove these
8320 parts from the condition 'cond'. Sometimes this will cause the condition
8321 to be replaced with a different one.
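For example (a hypothetical schema, assuming remove_false_where_parts
is set): for
WHERE key1 < 10 OR (key2 = 5 AND 1 > 2)
the second disjunct produces SEL_TREE(type=IMPOSSIBLE), is removed
from the OR, and 'cond' is replaced with the remaining "key1 < 10".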
8322
8323
8324 @return
8325 NULL - Could not infer anything from condition cond.
8326 SEL_TREE with type=IMPOSSIBLE - condition can never be true.
8327 */
8328 SEL_TREE *Item_cond_and::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8329 {
8330 DBUG_ENTER("Item_cond_and::get_mm_tree");
8331 SEL_TREE *tree= NULL;
8332 List_iterator<Item> li(*argument_list());
8333 Item *item;
8334 while ((item= li++))
8335 {
8336 SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param,li.ref());
8337 if (param->statement_should_be_aborted())
8338 DBUG_RETURN(NULL);
8339 tree= tree_and(param, tree, new_tree);
8340 if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
8341 {
8342 /*
8343 Do not remove 'item' from 'cond'. We return a SEL_TREE::IMPOSSIBLE
8344 and that is sufficient for the caller to see that the whole
8345 condition is never true.
8346 */
8347 break;
8348 }
8349 }
8350 DBUG_RETURN(tree);
8351 }
8352
8353
8354 SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8355 {
8356 DBUG_ENTER("Item_cond::get_mm_tree");
8357 List_iterator<Item> li(*argument_list());
8358 bool replace_cond= false;
8359 Item *replacement_item= li++;
8360 SEL_TREE *tree= li.ref()[0]->get_mm_tree(param, li.ref());
8361 if (param->statement_should_be_aborted())
8362 DBUG_RETURN(NULL);
8363 if (tree)
8364 {
8365 if (tree->type == SEL_TREE::IMPOSSIBLE &&
8366 param->remove_false_where_parts)
8367 {
8368 /* See the other li.remove() call below */
8369 li.remove();
8370 if (argument_list()->elements <= 1)
8371 replace_cond= true;
8372 }
8373
8374 Item *item;
8375 while ((item= li++))
8376 {
8377 SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param, li.ref());
8378 if (new_tree == NULL || param->statement_should_be_aborted())
8379 DBUG_RETURN(NULL);
8380 tree= tree_or(param, tree, new_tree);
8381 if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
8382 {
8383 replacement_item= *li.ref();
8384 break;
8385 }
8386
8387 if (new_tree && new_tree->type == SEL_TREE::IMPOSSIBLE &&
8388 param->remove_false_where_parts)
8389 {
8390 /*
8391 This is a condition in form
8392
8393 cond = item1 OR ... OR item_i OR ... itemN
8394
8395 and item_i produces SEL_TREE(IMPOSSIBLE). We should remove item_i
8396 from cond. This may cause 'cond' to become a degenerate,
8397 one-way OR. In that case, we replace 'cond' with the remaining
8398 item_i.
8399 */
8400 li.remove();
8401 if (argument_list()->elements <= 1)
8402 replace_cond= true;
8403 }
8404 else
8405 replacement_item= *li.ref();
8406 }
8407
8408 if (replace_cond)
8409 *cond_ptr= replacement_item;
8410 }
8411 DBUG_RETURN(tree);
8412 }
8413
8414
8415 SEL_TREE *Item::get_mm_tree_for_const(RANGE_OPT_PARAM *param)
8416 {
8417 DBUG_ENTER("get_mm_tree_for_const");
8418 if (is_expensive())
8419 DBUG_RETURN(0);
8420 /*
8421 During the cond->val_int() evaluation we can come across a subselect
8422 item which may allocate memory on the thd->mem_root and assumes
8423 all the memory allocated has the same life span as the subselect
8424 item itself. So we have to restore the thread's mem_root here.
8425 */
8426 MEM_ROOT *tmp_root= param->mem_root;
8427 param->thd->mem_root= param->old_root;
8428 SEL_TREE *tree;
8429
8430 const SEL_TREE::Type type= val_int()? SEL_TREE::ALWAYS: SEL_TREE::IMPOSSIBLE;
8431 param->thd->mem_root= tmp_root;
8432
8433 tree= new (tmp_root) SEL_TREE(type, tmp_root, param->keys);
8434 DBUG_RETURN(tree);
8435 }
8436
8437
8438 SEL_TREE *Item::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8439 {
8440 DBUG_ENTER("Item::get_mm_tree");
8441 if (const_item())
8442 DBUG_RETURN(get_mm_tree_for_const(param));
8443
8444 /*
8445 Here we have a non-constant, non-function Item.
8446
8447 Item_field should not appear, as normalize_cond() replaces
8448 "WHERE field" with "WHERE field<>0".
8449
8450 Item_exists_subselect is possible, e.g. in this query:
8451 SELECT id, st FROM t1
8452 WHERE st IN ('GA','FL') AND EXISTS (SELECT 1 FROM t2 WHERE t2.id=t1.id)
8453 GROUP BY id;
8454 */
8455 table_map ref_tables= used_tables();
8456 if ((ref_tables & param->current_table) ||
8457 (ref_tables & ~(param->prev_tables | param->read_tables)))
8458 DBUG_RETURN(0);
8459 DBUG_RETURN(new (param->mem_root) SEL_TREE(SEL_TREE::MAYBE, param->mem_root,
8460 param->keys));
8461 }
8462
8463
8464 SEL_TREE *
8465 Item_func_between::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8466 {
8467 DBUG_ENTER("Item_func_between::get_mm_tree");
8468 if (const_item())
8469 DBUG_RETURN(get_mm_tree_for_const(param));
8470
8471 SEL_TREE *tree= 0;
8472 SEL_TREE *ftree= 0;
8473
8474 if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
8475 {
8476 Item_field *field_item= (Item_field*) (arguments()[0]->real_item());
8477 ftree= get_full_func_mm_tree(param, field_item, NULL);
8478 }
8479
8480 /*
8481 Concerning the code below see the NOTES section in
8482 the comments for the function get_full_func_mm_tree()
8483 */
8484 for (uint i= 1 ; i < arg_count ; i++)
8485 {
8486 if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
8487 {
8488 Item_field *field_item= (Item_field*) (arguments()[i]->real_item());
8489 SEL_TREE *tmp= get_full_func_mm_tree(param, field_item,
8490 (Item*)(intptr) i);
8491 if (negated)
8492 {
8493 tree= !tree ? tmp : tree_or(param, tree, tmp);
8494 if (tree == NULL)
8495 break;
8496 }
8497 else
8498 tree= tree_and(param, tree, tmp);
8499 }
8500 else if (negated)
8501 {
8502 tree= 0;
8503 break;
8504 }
8505 }
8506
8507 ftree= tree_and(param, ftree, tree);
8508 DBUG_RETURN(ftree);
8509 }
8510
8511
8512 SEL_TREE *Item_func_in::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8513 {
8514 DBUG_ENTER("Item_func_in::get_mm_tree");
8515 if (const_item())
8516 DBUG_RETURN(get_mm_tree_for_const(param));
8517
8518 SEL_TREE *tree= 0;
8519 switch (key_item()->real_item()->type()) {
8520 case Item::FIELD_ITEM:
8521 tree= get_full_func_mm_tree(param,
8522 (Item_field*) (key_item()->real_item()),
8523 NULL);
8524 break;
8525 case Item::ROW_ITEM:
8526 tree= get_func_row_mm_tree(param,
8527 (Item_row *) (key_item()->real_item()));
8528 break;
8529 default:
8530 DBUG_RETURN(0);
8531 }
8532 DBUG_RETURN(tree);
8533 }
8534
8535
8536 SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8537 {
8538 DBUG_ENTER("Item_equal::get_mm_tree");
8539 if (const_item())
8540 DBUG_RETURN(get_mm_tree_for_const(param));
8541
8542 SEL_TREE *tree= 0;
8543 SEL_TREE *ftree= 0;
8544
8545 Item *value;
8546 if (!(value= get_const()) || value->is_expensive())
8547 DBUG_RETURN(0);
8548
8549 Item_equal_fields_iterator it(*this);
8550 table_map ref_tables= value->used_tables();
8551 table_map param_comp= ~(param->prev_tables | param->read_tables |
8552 param->current_table);
8553 while (it++)
8554 {
8555 Field *field= it.get_curr_field();
8556 if (!((ref_tables | field->table->map) & param_comp))
8557 {
8558 tree= get_mm_parts(param, field, Item_func::EQ_FUNC, value);
8559 ftree= !ftree ? tree : tree_and(param, ftree, tree);
8560 }
8561 }
8562
8563 DBUG_RETURN(ftree);
8564 }
8565
8566
8567 /*
8568 @brief
8569 Check if there is a one-segment unique key that matches the field exactly
8570
8571 @detail
8572 In the future we could also add "almost unique" indexes where any value is
8573 present only in a few rows (but not necessarily in exactly one row)
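E.g. for a hypothetical table with UNIQUE KEY u1(a) and KEY k1(a,b),
only u1 makes this function return true for field "a": k1 also starts
with "a" but has two user-defined key parts and is not unique.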
8574 */
8575 static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
8576 {
8577 DBUG_ENTER("is_field_an_unique_index");
8578
8579 // The check for using_real_indexes is there because of the heuristics
8580 // this function is used for.
8581 if (param->using_real_indexes)
8582 {
8583 key_map::Iterator it(field->key_start);
8584 uint key_no;
8585 while ((key_no= it++) != key_map::Iterator::BITMAP_END)
8586 {
8587 KEY *key_info= &field->table->key_info[key_no];
8588 if (key_info->user_defined_key_parts == 1 &&
8589 (key_info->flags & HA_NOSAME))
8590 {
8591 DBUG_RETURN(true);
8592 }
8593 }
8594 }
8595 DBUG_RETURN(false);
8596 }
8597
8598
8599 SEL_TREE *
8600 Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
8601 Item_func::Functype type, Item *value)
8602 {
8603 DBUG_ENTER("get_mm_parts");
8604 if (field->table != param->table)
8605 DBUG_RETURN(0);
8606
8607 KEY_PART *key_part = param->key_parts;
8608 KEY_PART *end = param->key_parts_end;
8609 SEL_TREE *tree=0;
8610 table_map value_used_tables= 0;
8611 if (value &&
8612 (value_used_tables= value->used_tables()) &
8613 ~(param->prev_tables | param->read_tables))
8614 DBUG_RETURN(0);
8615 for (; key_part != end ; key_part++)
8616 {
8617 if (field->eq(key_part->field))
8618 {
8619 SEL_ARG *sel_arg=0;
8620 if (!tree && !(tree=new (param->thd->mem_root) SEL_TREE(param->mem_root,
8621 param->keys)))
8622 DBUG_RETURN(0); // OOM
8623 if (!value || !(value_used_tables & ~param->read_tables))
8624 {
8625 /*
8626 We need to restore the runtime mem_root of the thread in this
8627 function because it evaluates the value of its argument, while
8628 the argument can be any, e.g. a subselect. The subselect
8629 items, in turn, assume that all the memory allocated during
8630 the evaluation has the same life span as the item itself.
8631 TODO: opt_range.cc should not reset thd->mem_root at all.
8632 */
8633 MEM_ROOT *tmp_root= param->mem_root;
8634 param->thd->mem_root= param->old_root;
8635 sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
8636 param->thd->mem_root= tmp_root;
8637
8638 if (!sel_arg)
8639 continue;
8640 if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
8641 {
8642 tree->type=SEL_TREE::IMPOSSIBLE;
8643 DBUG_RETURN(tree);
8644 }
8645 }
8646 else
8647 {
8648 // This key may be used later
8649 if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY)))
8650 DBUG_RETURN(0); // OOM
8651 }
8652 sel_arg->part=(uchar) key_part->part;
8653 sel_arg->max_part_no= sel_arg->part+1;
8654 tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
8655 tree->keys_map.set_bit(key_part->key);
8656 }
8657 }
8658
8659 if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
8660 tree= NULL;
8661 DBUG_RETURN(tree);
8662 }
8663
8664
8665 SEL_ARG *
8666 Item_func_null_predicate::get_mm_leaf(RANGE_OPT_PARAM *param,
8667 Field *field, KEY_PART *key_part,
8668 Item_func::Functype type,
8669 Item *value)
8670 {
8671 MEM_ROOT *alloc= param->mem_root;
8672 DBUG_ENTER("Item_func_null_predicate::get_mm_leaf");
8673 DBUG_ASSERT(!value);
8674 /*
8675 No check for field->table->maybe_null. It's perfectly fine to use range
8676 access for cases like
8677
8678 SELECT * FROM t1 LEFT JOIN t2 ON t2.key IS [NOT] NULL
8679
8680 The ON expression is evaluated before considering NULL-complemented rows, so
8681 IS [NOT] NULL has regular semantics.
8682 */
8683 if (!field->real_maybe_null())
8684 DBUG_RETURN(type == ISNULL_FUNC ? &null_element : NULL);
8685 SEL_ARG *tree;
8686 if (!(tree= new (alloc) SEL_ARG(field, is_null_string, is_null_string)))
8687 DBUG_RETURN(0);
8688 if (type == Item_func::ISNOTNULL_FUNC)
8689 {
8690 tree->min_flag=NEAR_MIN; /* IS NOT NULL -> X > NULL */
8691 tree->max_flag=NO_MAX_RANGE;
8692 }
8693 DBUG_RETURN(tree);
8694 }
8695
8696
8697 SEL_ARG *
8698 Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
8699 Field *field, KEY_PART *key_part,
8700 Item_func::Functype type, Item *value)
8701 {
8702 DBUG_ENTER("Item_func_like::get_mm_leaf");
8703 DBUG_ASSERT(value);
8704
8705 if (key_part->image_type != Field::itRAW)
8706 DBUG_RETURN(0);
8707
8708 if (param->using_real_indexes &&
8709 !field->optimize_range(param->real_keynr[key_part->key],
8710 key_part->part))
8711 DBUG_RETURN(0);
8712
8713 if (field->result_type() == STRING_RESULT &&
8714 field->charset() != compare_collation())
8715 DBUG_RETURN(0);
8716
8717 StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
8718 String *res;
8719
8720 if (!(res= value->val_str(&tmp)))
8721 DBUG_RETURN(&null_element);
8722
8723 if (field->cmp_type() != STRING_RESULT ||
8724 field->type_handler() == &type_handler_enum ||
8725 field->type_handler() == &type_handler_set)
8726 DBUG_RETURN(0);
8727
8728 /*
8729 TODO:
8730 Check if this was a function. This should have been optimized away
8731 in sql_select.cc.
8732 */
8733 if (res != &tmp)
8734 {
8735 tmp.copy(*res); // Get own copy
8736 res= &tmp;
8737 }
8738
8739 uint maybe_null= (uint) field->real_maybe_null();
8740 size_t field_length= field->pack_length() + maybe_null;
8741 size_t offset= maybe_null;
8742 size_t length= key_part->store_length;
8743
8744 if (length != key_part->length + maybe_null)
8745 {
8746 /* key packed with length prefix */
8747 offset+= HA_KEY_BLOB_LENGTH;
8748 field_length= length - HA_KEY_BLOB_LENGTH;
8749 }
8750 else
8751 {
8752 if (unlikely(length < field_length))
8753 {
8754 /*
8755 This can only happen in a table created with UNIREG where one key
8756 overlaps many fields
8757 */
8758 length= field_length;
8759 }
8760 else
8761 field_length= length;
8762 }
8763 length+= offset;
8764 uchar *min_str,*max_str;
8765 if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
8766 DBUG_RETURN(0);
8767 max_str= min_str + length;
8768 if (maybe_null)
8769 max_str[0]= min_str[0]=0;
8770
8771 size_t min_length, max_length;
8772 field_length-= maybe_null;
8773 if (field->charset()->like_range(res->ptr(), res->length(),
8774 escape, wild_one, wild_many,
8775 field_length,
8776 (char*) min_str + offset,
8777 (char*) max_str + offset,
8778 &min_length, &max_length))
8779 DBUG_RETURN(0); // Can't optimize with LIKE
8780
8781 if (offset != maybe_null) // BLOB or VARCHAR
8782 {
8783 int2store(min_str + maybe_null, min_length);
8784 int2store(max_str + maybe_null, max_length);
8785 }
8786 SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
8787 DBUG_RETURN(tree);
8788 }
8789
8790
8791 SEL_ARG *
8792 Item_bool_func::get_mm_leaf(RANGE_OPT_PARAM *param,
8793 Field *field, KEY_PART *key_part,
8794 Item_func::Functype functype, Item *value)
8795 {
8796 DBUG_ENTER("Item_bool_func::get_mm_leaf");
8797 DBUG_ASSERT(value); // IS NULL and IS NOT NULL are handled separately
8798 if (key_part->image_type != Field::itRAW)
8799 DBUG_RETURN(0); // e.g. SPATIAL index
8800 DBUG_RETURN(field->get_mm_leaf(param, key_part, this,
8801 functype_to_scalar_comparison_op(functype),
8802 value));
8803 }
8804
8805
8806 bool Field::can_optimize_scalar_range(const RANGE_OPT_PARAM *param,
8807 const KEY_PART *key_part,
8808 const Item_bool_func *cond,
8809 scalar_comparison_op op,
8810 const Item *value) const
8811 {
8812 bool is_eq_func= op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL;
8813 if ((param->using_real_indexes &&
8814 !optimize_range(param->real_keynr[key_part->key],
8815 key_part->part) && !is_eq_func) ||
8816 !can_optimize_range(cond, value, is_eq_func))
8817 return false;
8818 return true;
8819 }
8820
8821
8822 uchar *Field::make_key_image(MEM_ROOT *mem_root, const KEY_PART *key_part)
8823 {
8824 DBUG_ENTER("Field::make_key_image");
8825 uint maybe_null= (uint) real_maybe_null();
8826 uchar *str;
8827 if (!(str= (uchar*) alloc_root(mem_root, key_part->store_length + 1)))
8828 DBUG_RETURN(0);
8829 if (maybe_null)
8830 *str= (uchar) is_real_null(); // Set to 1 if null
8831 get_key_image(str + maybe_null, key_part->length, key_part->image_type);
8832 DBUG_RETURN(str);
8833 }
8834
8835
8836 SEL_ARG *Field::stored_field_make_mm_leaf_truncated(RANGE_OPT_PARAM *param,
8837 scalar_comparison_op op,
8838 Item *value)
8839 {
8840 DBUG_ENTER("Field::stored_field_make_mm_leaf_truncated");
8841 if ((op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) &&
8842 value->result_type() == item_cmp_type(result_type(),
8843 value->result_type()))
8844 DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
8845 /*
8846 TODO: We should return trees of the type SEL_ARG::IMPOSSIBLE
8847 for the cases like int_field > 999999999999999999999999 as well.
8848 */
8849 DBUG_RETURN(0);
8850 }
8851
8852
8853 SEL_ARG *Field_num::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
8854 const Item_bool_func *cond,
8855 scalar_comparison_op op, Item *value)
8856 {
8857 DBUG_ENTER("Field_num::get_mm_leaf");
8858 if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
8859 DBUG_RETURN(0);
8860 int err= value->save_in_field_no_warnings(this, 1);
8861 if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
8862 DBUG_RETURN(&null_element);
8863 if (err > 0 && cmp_type() != value->result_type())
8864 DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
8865 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8866 }
8867
8868
8869 SEL_ARG *Field_temporal::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
8870 const Item_bool_func *cond,
8871 scalar_comparison_op op, Item *value)
8872 {
8873 DBUG_ENTER("Field_temporal::get_mm_leaf");
8874 if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
8875 DBUG_RETURN(0);
8876 int err= value->save_in_field_no_warnings(this, 1);
8877 if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
8878 DBUG_RETURN(&null_element);
8879 if (err > 0)
8880 DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
8881 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8882 }
8883
8884
8885 SEL_ARG *Field_date_common::get_mm_leaf(RANGE_OPT_PARAM *prm,
8886 KEY_PART *key_part,
8887 const Item_bool_func *cond,
8888 scalar_comparison_op op,
8889 Item *value)
8890 {
8891 DBUG_ENTER("Field_date_common::get_mm_leaf");
8892 if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
8893 DBUG_RETURN(0);
8894 int err= value->save_in_field_no_warnings(this, 1);
8895 if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
8896 DBUG_RETURN(&null_element);
8897 if (err > 0)
8898 {
8899 if (err == 3)
8900 {
8901 /*
8902 We were saving DATETIME into a DATE column, the conversion went ok
8903 but a non-zero time part was cut off.
8904
8905 In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
8906 values. Index over a DATE column uses DATE comparison. Changing
8907 from one comparison to the other is possible:
8908
8909 datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
8910 datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'
8911
8912 datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
8913 datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'
8914
8915 but we'll need to convert '>' to '>=' and '<' to '<='. This will
8916 be done together with other types at the end of this function
8917 (grep for stored_field_cmp_to_item)
8918 */
8919 if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL)
8920 DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this));
8921 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8922 }
8923 DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
8924 }
8925 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8926 }
8927
8928
8929 SEL_ARG *Field_str::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
8930 const Item_bool_func *cond,
8931 scalar_comparison_op op, Item *value)
8932 {
8933 DBUG_ENTER("Field_str::get_mm_leaf");
8934 if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
8935 DBUG_RETURN(0);
8936 int err= value->save_in_field_no_warnings(this, 1);
8937 if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
8938 DBUG_RETURN(&null_element);
8939 if (err > 0)
8940 {
8941 if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL)
8942 DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this));
8943 DBUG_RETURN(NULL); /* Cannot infer anything */
8944 }
8945 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8946 }
8947
8948
8949 SEL_ARG *Field::get_mm_leaf_int(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
8950 const Item_bool_func *cond,
8951 scalar_comparison_op op, Item *value,
8952 bool unsigned_field)
8953 {
8954 DBUG_ENTER("Field::get_mm_leaf_int");
8955 if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
8956 DBUG_RETURN(0);
8957 int err= value->save_in_field_no_warnings(this, 1);
8958 if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
8959 DBUG_RETURN(&null_element);
8960 if (err > 0)
8961 {
8962 if (value->result_type() != INT_RESULT)
8963 DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
8964 else
8965 DBUG_RETURN(stored_field_make_mm_leaf_bounded_int(prm, key_part,
8966 op, value,
8967 unsigned_field));
8968 }
8969 if (value->result_type() != INT_RESULT)
8970 DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
8971 DBUG_RETURN(stored_field_make_mm_leaf_exact(prm, key_part, op, value));
8972 }
8973
8974
8975 /*
8976 This method is called when:
8977 - value->save_in_field_no_warnings() returned err > 0
8978 - and both field and "value" are of integer data types
8979 If an integer got bounded (e.g. to within 0..255 / -128..127)
8980 for < or >, set flags as for <= or >= (no NEAR_MAX / NEAR_MIN)
8981 */
8982
8983 SEL_ARG *Field::stored_field_make_mm_leaf_bounded_int(RANGE_OPT_PARAM *param,
8984 KEY_PART *key_part,
8985 scalar_comparison_op op,
8986 Item *value,
8987 bool unsigned_field)
8988 {
8989 DBUG_ENTER("Field::stored_field_make_mm_leaf_bounded_int");
8990 if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) // e.g. tinyint = 200
8991 DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
8992 longlong item_val= value->val_int();
8993
8994 if (op == SCALAR_CMP_LT && item_val > 0)
8995 op= SCALAR_CMP_LE; // e.g. rewrite (tinyint < 200) to (tinyint <= 127)
8996 else if (op == SCALAR_CMP_GT && !unsigned_field &&
8997 !value->unsigned_flag && item_val < 0)
8998 op= SCALAR_CMP_GE; // e.g. rewrite (tinyint > -200) to (tinyint >= -128)
8999
9000 /*
9001 Check if we are comparing an UNSIGNED integer with a negative constant.
9002 In this case we know that:
9003 (a) (unsigned_int [< | <=] negative_constant) == FALSE
9004 (b) (unsigned_int [> | >=] negative_constant) == TRUE
9005 In case (a) the condition is false for all values, and in case (b) it
9006 is true for all values, so we can avoid unnecessary retrieval and condition
9007 testing, and we also get correct comparison of unsigned integers with
9008 negative integers (which otherwise fails because at query execution time
9009 negative integers are cast to unsigned if compared with unsigned).
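E.g. for a hypothetical UNSIGNED column uint_col:
(uint_col < -1) yields SEL_ARG_IMPOSSIBLE (case (a)), while
(uint_col > -1) yields no range at all, as the predicate does not
restrict anything (case (b)).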
9010 */
9011 if (unsigned_field && !value->unsigned_flag && item_val < 0)
9012 {
9013 if (op == SCALAR_CMP_LT || op == SCALAR_CMP_LE) // e.g. uint < -1
9014 DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
9015 if (op == SCALAR_CMP_GT || op == SCALAR_CMP_GE) // e.g. uint > -1
9016 DBUG_RETURN(0);
9017 }
9018 DBUG_RETURN(stored_field_make_mm_leaf_exact(param, key_part, op, value));
9019 }
9020
9021
9022 SEL_ARG *Field::stored_field_make_mm_leaf(RANGE_OPT_PARAM *param,
9023 KEY_PART *key_part,
9024 scalar_comparison_op op,
9025 Item *value)
9026 {
9027 DBUG_ENTER("Field::stored_field_make_mm_leaf");
9028 THD *thd= param->thd;
9029 MEM_ROOT *mem_root= param->mem_root;
9030 uchar *str;
9031 if (!(str= make_key_image(param->mem_root, key_part)))
9032 DBUG_RETURN(0);
9033
9034 switch (op) {
9035 case SCALAR_CMP_LE:
9036 DBUG_RETURN(new (mem_root) SEL_ARG_LE(str, this));
9037 case SCALAR_CMP_LT:
9038 DBUG_RETURN(new (mem_root) SEL_ARG_LT(thd, str, this, value));
9039 case SCALAR_CMP_GT:
9040 DBUG_RETURN(new (mem_root) SEL_ARG_GT(thd, str, key_part, this, value));
9041 case SCALAR_CMP_GE:
9042 DBUG_RETURN(new (mem_root) SEL_ARG_GE(thd, str, key_part, this, value));
9043 case SCALAR_CMP_EQ:
9044 case SCALAR_CMP_EQUAL:
9045 DBUG_RETURN(new (mem_root) SEL_ARG(this, str, str));
9046 break;
9047 }
9048 DBUG_ASSERT(0);
9049 DBUG_RETURN(NULL);
9050 }
9051
9052
9053 SEL_ARG *Field::stored_field_make_mm_leaf_exact(RANGE_OPT_PARAM *param,
9054 KEY_PART *key_part,
9055 scalar_comparison_op op,
9056 Item *value)
9057 {
9058 DBUG_ENTER("Field::stored_field_make_mm_leaf_exact");
9059 uchar *str;
9060 if (!(str= make_key_image(param->mem_root, key_part)))
9061 DBUG_RETURN(0);
9062
9063 switch (op) {
9064 case SCALAR_CMP_LE:
9065 DBUG_RETURN(new (param->mem_root) SEL_ARG_LE(str, this));
9066 case SCALAR_CMP_LT:
9067 DBUG_RETURN(new (param->mem_root) SEL_ARG_LT(str, this));
9068 case SCALAR_CMP_GT:
9069 DBUG_RETURN(new (param->mem_root) SEL_ARG_GT(str, key_part, this));
9070 case SCALAR_CMP_GE:
9071 DBUG_RETURN(new (param->mem_root) SEL_ARG_GE(str, this));
9072 case SCALAR_CMP_EQ:
9073 case SCALAR_CMP_EQUAL:
9074 DBUG_RETURN(new (param->mem_root) SEL_ARG(this, str, str));
9075 break;
9076 }
9077 DBUG_ASSERT(0);
9078 DBUG_RETURN(NULL);
9079 }
9080
9081
9082 /******************************************************************************
9083 ** Tree manipulation functions
9084 ** If tree is 0 it means that the condition can't be tested. It refers
9085 ** to a non-existent table or to a field in the current table which isn't a key.
9086 ** The different tree flags:
9087 ** IMPOSSIBLE: Condition is never TRUE
9088 ** ALWAYS: Condition is always TRUE
9089 ** MAYBE: Condition may exist when tables are read
9090 ** MAYBE_KEY: Condition refers to a key that may be used in join loop
9091 ** KEY_RANGE: Condition uses a key
9092 ******************************************************************************/
9093
9094 /*
9095 Update weights for SEL_ARG graph that is connected only via next_key_part
9096 (and not left/right) links
9097 */
9098 static uint update_weight_for_single_arg(SEL_ARG *arg)
9099 {
9100 if (arg->next_key_part)
9101 return (arg->weight= 1 + update_weight_for_single_arg(arg->next_key_part));
9102 else
9103 return (arg->weight= 1);
9104 }
9105
9106
9107 /*
9108 Add a new key test to a key when scanning through all keys
9109 This will never be called for the same key parts.
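An illustrative sketch: sel_add(SEL_ARG{keypart1=1}, SEL_ARG{keypart3<5})
links the two graphs via next_key_part in ascending keypart order:
keypart1=1 -> keypart3<5.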
9110 */
9111
9112 static SEL_ARG *
9113 sel_add(SEL_ARG *key1,SEL_ARG *key2)
9114 {
9115 SEL_ARG *root,**key_link;
9116
9117 if (!key1)
9118 return key2;
9119 if (!key2)
9120 return key1;
9121
9122 key_link= &root;
9123 while (key1 && key2)
9124 {
9125 if (key1->part < key2->part)
9126 {
9127 *key_link= key1;
9128 key_link= &key1->next_key_part;
9129 key1=key1->next_key_part;
9130 }
9131 else
9132 {
9133 *key_link= key2;
9134 key_link= &key2->next_key_part;
9135 key2=key2->next_key_part;
9136 }
9137 }
9138 *key_link=key1 ? key1 : key2;
9139
9140 update_weight_for_single_arg(root);
9141 return root;
9142 }
9143
9144
9145 /*
9146 Build a range tree for the conjunction of the range parts of two trees
9147
9148 SYNOPSIS
9149 and_range_trees()
9150 param Context info for the operation
9151 tree1 SEL_TREE for the first conjunct
9152 tree2 SEL_TREE for the second conjunct
9153 result SEL_TREE for the result
9154
9155 DESCRIPTION
9156 This function takes range parts of two trees tree1 and tree2 and builds
9157 a range tree for the conjunction of the formulas that these two range parts
9158 represent.
9159 More exactly:
9160 if the range part of tree1 represents the normalized formula
9161 R1_1 AND ... AND R1_k,
9162 and the range part of tree2 represents the normalized formula
9163 R2_1 AND ... AND R2_k,
9164 then the range part of the result represents the formula:
9165 RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i from [1..k]
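For instance, if for some index tree1 contains (keypart1 < 10) and
tree2 contains (keypart1 > 2), the result contains (2 < keypart1 < 10)
for that index; if such an intersection is empty, the result tree gets
the type SEL_TREE::IMPOSSIBLE.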
9166
9167 The function assumes that tree1 is never equal to tree2. At the same
9168 time the result tree can be the same as tree1 (but never as tree2).
9169 If result==tree1 then RT replaces the range part of tree1 leaving
9170 imerges as they are.
9171 If result!=tree1 then it is assumed that the SEL_ARG trees in tree1 and
9172 tree2 should be preserved. Otherwise they can be destroyed.
9173
9174 RETURN
9175 1 if the type of the result tree is SEL_TREE::IMPOSSIBLE
9176 0 otherwise
9177 */
9178
9179 static
9180 int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
9181 SEL_TREE *result)
9182 {
9183 DBUG_ENTER("and_ranges");
9184 key_map result_keys;
9185 result_keys.clear_all();
9186 key_map anded_keys= tree1->keys_map;
9187 anded_keys.merge(tree2->keys_map);
9188 int key_no;
9189 key_map::Iterator it(anded_keys);
9190 while ((key_no= it++) != key_map::Iterator::BITMAP_END)
9191 {
9192 uint flag=0;
9193 SEL_ARG *key1= tree1->keys[key_no];
9194 SEL_ARG *key2= tree2->keys[key_no];
9195 if (key1 && !key1->simple_key())
9196 flag|= CLONE_KEY1_MAYBE;
9197 if (key2 && !key2->simple_key())
9198 flag|=CLONE_KEY2_MAYBE;
9199 if (result != tree1)
9200 {
9201 if (key1)
9202 key1->incr_refs();
9203 if (key2)
9204 key2->incr_refs();
9205 }
9206 SEL_ARG *key;
9207 if ((result->keys[key_no]= key= key_and_with_limit(param, key_no,
9208 key1, key2, flag)))
9209 {
9210 if (key && key->type == SEL_ARG::IMPOSSIBLE)
9211 {
9212 result->type= SEL_TREE::IMPOSSIBLE;
9213 if (param->using_real_indexes)
9214 {
9215 param->table->with_impossible_ranges.set_bit(param->
9216 real_keynr[key_no]);
9217 }
9218 DBUG_RETURN(1);
9219 }
9220 result_keys.set_bit(key_no);
9221 #ifdef EXTRA_DEBUG
9222 if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
9223 key->test_use_count(key);
9224 #endif
9225 }
9226 }
9227 result->keys_map= result_keys;
9228 DBUG_RETURN(0);
9229 }
9230
9231
9232 /*
9233 Build a SEL_TREE for a conjunction out of such trees for the conjuncts
9234
9235 SYNOPSIS
9236 tree_and()
9237 param Context info for the operation
9238 tree1 SEL_TREE for the first conjunct
9239 tree2 SEL_TREE for the second conjunct
9240
9241 DESCRIPTION
9242 This function builds a tree for the formula (A AND B) out of the trees
9243 tree1 and tree2 that has been built for the formulas A and B respectively.
9244
9245 In a general case
9246 tree1 represents the formula RT1 AND MT1,
9247 where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
9248 tree2 represents the formula RT2 AND MT2
9249 where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 AND ... AND M2_l2.
9250
9251 The result tree will represent the formula of the following structure:
9252 RT AND RT1MT2 AND RT2MT1, such that
9253 RT is a tree obtained by range intersection of trees tree1 and tree2,
9254 RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2,
9255 RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1,
9256 where RT1M2_i (i=1,...,l2) is the result of the pushdown operation
9257 of range tree RT1 into imerge M2_i, while RT2M1_j (j=1,...,l1) is the
9258 result of the pushdown operation of range tree RT2 into imerge M1_j.
9259
9260 RT1MT2/RT2MT1 is empty if MT2/MT1 is empty.
9261
9262 The range intersection of two range trees is produced by the function
9263 and_range_trees. The pushdown of a range tree into an imerge is performed
9264 by the function imerge_list_and_tree. This function may produce imerges
9265 containing only one range tree. Such trees are intersected with RT and
9266 the result of intersection is returned as the range part of the result
9267 tree, while the corresponding imerges are removed altogether from its
9268 imerge part.
9269
9270 NOTE
9271 The pushdown operation of range trees into imerges is needed to be able
9272 to construct valid imerges for the condition like this:
9273 key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2)
9274
9275 NOTE
9276 Currently we do not support intersection between indexes and index merges.
9277 When this will be supported the list of imerges for the result tree
9278 should include also imerges from M1 and M2. That's why an extra parameter
9279 is added to the function imerge_list_and_tree. If we call the function
9280 with the last parameter equal to FALSE then MT1 and MT2 will be preserved
9281 in the imerge list of the result tree. This can lead to the exponential
9282 growth of the imerge list though.
9283 Currently the last parameter of imerge_list_and_tree calls is always
9284 TRUE.
9285
9286 RETURN
9287 The result tree, if a success
9288 0 - otherwise.
9289 */
9290
9291 static
9292 SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
9293 {
9294 DBUG_ENTER("tree_and");
9295 if (!tree1)
9296 DBUG_RETURN(tree2);
9297 if (!tree2)
9298 DBUG_RETURN(tree1);
9299 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
9300 DBUG_RETURN(tree1);
9301 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
9302 DBUG_RETURN(tree2);
9303 if (tree1->type == SEL_TREE::MAYBE)
9304 {
9305 if (tree2->type == SEL_TREE::KEY)
9306 tree2->type=SEL_TREE::KEY_SMALLER;
9307 DBUG_RETURN(tree2);
9308 }
9309 if (tree2->type == SEL_TREE::MAYBE)
9310 {
9311 tree1->type=SEL_TREE::KEY_SMALLER;
9312 DBUG_RETURN(tree1);
9313 }
9314
9315 if (!tree1->merges.is_empty())
9316 imerge_list_and_tree(param, &tree1->merges, tree2, TRUE);
9317 if (!tree2->merges.is_empty())
9318 imerge_list_and_tree(param, &tree2->merges, tree1, TRUE);
9319 if (and_range_trees(param, tree1, tree2, tree1))
9320 DBUG_RETURN(tree1);
9321 imerge_list_and_list(&tree1->merges, &tree2->merges);
9322 eliminate_single_tree_imerges(param, tree1);
9323 DBUG_RETURN(tree1);
9324 }
9325
9326
9327 /*
9328 Eliminate single tree imerges in a SEL_TREE objects
9329
9330 SYNOPSIS
9331 eliminate_single_tree_imerges()
9332 param Context info for the function
9333 tree SEL_TREE where single tree imerges are to be eliminated
9334
9335 DESCRIPTION
9336 For each imerge in 'tree' that contains only one disjunct tree, i.e.
9337 for any imerge of the form m=rt, the function ANDs rt with the range
9338 part of 'tree', replaces the range part with the result of this ANDing,
9339 and removes the imerge m from the merge part of 'tree'.
9340
9341 RETURN VALUE
9342 none
9343 */
9344
9345 static
9346 void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree)
9347 {
9348 SEL_IMERGE *imerge;
9349 List<SEL_IMERGE> merges= tree->merges;
9350 List_iterator<SEL_IMERGE> it(merges);
9351 tree->merges.empty();
9352 while ((imerge= it++))
9353 {
9354 if (imerge->trees+1 == imerge->trees_next)
9355 {
9356 tree= tree_and(param, tree, *imerge->trees);
9357 it.remove();
9358 }
9359 }
9360 tree->merges= merges;
9361 }
9362
9363
9364 /*
9365 For two trees check that there are indexes with ranges in both of them
9366
9367 SYNOPSIS
9368 sel_trees_have_common_keys()
9369 tree1 SEL_TREE for the first tree
9370 tree2 SEL_TREE for the second tree
9371 common_keys OUT bitmap of all indexes with ranges in both trees
9372
9373 DESCRIPTION
9374 For two trees tree1 and tree2 the function checks if there are indexes
9375 in their range parts such that SEL_ARG trees are defined for them in the
9376 range parts of both trees. The function returns the bitmap of such
9377 indexes in the parameter common_keys.
9378
9379 RETURN
9380 TRUE if there are such indexes (common_keys is not empty)
9381 FALSE otherwise
9382 */
9383
9384 static
9385 bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
9386 key_map *common_keys)
9387 {
9388 *common_keys= tree1->keys_map;
9389 common_keys->intersect(tree2->keys_map);
9390 return !common_keys->is_clear_all();
9391 }
9392
9393
9394 /*
9395 Check whether range parts of two trees can be ored for some indexes
9396
9397 SYNOPSIS
9398 sel_trees_can_be_ored()
9399 param Context info for the function
9400 tree1 SEL_TREE for the first tree
9401 tree2 SEL_TREE for the second tree
9402 common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees
9403 OUT: bitmap of all indexes that can be ored
9404
9405 DESCRIPTION
9406 For two trees tree1 and tree2 and the bitmap common_keys containing
9407 bits for indexes that have SEL_ARG trees in range parts of both trees
9408 the function checks if there are indexes for which SEL_ARG trees can
9409 be ored. Two SEL_ARG trees for the same index can be ored if the most
9410 major components of the index used in these trees coincide. If the
9411 SEL_ARG trees for an index cannot be ored the function clears the bit
9412 for this index in the bitmap common_keys.
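For example, SEL_ARG trees built for keypart1 of an index (a,b) in both
tree1 and tree2 can be ored, while a tree for keypart1 in one of them
and a tree for keypart2 in the other cannot: their disjunction is not
representable as a range on that index.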
9413
9414 The function does not verify that indexes marked in common_keys really
9415 have SEL_ARG trees in both tree1 and tree2. It assumes that this is true.
9416
9417 NOTE
9418 The function sel_trees_can_be_ored is usually used in a pair with the
9419 function sel_trees_have_common_keys.
9420
9421 RETURN
9422 TRUE if there are indexes for which SEL_ARG trees can be ored
9423 FALSE otherwise
9424 */
9425
9426 static
9427 bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
9428 SEL_TREE *tree1, SEL_TREE *tree2,
9429 key_map *common_keys)
9430 {
9431 DBUG_ENTER("sel_trees_can_be_ored");
9432 if (!sel_trees_have_common_keys(tree1, tree2, common_keys))
9433 DBUG_RETURN(FALSE);
9434 int key_no;
9435 key_map::Iterator it(*common_keys);
9436 while ((key_no= it++) != key_map::Iterator::BITMAP_END)
9437 {
9438 DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]);
9439 /* Trees have a common key, check if they refer to the same key part */
9440 if (tree1->keys[key_no]->part != tree2->keys[key_no]->part)
9441 common_keys->clear_bit(key_no);
9442 }
9443 DBUG_RETURN(!common_keys->is_clear_all());
9444 }
9445
9446 /*
9447 Check whether the key parts inf_init..inf_end-1 of one index can compose
9448 an infix for the key parts key_init..key_end-1 of another index
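E.g. if one index is defined on (a,b,c,d), the key parts (b,c) of
another index form an infix for it, while (b,d) or (c,b) do not.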
9449 */
9450
9451 static
9452 bool is_key_infix(KEY_PART *key_init, KEY_PART *key_end,
9453 KEY_PART *inf_init, KEY_PART *inf_end)
9454 {
9455 KEY_PART *key_part, *inf_part;
9456 for (key_part= key_init; key_part < key_end; key_part++)
9457 {
9458 if (key_part->field->eq(inf_init->field))
9459 break;
9460 }
9461 if (key_part == key_end)
9462 return false;
9463 for (key_part++, inf_part= inf_init + 1;
9464 key_part < key_end && inf_part < inf_end;
9465 key_part++, inf_part++)
9466 {
9467 if (!key_part->field->eq(inf_part->field))
9468 return false;
9469 }
9470 return inf_part == inf_end;
9471 }
9472
9473
9474 /*
9475 Check whether range parts of two trees must be ored for some indexes
9476
9477 SYNOPSIS
9478 sel_trees_must_be_ored()
9479 param Context info for the function
9480 tree1 SEL_TREE for the first tree
9481 tree2 SEL_TREE for the second tree
9482 oredable_keys bitmap of SEL_ARG trees that can be ored
9483
9484 DESCRIPTION
9485 For two trees tree1 and tree2 the function checks whether they must be
9486 ored. The function assumes that the bitmap ordable_keys contains bits for
9487 those corresponding pairs of SEL_ARG trees from tree1 and tree2 that can
9488 be ored.
9489 We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees
9490 r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both
9491 of them are marked in oredable_keys, can be merged.
9492
9493 NOTE
9494 The function sel_trees_must_be_ored is usually used in a pair with the
9495 function sel_trees_can_be_ored.
9496
9497 RETURN
9498 TRUE if there are indexes for which SEL_ARG trees must be ored
9499 FALSE otherwise
9500 */
9501
9502 static
9503 bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
9504 SEL_TREE *tree1, SEL_TREE *tree2,
9505 key_map oredable_keys)
9506 {
9507 key_map tmp;
9508 DBUG_ENTER("sel_trees_must_be_ored");
9509
9510 tmp= tree1->keys_map;
9511 tmp.merge(tree2->keys_map);
9512 tmp.subtract(oredable_keys);
9513 if (!tmp.is_clear_all())
9514 DBUG_RETURN(FALSE);
9515
9516 int idx1, idx2;
9517 key_map::Iterator it1(oredable_keys);
9518 while ((idx1= it1++) != key_map::Iterator::BITMAP_END)
9519 {
9520 KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part;
9521 KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no;
9522 key_map::Iterator it2(oredable_keys);
9523 while ((idx2= it2++) != key_map::Iterator::BITMAP_END)
9524 {
9525 if (idx2 <= idx1)
9526 continue;
9527
9528 KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part;
9529 KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no;
9530 if (!is_key_infix(key1_init, key1_end, key2_init, key2_end) &&
9531 !is_key_infix(key2_init, key2_end, key1_init, key1_end))
9532 DBUG_RETURN(FALSE);
9533 }
9534 }
9535
9536 DBUG_RETURN(TRUE);
9537 }
9538
9539
9540 /*
9541 Remove the trees that are not suitable for record retrieval
9542
9543 SYNOPSIS
9544 remove_nonrange_trees()
9545 param Context info for the function
9546 tree Tree to be processed, tree->type is KEY or KEY_SMALLER
9547
9548 DESCRIPTION
9549 This function walks through tree->keys[] and removes the SEL_ARG* trees
9550 that are not "maybe" trees (*) and cannot be used to construct quick range
9551 selects.
9552 (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
9553 these types here as well.
9554
9555 A SEL_ARG* tree cannot be used to construct quick select if it has
9556 tree->part != 0. (e.g. it could represent "keypart2 < const").
9557
9558 Normally we allow construction of SEL_TREE objects that have SEL_ARG
9559 trees that do not allow quick range select construction.
9560 For example:
9561 for " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
9562 tree1= SEL_TREE { SEL_ARG{keypart1=1} }
9563 tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
9564 from this
9565 call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
9566 tree.
9567
9568 Another example:
9569 tree3= SEL_TREE { SEL_ARG{key1part1 = 1} }
9570 tree4= SEL_TREE { SEL_ARG{key2part2 = 2} } -- can't make quick range select
9571 from this
9572 call tree_or(tree3, tree4) -- creates a SEL_IMERGE out of which no index
9573 merge can be constructed, but it is potentially useful, as anding it with
9574 tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that
9575 represents the formula
9576 key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2
9577 for which an index merge can be built.
9578
9579 Any final SEL_TREE may contain SEL_ARG trees for which no quick select
9580 can be built. Such SEL_ARG trees should be removed from the range part
9581 before different range scans are evaluated. Such SEL_ARG trees also should
9582 be removed from all range trees of each index merge before different
9583 possible index merge plans are evaluated. If after this removal one
9584 of the range trees in the index merge becomes empty the whole index merge
9585 must be discarded.
9586
9587 RETURN
9588 0 Ok, some suitable trees left
9589 1 No tree->keys[] left.
9590 */
9591
9592 static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree)
9593 {
9594 bool res= FALSE;
9595 for (uint i=0; i < param->keys; i++)
9596 {
9597 if (tree->keys[i])
9598 {
9599 if (tree->keys[i]->part)
9600 {
9601 tree->keys[i]= NULL;
9602 /* Mark that records_in_range has not been called */
9603 param->quick_rows[param->real_keynr[i]]= HA_POS_ERROR;
9604 tree->keys_map.clear_bit(i);
9605 }
9606 else
9607 res= TRUE;
9608 }
9609 }
9610 return !res;
9611 }
9612
9613
9614 /*
9615 Restore nonrange trees to their previous state
9616 */
9617
9618 static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree,
9619 SEL_ARG **backup_keys)
9620 {
9621 for (uint i=0; i < param->keys; i++)
9622 {
9623 if (backup_keys[i])
9624 {
9625 tree->keys[i]= backup_keys[i];
9626 tree->keys_map.set_bit(i);
9627 }
9628 }
9629 }
9630
9631 /*
9632 Build a SEL_TREE for a disjunction out of such trees for the disjuncts
9633
9634 SYNOPSIS
9635 tree_or()
9636 param Context info for the operation
9637 tree1 SEL_TREE for the first disjunct
9638 tree2 SEL_TREE for the second disjunct
9639
9640 DESCRIPTION
9641 This function builds a tree for the formula (A OR B) out of the trees
9642 tree1 and tree2 that has been built for the formulas A and B respectively.
9643
9644 In a general case
9645 tree1 represents the formula RT1 AND MT1,
9646 where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
9647 tree2 represents the formula RT2 AND MT2
9648 where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
9649
9650 The function constructs the result tree according to the formula
9651 (RT1 OR RT2) AND (MT1 OR MT2) AND (MT1 OR RT2) AND (MT2 OR RT1)
9652 that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2).
9653
9654 To limit the number of produced imerges the function considers
9655 a weaker formula than the original one:
9656 (RT1 AND M1_1) OR (RT2 AND M2_1)
9657 that is equivalent to:
9658 (RT1 OR RT2) (1)
9659 AND
9660 (M1_1 OR M2_1) (2)
9661 AND
9662 (M1_1 OR RT2) (3)
9663 AND
9664 (M2_1 OR RT1) (4)
9665
9666 For the first conjunct (1) the function builds a tree with a range part
9667 and, possibly, one imerge. For the other conjuncts (2-4) the function
9668 produces sets of imerges. All constructed imerges are included into the
9669 result tree.
9670
9671 For the formula (1) the function produces the tree representing a formula
9672 of the structure RT [AND M], such that:
9673 - the range tree RT contains the result of oring the SEL_ARG trees from RT1
9674 and RT2,
9675 - the imerge M consists of the two range trees RT1 and RT2.
9676 The imerge M is added only if it's not true that RT1 and RT2 must be ored.
9677 If RT1 and RT2 can't be ored, RT is empty and only M is produced for (1).
9678
9679 To produce imerges for the formula (2) the function calls the function
9680 imerge_list_or_list passing it the merge parts of tree1 and tree2 as
9681 parameters.
9682
9683 To produce imerges for the formula (3) the function calls the function
9684 imerge_list_or_tree passing it the imerge M1_1 and the range tree RT2 as
9685 parameters. Similarly, to produce imerges for the formula (4) the function
9686 calls the function imerge_list_or_tree passing it the imerge M2_1 and the
9687 range tree RT1.
9688
9689 If RT1 is empty then the trees for (1) and (4) are empty.
9690 If RT2 is empty then the trees for (1) and (3) are empty.
9691 If MT1 is empty then the trees for (2) and (3) are empty.
9692 If MT2 is empty then the trees for (2) and (4) are empty.
9693
9694 RETURN
9695 The result tree for the operation if a success
9696 0 - otherwise
9697 */
9698
9699 static SEL_TREE *
9700 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
9701 {
9702 DBUG_ENTER("tree_or");
9703 if (!tree1 || !tree2)
9704 DBUG_RETURN(0);
9705 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
9706 DBUG_RETURN(tree2);
9707 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
9708 DBUG_RETURN(tree1);
9709 if (tree1->type == SEL_TREE::MAYBE)
9710 DBUG_RETURN(tree1); // Can't use this
9711 if (tree2->type == SEL_TREE::MAYBE)
9712 DBUG_RETURN(tree2);
9713
9714 SEL_TREE *result= NULL;
9715 key_map result_keys;
9716 key_map ored_keys;
9717 SEL_TREE *rtree[2]= {NULL,NULL};
9718 SEL_IMERGE *imerge[2]= {NULL, NULL};
9719 bool no_ranges1= tree1->without_ranges();
9720 bool no_ranges2= tree2->without_ranges();
9721 bool no_merges1= tree1->without_imerges();
9722 bool no_merges2= tree2->without_imerges();
9723 if (!no_ranges1 && !no_merges2)
9724 {
9725 rtree[0]= new SEL_TREE(tree1, TRUE, param);
9726 imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param);
9727 }
9728 if (!no_ranges2 && !no_merges1)
9729 {
9730 rtree[1]= new SEL_TREE(tree2, TRUE, param);
9731 imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param);
9732 }
9733 bool no_imerge_from_ranges= FALSE;
9734
9735 /* Build the range part of the tree for the formula (1) */
9736 if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys))
9737 {
9738 bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
9739 no_imerge_from_ranges= must_be_ored;
9740
9741 if (no_imerge_from_ranges && no_merges1 && no_merges2)
9742 {
9743 /*
9744 Reuse tree1 as the result in simple cases. This reduces memory usage
9745 for e.g. "key IN (c1, ..., cN)" which produces a lot of ranges.
9746 */
9747 result= tree1;
9748 result->keys_map.clear_all();
9749 }
9750 else
9751 {
9752 if (!(result= new (param->mem_root) SEL_TREE(param->mem_root,
9753 param->keys)))
9754 {
9755 DBUG_RETURN(result);
9756 }
9757 }
9758
9759 key_map::Iterator it(ored_keys);
9760 int key_no;
9761 while ((key_no= it++) != key_map::Iterator::BITMAP_END)
9762 {
9763 SEL_ARG *key1= tree1->keys[key_no];
9764 SEL_ARG *key2= tree2->keys[key_no];
9765 if (!must_be_ored)
9766 {
9767 key1->incr_refs();
9768 key2->incr_refs();
9769 }
9770 if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, key2)))
9771 result->keys_map.set_bit(key_no);
9772 }
9773 result->type= tree1->type;
9774 }
9775 else
9776 {
9777 if (!result && !(result= new (param->mem_root) SEL_TREE(param->mem_root,
9778 param->keys)))
9779 DBUG_RETURN(result);
9780 }
9781
9782 if (no_imerge_from_ranges && no_merges1 && no_merges2)
9783 {
9784 if (result->keys_map.is_clear_all())
9785 result->type= SEL_TREE::ALWAYS;
9786 DBUG_RETURN(result);
9787 }
9788
9789 SEL_IMERGE *imerge_from_ranges;
9790 if (!(imerge_from_ranges= new SEL_IMERGE()))
9791 result= NULL;
9792 else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges)
9793 {
9794 /* Build the imerge part of the tree for the formula (1) */
9795 SEL_TREE *rt1= tree1;
9796 SEL_TREE *rt2= tree2;
9797 if (no_merges1)
9798 rt1= new SEL_TREE(tree1, TRUE, param);
9799 if (no_merges2)
9800 rt2= new SEL_TREE(tree2, TRUE, param);
9801 if (!rt1 || !rt2 ||
9802 result->merges.push_back(imerge_from_ranges) ||
9803 imerge_from_ranges->or_sel_tree(param, rt1) ||
9804 imerge_from_ranges->or_sel_tree(param, rt2))
9805 result= NULL;
9806 }
9807 if (!result)
9808 DBUG_RETURN(result);
9809
9810 result->type= tree1->type;
9811
9812 if (!no_merges1 && !no_merges2 &&
9813 !imerge_list_or_list(param, &tree1->merges, &tree2->merges))
9814 {
9815 /* Build the imerges for the formula (2) */
9816 imerge_list_and_list(&result->merges, &tree1->merges);
9817 }
9818
9819 /* Build the imerges for the formulas (3) and (4) */
9820 for (uint i=0; i < 2; i++)
9821 {
9822 List<SEL_IMERGE> merges;
9823 SEL_TREE *rt= rtree[i];
9824 SEL_IMERGE *im= imerge[1-i];
9825
9826 if (rt && im && !merges.push_back(im) &&
9827 !imerge_list_or_tree(param, &merges, rt))
9828 imerge_list_and_list(&result->merges, &merges);
9829 }
9830
9831 DBUG_RETURN(result);
9832 }
9833
9834
9835 /* AND key trees where key1->part < key2->part */
9836
9837 static SEL_ARG *
9838 and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
9839 uint clone_flag)
9840 {
9841 SEL_ARG *next;
9842 ulong use_count=key1->use_count;
9843
9844 if (sel_arg_and_weight_heuristic(param, key1, key2))
9845 return key1;
9846
9847 if (key1->elements != 1)
9848 {
9849 key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
9850 key2->increment_use_count((int) key1->elements-1);
9851 }
9852 if (key1->type == SEL_ARG::MAYBE_KEY)
9853 {
9854 if (key2->type == SEL_ARG::KEY_RANGE)
9855 return key2;
9856 key1->right= key1->left= &null_element;
9857 key1->next= key1->prev= 0;
9858 }
9859
9860 for (next=key1->first(); next ; next=next->next)
9861 {
9862 if (next->next_key_part)
9863 {
9864 SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
9865 if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
9866 {
9867 key1=key1->tree_delete(next);
9868 continue;
9869 }
9870 next->next_key_part=tmp;
9871 if (use_count)
9872 next->increment_use_count(use_count);
9873 if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
9874 break;
9875 }
9876 else
9877 next->next_key_part=key2;
9878 }
9879 if (!key1)
9880 return &null_element; // Impossible ranges
9881 key1->use_count++;
9882
9883 /* Re-compute the result tree's weight. */
9884 key1->update_weight_locally();
9885
9886 key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
9887 return key1;
9888 }
9889
9890
9891 /*
9892 Produce a SEL_ARG graph that represents "key1 AND key2"
9893
9894 SYNOPSIS
9895 key_and()
9896 param Range analysis context (needed to track if we have allocated
9897 too many SEL_ARGs)
9898 key1 First argument, root of its RB-tree
9899 key2 Second argument, root of its RB-tree
9900
9901 RETURN
9902 RB-tree root of the resulting SEL_ARG graph.
9903 NULL if the result of AND operation is an empty interval {0}.
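E.g. ANDing the graphs for (keypart1 > 2) and (keypart1 < 10) produces
a graph with the single interval (2 < keypart1 < 10), while ANDing
(keypart1 < 2) and (keypart1 > 10) produces an empty interval.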
9904 */
9905
9906 static SEL_ARG *
9907 key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
9908 {
9909 if (!key1)
9910 return key2;
9911 if (!key2)
9912 return key1;
9913 if (key1->part != key2->part)
9914 {
9915 if (key1->part > key2->part)
9916 {
9917 swap_variables(SEL_ARG *, key1, key2);
9918 clone_flag=swap_clone_flag(clone_flag);
9919 }
9920 // key1->part < key2->part
9921
9922 if (sel_arg_and_weight_heuristic(param, key1, key2))
9923 return key1;
9924
9925 key1->use_count--;
9926 if (key1->use_count > 0)
9927 if (!(key1= key1->clone_tree(param)))
9928 return 0; // OOM
9929 return and_all_keys(param, key1, key2, clone_flag);
9930 }
9931
9932 if (((clone_flag & CLONE_KEY2_MAYBE) &&
9933 !(clone_flag & CLONE_KEY1_MAYBE) &&
9934 key2->type != SEL_ARG::MAYBE_KEY) ||
9935 key1->type == SEL_ARG::MAYBE_KEY)
9936 { // Put simple key in key2
9937 swap_variables(SEL_ARG *, key1, key2);
9938 clone_flag=swap_clone_flag(clone_flag);
9939 }
9940
9941 /* If one of the keys is MAYBE_KEY then the found region may be smaller */
9942 if (key2->type == SEL_ARG::MAYBE_KEY)
9943 {
9944 if (key1->use_count > 1)
9945 {
9946 key1->use_count--;
9947 if (!(key1=key1->clone_tree(param)))
9948 return 0; // OOM
9949 key1->use_count++;
9950 }
9951 if (key1->type == SEL_ARG::MAYBE_KEY)
9952 { // Both are maybe key
9953 key1->next_key_part=key_and(param, key1->next_key_part,
9954 key2->next_key_part, clone_flag);
9955
9956 key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight : 0);
9957
9958 if (key1->next_key_part &&
9959 key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
9960 return key1;
9961 }
9962 else
9963 {
9964 key1->maybe_smaller();
9965 if (key2->next_key_part)
9966 {
9967 key1->use_count--; // Incremented in and_all_keys
9968 return and_all_keys(param, key1, key2->next_key_part, clone_flag);
9969 }
9970 key2->use_count--; // Key2 doesn't have a tree
9971 }
9972 return key1;
9973 }
9974
9975 if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
9976 {
9977 /* TODO: why not leave one of the trees? */
9978 key1->free_tree();
9979 key2->free_tree();
9980 return 0; // Can't optimize this
9981 }
9982
9983 key1->use_count--;
9984 key2->use_count--;
9985 SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
9986 uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
9987
9988 while (e1 && e2)
9989 {
9990 int cmp=e1->cmp_min_to_min(e2);
9991 if (cmp < 0)
9992 {
9993 if (get_range(&e1,&e2,key1))
9994 continue;
9995 }
9996 else if (get_range(&e2,&e1,key2))
9997 continue;
9998 SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
9999 clone_flag);
10000 e1->incr_refs();
10001 e2->incr_refs();
10002 if (!next || next->type != SEL_ARG::IMPOSSIBLE)
10003 {
10004 SEL_ARG *new_arg= e1->clone_and(param->thd, e2);
10005 if (!new_arg)
10006 return &null_element; // End of memory
10007 new_arg->next_key_part=next;
10008 if (new_arg->next_key_part)
10009 new_arg->weight += new_arg->next_key_part->weight;
10010
10011 if (!new_tree)
10012 {
10013 new_tree=new_arg;
10014 }
10015 else
10016 new_tree=new_tree->insert(new_arg);
10017 }
10018 if (e1->cmp_max_to_max(e2) < 0)
10019 e1=e1->next; // e1 can't overlap next e2
10020 else
10021 e2=e2->next;
10022 }
10023 key1->free_tree();
10024 key2->free_tree();
10025 if (!new_tree)
10026 return &null_element; // Impossible range
10027 new_tree->max_part_no= max_part_no;
10028 return new_tree;
10029 }
10030
10031
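/*
  Position *e1 at a range in root1's tree that overlaps *e2. This contract
  is inferred from the caller in key_and():

  RETURN
    0  *e1 and *e2 now point at overlapping ranges; combine them
    1  a non-overlapping range was skipped: *e1 (and possibly *e2) was
       advanced, and *e1 may now be NULL; restart with the new pair
*/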
10032 static bool
10033 get_range(SEL_ARG **e1, SEL_ARG **e2, SEL_ARG *root1)
10034 {
10035 (*e1)=root1->find_range(*e2); // first e1->min < e2->min
10036 if ((*e1)->cmp_max_to_min(*e2) < 0)
10037 {
10038 if (!((*e1)=(*e1)->next))
10039 return 1;
10040 if ((*e1)->cmp_min_to_max(*e2) > 0)
10041 {
10042 (*e2)=(*e2)->next;
10043 return 1;
10044 }
10045 }
10046 return 0;
10047 }
10048
10049 /*
10050 @brief
10051 Update the tree weight.
10052
10053 @detail
10054 Utility function to be called on a SEL_ARG tree root after doing local
10055 modifications concerning changes at this key part.
10056 Assumes that the weight of the graphs connected via next_key_part is
10057 up to date.
10058 */
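
/*
  Example (hypothetical tree): two ranges on this keypart, the first of
  which links to a next_key_part graph of weight 3, give
  new_weight = 2 + 3 = 5.
*/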
10059 void SEL_ARG::update_weight_locally()
10060 {
10061 uint new_weight= 0;
10062 const SEL_ARG *sl;
10063 for (sl= first(); sl ; sl= sl->next)
10064 {
10065 new_weight++;
10066 if (sl->next_key_part)
10067 new_weight += sl->next_key_part->weight;
10068 }
10069 weight= new_weight;
10070 }
10071
10072
10073 #ifndef DBUG_OFF
10074 /*
10075 Verify the SEL_ARG tree's weight.
10076
10077 Recompute the weight and compare it with the stored value
10078 */
10079 uint SEL_ARG::verify_weight()
10080 {
10081 uint computed_weight= 0;
10082 SEL_ARG *first_arg= first();
10083
10084 if (first_arg)
10085 {
10086 for (SEL_ARG *arg= first_arg; arg; arg= arg->next)
10087 {
10088 computed_weight++;
10089 if (arg->next_key_part)
10090 computed_weight+= arg->next_key_part->verify_weight();
10091 }
10092 }
10093 else
10094 {
10095 // first()=NULL means this is a special kind of SEL_ARG, e.g.
10096 // SEL_ARG with type=MAYBE_KEY
10097 computed_weight= 1;
10098 if (next_key_part)
10099 computed_weight += next_key_part->verify_weight();
10100 }
10101
10102 if (computed_weight != weight)
10103 {
10104 sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n",
10105 computed_weight, weight);
10106 DBUG_ASSERT(computed_weight == weight); // Fail an assertion
10107 }
10108 return computed_weight;
10109 }
10110 #endif
10111
10112 static
10113 SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
10114 SEL_ARG *key1, SEL_ARG *key2)
10115 {
10116 #ifndef DBUG_OFF
10117 if (key1)
10118 key1->verify_weight();
10119 if (key2)
10120 key2->verify_weight();
10121 #endif
10122
10123 SEL_ARG *res= key_or(param, key1, key2);
10124 res= enforce_sel_arg_weight_limit(param, keyno, res);
10125 #ifndef DBUG_OFF
10126 if (res)
10127 res->verify_weight();
10128 #endif
10129 return res;
10130 }
10131
10132
10133 static
10134 SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
10135 SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
10136 {
10137 #ifndef DBUG_OFF
10138 if (key1)
10139 key1->verify_weight();
10140 if (key2)
10141 key2->verify_weight();
10142 #endif
10143 SEL_ARG *res= key_and(param, key1, key2, clone_flag);
10144 res= enforce_sel_arg_weight_limit(param, keyno, res);
10145 #ifndef DBUG_OFF
10146 if (res)
10147 res->verify_weight();
10148 #endif
10149 return res;
10150 }
10151
10152
10153 /**
10154 Combine two range expressions under a common OR. On a logical level, the
10155 transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
10156
10157 Both expressions are assumed to be in the SEL_ARG format. In a logical
10158 sense, the format is reminiscent of DNF, since an expression such as the following
10159
10160 ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp1 < 20 AND p2 )
10161
10162 where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
10163 and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
10164 SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
10165 the first range and ranges must not overlap. It follows that they are also
10166 ordered by maximum endpoints. Thus
10167
10168 ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
10169
10170 is a valid SEL_ARG expression for a key of at least 2 keyparts.
10171
10172 For simplicity, we will assume that expr2 is a single range predicate,
10173 i.e. of the form ( a < x < b AND ... ). It is easy to generalize to a
10174 disjunction of several predicates by subsequently calling key_or for each
10175 disjunct.
10176
10177 The algorithm iterates over each disjunct of expr1, and for each disjunct
10178 where the first keypart's range overlaps with the first keypart's range in
10179 expr2:
10180
10181 If the predicates are equal for the rest of the keyparts, or if there are
10182 no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
10183 node in expr2 is deallocated. If more ranges become connected in expr1, the
10184 surplus is also deallocated. If they differ, two ranges are created.
10185
10186 - The range leading up to the overlap. Empty if endpoints are equal.
10187
10188 - The overlapping sub-range. May be the entire range if they are equal.
10189
10190 Finally, there may be one more range if expr2's first keypart's range has a
10191 greater maximum endpoint than the last range in expr1.
10192
10193 For the overlapping sub-range, we recursively call key_or. Thus in order to
10194 compute key_or of
10195
10196 (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
10197
10198 (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
10199
10200 We create the ranges 1 < kp1 <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
10201 first one, we simply hook on the condition for the second keypart from (1):
10202 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
10203 < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
10204 the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
10205
10206 ( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR
10207 ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR
10208 ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
10209 */
10210 static SEL_ARG *
10211 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
10212 {
10213 if (!key1)
10214 {
10215 if (key2)
10216 {
10217 key2->use_count--;
10218 key2->free_tree();
10219 }
10220 return 0;
10221 }
10222 if (!key2)
10223 {
10224 key1->use_count--;
10225 key1->free_tree();
10226 return 0;
10227 }
10228 key1->use_count--;
10229 key2->use_count--;
10230
10231 if (key1->part != key2->part ||
10232 (key1->min_flag | key2->min_flag) & GEOM_FLAG)
10233 {
10234 key1->free_tree();
10235 key2->free_tree();
10236 return 0; // Can't optimize this
10237 }
10238
10239 // If one of the keys is MAYBE_KEY then the found region may be bigger
10240 if (key1->type == SEL_ARG::MAYBE_KEY)
10241 {
10242 key2->free_tree();
10243 key1->use_count++;
10244 return key1;
10245 }
10246 if (key2->type == SEL_ARG::MAYBE_KEY)
10247 {
10248 key1->free_tree();
10249 key2->use_count++;
10250 return key2;
10251 }
10252
10253 if (key1->use_count > 0)
10254 {
10255 if (key2->use_count == 0 || key1->elements > key2->elements)
10256 {
10257 swap_variables(SEL_ARG *,key1,key2);
10258 }
10259 if (key1->use_count > 0 && !(key1=key1->clone_tree(param)))
10260 return 0; // OOM
10261 }
10262
10263 // Add tree at key2 to tree at key1
10264 bool key2_shared=key2->use_count != 0;
10265 key1->maybe_flag|=key2->maybe_flag;
10266
10267 /*
10268 Notation for illustrations used in the rest of this function:
10269
10270 Range: [--------]
10271 ^ ^
10272 start stop
10273
10274 Two overlapping ranges:
10275 [-----] [----] [--]
10276 [---] or [---] or [-------]
10277
10278 Ambiguity: ***
10279 The range starts or stops somewhere in the "***" range.
10280 Example: a starts before b and may end before/at the same place as/after b
10281 a: [----***]
10282 b: [---]
10283
10284 Adjacent ranges:
10285 Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
10286 a: ----]
10287 b: [----
10288 */
10289
10290 uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
10291
10292 for (key2=key2->first(); ; )
10293 {
10294 /*
10295 key1 consists of one or more ranges. tmp is the range currently
10296 being handled.
10297
10298 initialize tmp to the latest range in key1 that starts the same
10299 place or before the range in key2 starts
10300
10301 key2: [------]
10302 key1: [---] [-----] [----]
10303 ^
10304 tmp
10305 */
10306 if (key1->min_flag & NO_MIN_RANGE &&
10307 key1->max_flag & NO_MAX_RANGE)
10308 {
10309 if (key1->maybe_flag)
10310 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10311 return 0; // Always true OR
10312 }
10313 if (!key2)
10314 break;
10315
10316 SEL_ARG *tmp=key1->find_range(key2);
10317
10318 /*
10319 Used to describe how two key values are positioned compared to
10320 each other. Consider key_value_a.<cmp_func>(key_value_b):
10321
10322 -2: key_value_a is smaller than key_value_b, and they are adjacent
10323 -1: key_value_a is smaller than key_value_b (not adjacent)
10324 0: the key values are equal
10325 1: key_value_a is bigger than key_value_b (not adjacent)
10326 2: key_value_a is bigger than key_value_b, and they are adjacent
10327
10328 Example: "cmp= tmp->cmp_max_to_min(key2)"
10329
10330 key2: [-------- (10 <= x ...)
10331 tmp: -----] (... x < 10) => cmp==-2
10332 tmp: ----] (... x <= 9) => cmp==-1
10333 tmp: ------] (... x = 10) => cmp== 0
10334 tmp: --------] (... x <= 12) => cmp== 1
10335 (cmp == 2 does not make sense for cmp_max_to_min())
10336 */
10337 int cmp= 0;
10338
10339 if (!tmp)
10340 {
10341 /*
10342 The range in key2 starts before the first range in key1. Use
10343 the first range in key1 as tmp.
10344
10345 key2: [--------]
10346 key1: [****--] [----] [-------]
10347 ^
10348 tmp
10349 */
10350 tmp=key1->first();
10351 cmp= -1;
10352 }
10353 else if ((cmp= tmp->cmp_max_to_min(key2)) < 0)
10354 {
10355 /*
10356 This is the case:
10357 key2: [-------]
10358 tmp: [----**]
10359 */
10360 SEL_ARG *next=tmp->next;
10361 if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
10362 {
10363 /*
10364 Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
10365
10366 This is the case:
10367 key2: [-------]
10368 tmp: [----]
10369
10370 Result:
10371 key2: [-------------] => inserted into key1 below
10372 tmp: => deleted
10373 */
10374 SEL_ARG *key2_next=key2->next;
10375 if (key2_shared)
10376 {
10377 if (!(key2=new SEL_ARG(*key2)))
10378 return 0; // out of memory
10379 key2->increment_use_count(key1->use_count+1);
10380 key2->next=key2_next; // New copy of key2
10381 }
10382
10383 key2->copy_min(tmp);
10384 if (!(key1=key1->tree_delete(tmp)))
10385 { // Only one key in tree
10386 if (key2->min_flag & NO_MIN_RANGE &&
10387 key2->max_flag & NO_MAX_RANGE)
10388 {
10389 if (key2->maybe_flag)
10390 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10391 return 0; // Always true OR
10392 }
10393 key1=key2;
10394 key1->make_root();
10395 key2=key2_next;
10396 break;
10397 }
10398 }
10399 if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min
10400 break; // No more ranges in key1. Copy rest of key2
10401 }
10402
10403 if (cmp < 0)
10404 {
10405 /*
10406 This is the case:
10407 key2: [--***]
10408 tmp: [----]
10409 */
10410 int tmp_cmp;
10411 if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0)
10412 {
10413 /*
10414 This is the case:
10415 key2: [------**]
10416 tmp: [----]
10417 */
10418 if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
10419 {
10420 /*
10421 Adjacent ranges with equal next_key_part. Merge like this:
10422
10423 This is the case:
10424 key2: [------]
10425 tmp: [-----]
10426
10427 Result:
10428 key2: [------]
10429 tmp: [-------------]
10430
10431 Then move on to next key2 range.
10432 */
10433 tmp->copy_min_to_min(key2);
10434 key1->merge_flags(key2);
10435 if (tmp->min_flag & NO_MIN_RANGE &&
10436 tmp->max_flag & NO_MAX_RANGE)
10437 {
10438 if (key1->maybe_flag)
10439 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10440 return 0;
10441 }
10442 key2->increment_use_count(-1); // Free not used tree
10443 key2=key2->next;
10444 continue;
10445 }
10446 else
10447 {
10448 /*
10449 key2 not adjacent to tmp or has different next_key_part.
10450 Insert into key1 and move to next range in key2
10451
10452 This is the case:
10453 key2: [------**]
10454 tmp: [----]
10455
10456 Result:
10457 key1: [------**][----]
10458 ^ ^
10459 insert tmp
10460 */
10461 SEL_ARG *next=key2->next;
10462 if (key2_shared)
10463 {
10464 SEL_ARG *cpy= new SEL_ARG(*key2); // Must make copy
10465 if (!cpy)
10466 return 0; // OOM
10467 key1=key1->insert(cpy);
10468 key2->increment_use_count(key1->use_count+1);
10469 }
10470 else
10471 key1=key1->insert(key2); // Will destroy key2_root
10472 key2=next;
10473 continue;
10474 }
10475 }
10476 }
10477
10478 /*
10479 The ranges in tmp and key2 are overlapping:
10480
10481 key2: [----------]
10482 tmp: [*****-----*****]
10483
10484 Corollary: tmp.min <= key2.max
10485 */
10486 if (eq_tree(tmp->next_key_part,key2->next_key_part))
10487 {
10488 // Merge overlapping ranges with equal next_key_part
10489 if (tmp->is_same(key2))
10490 {
10491 /*
10492 Found exact match of key2 inside key1.
10493 Use the relevant range in key1.
10494 */
10495 tmp->merge_flags(key2); // Copy maybe flags
10496 key2->increment_use_count(-1); // Free not used tree
10497 }
10498 else
10499 {
10500 SEL_ARG *last= tmp;
10501 SEL_ARG *first= tmp;
10502
10503 /*
10504 Find the last range in key1 that overlaps key2 and
10505 where all ranges first...last have the same next_key_part as
10506 key2.
10507
10508 key2: [****----------------------*******]
10509 key1: [--] [----] [---] [-----] [xxxx]
10510 ^ ^ ^
10511 first last different next_key_part
10512
10513 Since key2 covers them, the ranges between first and last
10514 are merged into one range by deleting first...last-1 from
10515 the key1 tree. In the figure, this applies to first and the
10516 two consecutive ranges. The range of last is then extended:
10517 * last.min: Set to MY_MIN(key2.min, first.min)
10518 * last.max: If there is a last->next that overlaps key2 (i.e.,
10519 last->next has a different next_key_part):
10520 Set adjacent to last->next.min
10521 Otherwise: Set to MY_MAX(key2.max, last.max)
10522
10523 Result:
10524 key2: [****----------------------*******]
10525 [--] [----] [---] => deleted from key1
10526 key1: [**------------------------***][xxxx]
10527 ^ ^
10528 tmp=last different next_key_part
10529 */
10530 while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
10531 eq_tree(last->next->next_key_part,key2->next_key_part))
10532 {
10533 /*
10534 last->next is covered by key2 and has same next_key_part.
10535 last can be deleted
10536 */
10537 SEL_ARG *save=last;
10538 last=last->next;
10539 key1=key1->tree_delete(save);
10540 }
10541 // Redirect tmp to last which will cover the entire range
10542 tmp= last;
10543
10544 /*
10545 We need the minimum endpoint of first so we can compare it
10546 with the minimum endpoint of the enclosing key2 range.
10547 */
10548 last->copy_min(first);
10549 bool full_range= last->copy_min(key2);
10550 if (!full_range)
10551 {
10552 if (last->next && key2->cmp_max_to_min(last->next) >= 0)
10553 {
10554 /*
10555 This is the case:
10556 key2: [-------------]
10557 key1: [***------] [xxxx]
10558 ^ ^
10559 last different next_key_part
10560
10561 Extend range of last up to last->next:
10562 key2: [-------------]
10563 key1: [***--------][xxxx]
10564 */
10565 last->copy_min_to_max(last->next);
10566 }
10567 else
10568 /*
10569 This is the case:
10570 key2: [--------*****]
10571 key1: [***---------] [xxxx]
10572 ^ ^
10573 last different next_key_part
10574
10575 Extend range of last up to MY_MAX(last.max, key2.max):
10576 key2: [--------*****]
10577 key1: [***----------**] [xxxx]
10578 */
10579 full_range= last->copy_max(key2);
10580 }
10581 if (full_range)
10582 { // Full range
10583 key1->free_tree();
10584 for (; key2 ; key2=key2->next)
10585 key2->increment_use_count(-1); // Free not used tree
10586 if (key1->maybe_flag)
10587 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10588 return 0;
10589 }
10590 }
10591 }
10592
10593 if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
10594 {
10595 /*
10596 This is the case ("cmp>=0" means that tmp.max >= key2.min):
10597 key2: [----]
10598 tmp: [------------*****]
10599 */
10600
10601 if (!tmp->next_key_part)
10602 {
10603 SEL_ARG *key2_next= key2->next;
10604 if (key2_shared)
10605 {
10606 SEL_ARG *key2_cpy= new SEL_ARG(*key2);
10607 if (!key2_cpy)
10608 return 0;
10609 key2= key2_cpy;
10610 }
10611 /*
10612 tmp->next_key_part is empty: cut the range that is covered
10613 by tmp from key2.
10614 Reason: (key2->next_key_part OR tmp->next_key_part) will be
10615 empty and therefore equal to tmp->next_key_part. Thus, this
10616 part of the key2 range is completely covered by tmp.
10617 */
10618 if (tmp->cmp_max_to_max(key2) >= 0)
10619 {
10620 /*
10621 tmp covers the entire range in key2.
10622 key2: [----]
10623 tmp: [-----------------]
10624
10625 Move on to next range in key2
10626 */
10627 key2->increment_use_count(-1); // Free not used tree
10628 key2=key2_next;
10629 continue;
10630 }
10631 else
10632 {
10633 /*
10634 This is the case:
10635 key2: [-------]
10636 tmp: [---------]
10637
10638 Result:
10639 key2: [---]
10640 tmp: [---------]
10641 */
10642 key2->copy_max_to_min(tmp);
10643 continue;
10644 }
10645 }
10646
10647 /*
10648 The ranges are overlapping but have not been merged because
10649 next_key_part of tmp and key2 differ.
10650 key2: [----]
10651 tmp: [------------*****]
10652
10653 Split tmp in two where key2 starts:
10654 key2: [----]
10655 key1: [--------][--*****]
10656 ^ ^
10657 insert tmp
10658 */
10659 SEL_ARG *new_arg=tmp->clone_first(key2);
10660 if (!new_arg)
10661 return 0; // OOM
10662 if ((new_arg->next_key_part= tmp->next_key_part))
10663 new_arg->increment_use_count(key1->use_count+1);
10664 tmp->copy_min_to_min(key2);
10665 key1=key1->insert(new_arg);
10666 } // tmp.min >= key2.min due to this if()
10667
10668 /*
10669 Now key2.min <= tmp.min <= key2.max:
10670 key2: [---------]
10671 tmp: [****---*****]
10672 */
10673 SEL_ARG key2_cpy(*key2); // Get copy we can modify
10674 for (;;)
10675 {
10676 if (tmp->cmp_min_to_min(&key2_cpy) > 0)
10677 {
10678 /*
10679 This is the case:
10680 key2_cpy: [------------]
10681 key1: [-*****]
10682 ^
10683 tmp
10684
10685 Result:
10686 key2_cpy: [---]
10687 key1: [-------][-*****]
10688 ^ ^
10689 insert tmp
10690 */
10691 SEL_ARG *new_arg=key2_cpy.clone_first(tmp);
10692 if (!new_arg)
10693 return 0; // OOM
10694 if ((new_arg->next_key_part=key2_cpy.next_key_part))
10695 new_arg->increment_use_count(key1->use_count+1);
10696 key1=key1->insert(new_arg);
10697 key2_cpy.copy_min_to_min(tmp);
10698 }
10699 // Now key2_cpy.min == tmp.min
10700
10701 if ((cmp= tmp->cmp_max_to_max(&key2_cpy)) <= 0)
10702 {
10703 /*
10704 tmp.max <= key2_cpy.max:
10705 key2_cpy: a) [-------] or b) [----]
10706 tmp: [----] [----]
10707
10708 Steps:
10709 1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part.
10710 2) If case a: Insert range [tmp.max, key2_cpy.max] into key1 using
10711 next_key_part of key2_cpy
10712
10713 Result:
10714 key1: a) [----][-] or b) [----]
10715 */
10716 tmp->maybe_flag|= key2_cpy.maybe_flag;
10717 key2_cpy.increment_use_count(key1->use_count+1);
10718
10719 uint old_weight= tmp->next_key_part? tmp->next_key_part->weight: 0;
10720
10721 tmp->next_key_part= key_or(param, tmp->next_key_part,
10722 key2_cpy.next_key_part);
10723
10724 uint new_weight= tmp->next_key_part? tmp->next_key_part->weight: 0;
10725 key1->weight += (new_weight - old_weight);
10726
10727 if (!cmp)
10728 break; // case b: done with this key2 range
10729
10730 // Make key2_cpy the range [tmp.max, key2_cpy.max]
10731 key2_cpy.copy_max_to_min(tmp);
10732 if (!(tmp=tmp->next))
10733 {
10734 /*
10735 No more ranges in key1. Insert key2_cpy and go to "end"
10736 label to insert remaining ranges in key2 if any.
10737 */
10738 SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
10739 if (!tmp2)
10740 return 0; // OOM
10741 key1=key1->insert(tmp2);
10742 key2=key2->next;
10743 goto end;
10744 }
10745 if (tmp->cmp_min_to_max(&key2_cpy) > 0)
10746 {
10747 /*
10748 The next range in key1 does not overlap with key2_cpy.
10749 Insert this range into key1 and move on to the next range
10750 in key2.
10751 */
10752 SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
10753 if (!tmp2)
10754 return 0; // OOM
10755 key1=key1->insert(tmp2);
10756 break;
10757 }
10758 /*
10759 key2_cpy overlaps with the next range in key1 and the case
10760 is now "key2.min <= tmp.min <= key2.max". Go back to for(;;)
10761 to handle this situation.
10762 */
10763 continue;
10764 }
10765 else
10766 {
10767 /*
10768 This is the case:
10769 key2_cpy: [-------]
10770 tmp: [------------]
10771
10772 Result:
10773 key1: [-------][---]
10774 ^ ^
10775 new_arg tmp
10776 Steps:
10777 0) If tmp->next_key_part is empty: do nothing. Reason:
10778 (key2_cpy->next_key_part OR tmp->next_key_part) will be
10779 empty and therefore equal to tmp->next_key_part. Thus,
10780 the range in key2_cpy is completely covered by tmp
10781 1) Make new_arg with range [tmp.min, key2_cpy.max].
10782 new_arg->next_key_part is OR between next_key_part
10783 of tmp and key2_cpy
10784 2) Make tmp the range [key2.max, tmp.max]
10785 3) Insert new_arg into key1
10786 */
10787 if (!tmp->next_key_part) // Step 0
10788 {
10789 key2_cpy.increment_use_count(-1); // Free not used tree
10790 break;
10791 }
10792 SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
10793 if (!new_arg)
10794 return 0; // OOM
10795 tmp->copy_max_to_min(&key2_cpy);
10796 tmp->increment_use_count(key1->use_count+1);
10797 /* Increment key count as it may be used for next loop */
10798 key2_cpy.increment_use_count(1);
10799 new_arg->next_key_part= key_or(param, tmp->next_key_part,
10800 key2_cpy.next_key_part);
10801 key1=key1->insert(new_arg);
10802 break;
10803 }
10804 }
10805 // Move on to next range in key2
10806 key2=key2->next;
10807 }
10808
10809 end:
10810 /*
10811 Add key2 ranges that are non-overlapping with and higher than the
10812 highest range in key1.
10813 */
10814 while (key2)
10815 {
10816 SEL_ARG *next=key2->next;
10817 if (key2_shared)
10818 {
10819 SEL_ARG *tmp=new SEL_ARG(*key2); // Must make copy
10820 if (!tmp)
10821 return 0;
10822 key2->increment_use_count(key1->use_count+1);
10823 key1=key1->insert(tmp);
10824 }
10825 else
10826 key1=key1->insert(key2); // Will destroy key2_root
10827 key2=next;
10828 }
10829 key1->use_count++;
10830
10831 /* Re-compute the result tree's weight. */
10832 key1->update_weight_locally();
10833
10834 key1->max_part_no= max_part_no;
10835 return key1;
10836 }
10837
10838
10839 /* Compare if two trees are equal */
10840
10841 static bool eq_tree(SEL_ARG *a, SEL_ARG *b)
10842 {
10843 if (a == b)
10844 return 1;
10845 if (!a || !b || !a->is_same(b))
10846 return 0;
10847 if (a->left != &null_element && b->left != &null_element)
10848 {
10849 if (!eq_tree(a->left,b->left))
10850 return 0;
10851 }
10852 else if (a->left != &null_element || b->left != &null_element)
10853 return 0;
10854 if (a->right != &null_element && b->right != &null_element)
10855 {
10856 if (!eq_tree(a->right,b->right))
10857 return 0;
10858 }
10859 else if (a->right != &null_element || b->right != &null_element)
10860 return 0;
10861 if (a->next_key_part != b->next_key_part)
10862 { // Sub range
10863 if (!a->next_key_part != !b->next_key_part ||
10864 !eq_tree(a->next_key_part, b->next_key_part))
10865 return 0;
10866 }
10867 return 1;
10868 }
10869
10870
10871 /*
10872 Compute the MAX(key part) in this SEL_ARG graph.
10873 */
10874 uint SEL_ARG::get_max_key_part() const
10875 {
10876 const SEL_ARG *cur;
10877 uint max_part= part;
10878 for (cur= first(); cur ; cur=cur->next)
10879 {
10880 if (cur->next_key_part)
10881 {
10882 uint mp= cur->next_key_part->get_max_key_part();
10883 max_part= MY_MAX(max_part, mp);
10884 }
10885 }
10886 return max_part;
10887 }
10888
10889
10890 /*
10891 Remove the SEL_ARG graph elements which have part > max_part.
10892
10893 @detail
10894 Also update weight for the graph and any modified subgraphs.
10895 */
10896
10897 void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
10898 {
10899 SEL_ARG *cur;
10900 DBUG_ASSERT(max_part >= sel_arg->part);
10901
10902 for (cur= sel_arg->first(); cur ; cur=cur->next)
10903 {
10904 if (cur->next_key_part)
10905 {
10906 if (cur->next_key_part->part > max_part)
10907 {
10908 // Remove cur->next_key_part.
10909 sel_arg->weight -= cur->next_key_part->weight;
10910 cur->next_key_part= NULL;
10911 }
10912 else
10913 {
10914 uint old_weight= cur->next_key_part->weight;
10915 prune_sel_arg_graph(cur->next_key_part, max_part);
10916 sel_arg->weight -= (old_weight - cur->next_key_part->weight);
10917 }
10918 }
10919 }
10920 }
10921
10922
10923 /*
10924 @brief
10925 Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT,
10926 by cutting off branches if necessary.
10927
10928 @detail
10929 @see declaration of SEL_ARG::weight for definition of weight.
10930
10931 This function attempts to reduce the graph's weight by cutting off
10932 SEL_ARG::next_key_part connections if necessary.
10933
10934 We start with maximum used keypart and then remove one keypart after
10935 another until the graph's weight is within the limit.
10936
10937 @seealso
10938 sel_arg_and_weight_heuristic();
10939
10940 @return
10941 tree pointer The tree after processing,
10942 NULL If it was not possible to reduce the weight of the tree below the
10943 limit.
10944 */
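
/*
  A sketch of the effect, for a hypothetical condition

    kp1 IN (1..100) AND kp2 IN (1..100) AND kp3 IN (1..100)

  whose graph weighs 100 + 100*100 + 100*100*100 = 1010100. If this exceeds
  optimizer_max_sel_arg_weight, the kp3 connections are cut first (weight
  drops to 10100), then the kp2 connections if still too heavy (weight
  drops to 100), keeping ranges only on the remaining key prefix.
*/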
10945
10946 SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
10947 SEL_ARG *sel_arg)
10948 {
10949 if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE ||
10950 !param->thd->variables.optimizer_max_sel_arg_weight)
10951 return sel_arg;
10952
10953 Field *field= sel_arg->field;
10954 uint weight1= sel_arg->weight;
10955
10956 while (1)
10957 {
10958 if (likely(sel_arg->weight <= param->thd->variables.
10959 optimizer_max_sel_arg_weight))
10960 break;
10961
10962 uint max_part= sel_arg->get_max_key_part();
10963 if (max_part == sel_arg->part)
10964 {
10965 /*
10966 We don't return NULL right away as we want to have the information
10967 about the changed tree in the optimizer trace.
10968 */
10969 sel_arg= NULL;
10970 break;
10971 }
10972
10973 max_part--;
10974 prune_sel_arg_graph(sel_arg, max_part);
10975 }
10976
10977 uint weight2= sel_arg? sel_arg->weight : 0;
10978
10979 if (weight2 != weight1)
10980 {
10981 Json_writer_object wrapper(param->thd);
10982 Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit");
10983 if (param->using_real_indexes)
10984 obj.add("index", param->table->key_info[param->real_keynr[keyno]].name);
10985 else
10986 obj.add("pseudo_index", field->field_name);
10987
10988 obj.add("old_weight", (longlong)weight1);
10989 obj.add("new_weight", (longlong)weight2);
10990 }
10991 return sel_arg;
10992 }
10993
10994
10995 /*
10996 @detail
10997 Do not combine the trees if their total weight is likely to exceed the
10998 MAX_WEIGHT.
10999 (It is possible that key1 has next_key_part that has empty overlap with
11000 key2. In this case, the combined tree will have a smaller weight than we
11001 predict. We assume this is rare.)
11002 */
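
/*
  Example (hypothetical numbers): if key1 has 1000 ranges (weight 1000) and
  key2 has weight 50, the combined tree is predicted to weigh
  1000 + 1000*50 = 51000. With optimizer_max_sel_arg_weight below that,
  key2 is discarded and its keypart contributes no ranges.
*/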
11003
11004 static
11005 bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
11006 SEL_ARG *key2)
11007 {
11008 DBUG_ASSERT(key1->part < key2->part);
11009
11010 ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight;
11011 if (max_weight && key1->weight + key1->elements*key2->weight > max_weight)
11012 {
11013 Json_writer_object wrapper(param->thd);
11014 Json_writer_object obj(param->thd, "sel_arg_weight_heuristic");
11015 obj.add("key1_field", key1->field->field_name);
11016 obj.add("key2_field", key2->field->field_name);
11017 obj.add("key1_weight", (longlong)key1->weight);
11018 obj.add("key2_weight", (longlong)key2->weight);
11019 return true; // Discard key2
11020 }
11021 return false;
11022 }
11023
11024
11025 SEL_ARG *
11026 SEL_ARG::insert(SEL_ARG *key)
11027 {
11028 SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element);
11029
11030 for (element= this; element != &null_element ; )
11031 {
11032 last_element=element;
11033 if (key->cmp_min_to_min(element) > 0)
11034 {
11035 par= &element->right; element= element->right;
11036 }
11037 else
11038 {
11039 par = &element->left; element= element->left;
11040 }
11041 }
11042 *par=key;
11043 key->parent=last_element;
11044 /* Link in list */
11045 if (par == &last_element->left)
11046 {
11047 key->next=last_element;
11048 if ((key->prev=last_element->prev))
11049 key->prev->next=key;
11050 last_element->prev=key;
11051 }
11052 else
11053 {
11054 if ((key->next=last_element->next))
11055 key->next->prev=key;
11056 key->prev=last_element;
11057 last_element->next=key;
11058 }
11059 key->left=key->right= &null_element;
11060 SEL_ARG *root=rb_insert(key); // rebalance tree
11061 root->use_count=this->use_count; // copy root info
11062 root->elements= this->elements+1;
11063 /*
11064 The new weight is:
11065 old root's weight
11066 +1 for the weight of the added element
11067 + next_key_part's weight of the added element
11068 */
11069 root->weight = weight + 1 + (key->next_key_part? key->next_key_part->weight: 0);
11070 root->maybe_flag=this->maybe_flag;
11071 return root;
11072 }
11073
11074
11075 /*
11076 ** Find the last key whose min is <= the given key's min
11077 ** Because of the call context, this should never return 0 to get_range
11078 */
11079
11080 SEL_ARG *
11081 SEL_ARG::find_range(SEL_ARG *key)
11082 {
11083 SEL_ARG *element=this,*found=0;
11084
11085 for (;;)
11086 {
11087 if (element == &null_element)
11088 return found;
11089 int cmp=element->cmp_min_to_min(key);
11090 if (cmp == 0)
11091 return element;
11092 if (cmp < 0)
11093 {
11094 found=element;
11095 element=element->right;
11096 }
11097 else
11098 element=element->left;
11099 }
11100 }
11101
11102
11103 /*
11104 Remove an element from the tree
11105
11106 SYNOPSIS
11107 tree_delete()
11108 key Key that is to be deleted from tree (this)
11109
11110 NOTE
11111 This also frees all subtrees that are used by the element
11112
11113 RETURN
11114 root of new tree (with key deleted)
11115 */
11116
11117 SEL_ARG *
11118 SEL_ARG::tree_delete(SEL_ARG *key)
11119 {
11120 enum leaf_color remove_color;
11121 SEL_ARG *root,*nod,**par,*fix_par;
11122 DBUG_ENTER("tree_delete");
11123
11124 root=this;
11125 this->parent= 0;
11126
11127 /*
11128 Compute the weight the tree will have after the element is removed.
11129 We remove the element itself (weight=1)
11130 and the sub-graph connected to its next_key_part.
11131 */
11132 uint new_weight= root->weight - (1 + (key->next_key_part?
11133 key->next_key_part->weight : 0));
11134
11135 DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ?
11136 key->next_key_part->weight : 0)));
11137
11138 /* Unlink from list */
11139 if (key->prev)
11140 key->prev->next=key->next;
11141 if (key->next)
11142 key->next->prev=key->prev;
11143 key->increment_use_count(-1);
11144 if (!key->parent)
11145 par= &root;
11146 else
11147 par=key->parent_ptr();
11148
11149 if (key->left == &null_element)
11150 {
11151 *par=nod=key->right;
11152 fix_par=key->parent;
11153 if (nod != &null_element)
11154 nod->parent=fix_par;
11155 remove_color= key->color;
11156 }
11157 else if (key->right == &null_element)
11158 {
11159 *par= nod=key->left;
11160 nod->parent=fix_par=key->parent;
11161 remove_color= key->color;
11162 }
11163 else
11164 {
11165 SEL_ARG *tmp=key->next; // next bigger key (exist!)
11166 nod= *tmp->parent_ptr()= tmp->right; // unlink tmp from tree
11167 fix_par=tmp->parent;
11168 if (nod != &null_element)
11169 nod->parent=fix_par;
11170 remove_color= tmp->color;
11171
11172 tmp->parent=key->parent; // Move node in place of key
11173 (tmp->left=key->left)->parent=tmp;
11174 if ((tmp->right=key->right) != &null_element)
11175 tmp->right->parent=tmp;
11176 tmp->color=key->color;
11177 *par=tmp;
11178 if (fix_par == key) // key->right == key->next
11179 fix_par=tmp; // new parent of nod
11180 }
11181
11182 if (root == &null_element)
11183 DBUG_RETURN(0); // Maybe root later
11184 if (remove_color == BLACK)
11185 root=rb_delete_fixup(root,nod,fix_par);
11186 test_rb_tree(root,root->parent);
11187
11188 root->use_count=this->use_count; // Fix root counters
11189 root->weight= new_weight;
11190 root->elements=this->elements-1;
11191 root->maybe_flag=this->maybe_flag;
11192 DBUG_RETURN(root);
11193 }
11194
11195
11196 /* Functions to fix up the tree after insert and delete */
11197
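/*
  Shape of left_rotate(root, x) with y = x->right; right_rotate is the
  mirror image:

        x                  y
       / \                / \
      a   y      =>      x   c
         / \            / \
        b   c          a   b
*/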
11198 static void left_rotate(SEL_ARG **root, SEL_ARG *leaf)
11199 {
11200 SEL_ARG *y=leaf->right;
11201 leaf->right=y->left;
11202 if (y->left != &null_element)
11203 y->left->parent=leaf;
11204 if (!(y->parent=leaf->parent))
11205 *root=y;
11206 else
11207 *leaf->parent_ptr()=y;
11208 y->left=leaf;
11209 leaf->parent=y;
11210 }
11211
11212 static void right_rotate(SEL_ARG **root, SEL_ARG *leaf)
11213 {
11214 SEL_ARG *y=leaf->left;
11215 leaf->left=y->right;
11216 if (y->right != &null_element)
11217 y->right->parent=leaf;
11218 if (!(y->parent=leaf->parent))
11219 *root=y;
11220 else
11221 *leaf->parent_ptr()=y;
11222 y->right=leaf;
11223 leaf->parent=y;
11224 }
11225
11226
11227 SEL_ARG *
11228 SEL_ARG::rb_insert(SEL_ARG *leaf)
11229 {
11230 SEL_ARG *y,*par,*par2,*root;
11231 root= this; root->parent= 0;
11232
11233 leaf->color=RED;
11234 while (leaf != root && (par= leaf->parent)->color == RED)
11235 { // This can't be root or 1 level under
11236 if (par == (par2= leaf->parent->parent)->left)
11237 {
11238 y= par2->right;
11239 if (y->color == RED)
11240 {
11241 par->color=BLACK;
11242 y->color=BLACK;
11243 leaf=par2;
11244 leaf->color=RED; /* And the loop continues */
11245 }
11246 else
11247 {
11248 if (leaf == par->right)
11249 {
11250 left_rotate(&root,leaf->parent);
11251 par=leaf; /* leaf is now parent to old leaf */
11252 }
11253 par->color=BLACK;
11254 par2->color=RED;
11255 right_rotate(&root,par2);
11256 break;
11257 }
11258 }
11259 else
11260 {
11261 y= par2->left;
11262 if (y->color == RED)
11263 {
11264 par->color=BLACK;
11265 y->color=BLACK;
11266 leaf=par2;
11267 leaf->color=RED; /* And the loop continues */
11268 }
11269 else
11270 {
11271 if (leaf == par->left)
11272 {
11273 right_rotate(&root,par);
11274 par=leaf;
11275 }
11276 par->color=BLACK;
11277 par2->color=RED;
11278 left_rotate(&root,par2);
11279 break;
11280 }
11281 }
11282 }
11283 root->color=BLACK;
11284 test_rb_tree(root,root->parent);
11285 return root;
11286 }
11287
11288
11289 SEL_ARG *rb_delete_fixup(SEL_ARG *root, SEL_ARG *key, SEL_ARG *par)
11290 {
11291 SEL_ARG *x,*w;
11292 root->parent=0;
11293
11294 x= key;
11295 while (x != root && x->color == SEL_ARG::BLACK)
11296 {
11297 if (x == par->left)
11298 {
11299 w=par->right;
11300 if (w->color == SEL_ARG::RED)
11301 {
11302 w->color=SEL_ARG::BLACK;
11303 par->color=SEL_ARG::RED;
11304 left_rotate(&root,par);
11305 w=par->right;
11306 }
11307 if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
11308 {
11309 w->color=SEL_ARG::RED;
11310 x=par;
11311 }
11312 else
11313 {
11314 if (w->right->color == SEL_ARG::BLACK)
11315 {
11316 w->left->color=SEL_ARG::BLACK;
11317 w->color=SEL_ARG::RED;
11318 right_rotate(&root,w);
11319 w=par->right;
11320 }
11321 w->color=par->color;
11322 par->color=SEL_ARG::BLACK;
11323 w->right->color=SEL_ARG::BLACK;
11324 left_rotate(&root,par);
11325 x=root;
11326 break;
11327 }
11328 }
11329 else
11330 {
11331 w=par->left;
11332 if (w->color == SEL_ARG::RED)
11333 {
11334 w->color=SEL_ARG::BLACK;
11335 par->color=SEL_ARG::RED;
11336 right_rotate(&root,par);
11337 w=par->left;
11338 }
11339 if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
11340 {
11341 w->color=SEL_ARG::RED;
11342 x=par;
11343 }
11344 else
11345 {
11346 if (w->left->color == SEL_ARG::BLACK)
11347 {
11348 w->right->color=SEL_ARG::BLACK;
11349 w->color=SEL_ARG::RED;
11350 left_rotate(&root,w);
11351 w=par->left;
11352 }
11353 w->color=par->color;
11354 par->color=SEL_ARG::BLACK;
11355 w->left->color=SEL_ARG::BLACK;
11356 right_rotate(&root,par);
11357 x=root;
11358 break;
11359 }
11360 }
11361 par=x->parent;
11362 }
11363 x->color=SEL_ARG::BLACK;
11364 return root;
11365 }
11366
11367
11368 /* Test that the properties for a red-black tree hold */
11369
11370 #ifdef EXTRA_DEBUG
11371 int test_rb_tree(SEL_ARG *element, SEL_ARG *parent)
11372 {
11373 int count_l,count_r;
11374
11375 if (element == &null_element)
11376 return 0; // Found end of tree
11377 if (element->parent != parent)
11378 {
11379 sql_print_error("Wrong tree: Parent doesn't point at parent");
11380 return -1;
11381 }
11382 if (element->color == SEL_ARG::RED &&
11383 (element->left->color == SEL_ARG::RED ||
11384 element->right->color == SEL_ARG::RED))
11385 {
11386 sql_print_error("Wrong tree: Found two red in a row");
11387 return -1;
11388 }
11389 if (element->left == element->right && element->left != &null_element)
11390 { // Dummy test
11391 sql_print_error("Wrong tree: Found right == left");
11392 return -1;
11393 }
11394 count_l=test_rb_tree(element->left,element);
11395 count_r=test_rb_tree(element->right,element);
11396 if (count_l >= 0 && count_r >= 0)
11397 {
11398 if (count_l == count_r)
11399 return count_l+(element->color == SEL_ARG::BLACK);
11400 sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
11401 count_l,count_r);
11402 }
11403 return -1; // Error, no more warnings
11404 }
11405
11406
11407 /**
11408 Count how many times SEL_ARG graph "root" refers to its part "key" via
11409 transitive closure.
11410
11411 @param root An RB-Root node in a SEL_ARG graph.
11412 @param key Another RB-Root node in that SEL_ARG graph.
11413
11414 The passed "root" node may refer to "key" node via root->next_key_part,
11415 root->next->n
11416
11417 This function counts how many times the node "key" is referred (via
11418 SEL_ARG::next_key_part) by
11419 - intervals of RB-tree pointed by "root",
11420 - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
11421 intervals of RB-tree pointed by "root",
11422 - and so on.
11423
11424 Here is an example (horizontal links represent next_key_part pointers,
11425 vertical links - next/prev pointers):
11426
11427 +----+ $
11428 |root|-----------------+
11429 +----+ $ |
11430 | $ |
11431 | $ |
11432 +----+ +---+ $ | +---+ Here the return value
11433 | |- ... -| |---$-+--+->|key| will be 4.
11434 +----+ +---+ $ | | +---+
11435 | $ | |
11436 ... $ | |
11437 | $ | |
11438 +----+ +---+ $ | |
11439 | |---| |---------+ |
11440 +----+ +---+ $ |
11441 | | $ |
11442 ... +---+ $ |
11443 | |------------+
11444 +---+ $
11445 @return
11446 Number of links to "key" from nodes reachable from "root".
11447 */
11448
11449 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
11450 {
11451 ulong count= 0;
11452 for (root=root->first(); root ; root=root->next)
11453 {
11454 if (root->next_key_part)
11455 {
11456 if (root->next_key_part == key)
11457 count++;
11458 if (root->next_key_part->part < key->part)
11459 count+=count_key_part_usage(root->next_key_part,key);
11460 }
11461 }
11462 return count;
11463 }
11464
11465
11466 /*
11467 Check if SEL_ARG::use_count value is correct
11468
11469 SYNOPSIS
11470 SEL_ARG::test_use_count()
11471 root The root node of the SEL_ARG graph (an RB-tree root node that
11472 has the least value of sel_arg->part in the entire graph, and
11473 thus is the "origin" of the graph)
11474
11475 DESCRIPTION
11476 Check if SEL_ARG::use_count value is correct. See the definition of
11477 use_count for what is "correct".
11478 */
11479
11480 void SEL_ARG::test_use_count(SEL_ARG *root)
11481 {
11482 uint e_count=0;
11483
11484 if (this->type != SEL_ARG::KEY_RANGE)
11485 return;
11486 for (SEL_ARG *pos=first(); pos ; pos=pos->next)
11487 {
11488 e_count++;
11489 if (pos->next_key_part)
11490 {
11491 ulong count=count_key_part_usage(root,pos->next_key_part);
11492 if (count > pos->next_key_part->use_count)
11493 {
11494 sql_print_information("Use_count: Wrong count for key at %p: %lu "
11495 "should be %lu", pos,
11496 pos->next_key_part->use_count, count);
11497 return;
11498 }
11499 pos->next_key_part->test_use_count(root);
11500 }
11501 }
11502 if (e_count != elements)
11503 sql_print_warning("Wrong use count: %u (should be %u) for tree at %p",
11504 e_count, elements, this);
11505 }
11506 #endif
11507
11508 /*
11509 Calculate cost and E(#rows) for a given index and intervals tree
11510
11511 SYNOPSIS
11512 check_quick_select()
11513 param Parameter from test_quick_select
11514 idx Number of index to use in PARAM::key SEL_TREE::key
11515 index_only TRUE - assume only index tuples will be accessed
11516 FALSE - assume full table rows will be read
11517 tree Transformed selection condition, tree->key[idx] holds
11518 the intervals for the given index.
11519 update_tbl_stats TRUE <=> update table->quick_* with information
11520 about range scan we've evaluated.
11521 mrr_flags INOUT MRR access flags
11522 cost OUT Scan cost
11523 is_ror_scan is set to reflect if the key scan is a ROR (see
11524 is_key_scan_ror function for more info)
11525
11526 NOTES
11527 param->table->opt_range*, param->range_count (and maybe others) are
11528 updated with data of given key scan, see quick_range_seq_next for details.
11529
11530 RETURN
11531 Estimate # of records to be retrieved.
11532 HA_POS_ERROR if estimate calculation failed due to table handler problems.
11533 */
11534
11535 static
11536 ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
11537 SEL_ARG *tree, bool update_tbl_stats,
11538 uint *mrr_flags, uint *bufsize, Cost_estimate *cost,
11539 bool *is_ror_scan)
11540 {
11541 SEL_ARG_RANGE_SEQ seq;
11542 RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
11543 handler *file= param->table->file;
11544 ha_rows rows= HA_POS_ERROR;
11545 uint keynr= param->real_keynr[idx];
11546 DBUG_ENTER("check_quick_select");
11547
11548 /* Range not calculated yet */
11549 param->quick_rows[keynr]= HA_POS_ERROR;
11550
11551 /* Handle cases when we don't have a valid non-empty list of ranges */
11552 if (!tree)
11553 DBUG_RETURN(HA_POS_ERROR);
11554 if (tree->type == SEL_ARG::IMPOSSIBLE)
11555 DBUG_RETURN(0L);
11556 if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
11557 DBUG_RETURN(HA_POS_ERROR);
11558
11559 seq.keyno= idx;
11560 seq.real_keyno= keynr;
11561 seq.param= param;
11562 seq.start= tree;
11563
11564 param->range_count=0;
11565 param->max_key_parts=0;
11566
11567 seq.is_ror_scan= TRUE;
11568 if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
11569 seq.is_ror_scan= FALSE;
11570
11571 *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
11572 /*
11573 Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
11574 */
11575 *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
11576
11577 // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
11578 // Passing wrong second argument to index_flags() makes no difference for
11579 // most storage engines but might be an issue for MyRocks with certain
11580 // datatypes.
11581 if (index_only &&
11582 (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
11583 !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
11584 *mrr_flags |= HA_MRR_INDEX_ONLY;
11585
11586 if (param->thd->lex->sql_command != SQLCOM_SELECT)
11587 *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
11588
11589 *bufsize= param->thd->variables.mrr_buff_size;
11590 /*
11591 Skip materialized derived table/view result tables from the MRR check as
11592 they don't contain any data yet.
11593 */
11594 if (param->table->pos_in_table_list->is_non_derived())
11595 rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
11596 bufsize, mrr_flags, cost);
11597 param->quick_rows[keynr]= rows;
11598 if (rows != HA_POS_ERROR)
11599 {
11600 ha_rows table_records= param->table->stat_records();
11601 if (rows > table_records)
11602 {
11603 /*
11604 For any index the total number of records within all ranges
11605 cannot be bigger than the number of records in the table.
11606 This check is needed as sometimes the table statistics or range
11607 estimates may be slightly out of sync.
11608 */
11609 rows= table_records;
11610 set_if_bigger(rows, 1);
11611 param->quick_rows[keynr]= rows;
11612 }
11613 param->possible_keys.set_bit(keynr);
11614 if (update_tbl_stats)
11615 {
11616 param->table->opt_range_keys.set_bit(keynr);
11617 param->table->opt_range[keynr].key_parts= param->max_key_parts;
11618 param->table->opt_range[keynr].ranges= param->range_count;
11619 param->table->opt_range_condition_rows=
11620 MY_MIN(param->table->opt_range_condition_rows, rows);
11621 param->table->opt_range[keynr].rows= rows;
11622 param->table->opt_range[keynr].cost= cost->total_cost();
11623 if (param->table->file->is_clustering_key(keynr))
11624 param->table->opt_range[keynr].index_only_cost= 0;
11625 else
11626 param->table->opt_range[keynr].index_only_cost= cost->index_only_cost();
11627 }
11628 }
11629
11630 /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
11631 enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
11632 if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
11633 {
11634 /*
11635 All scans are non-ROR scans for those index types.
11636 TODO: Don't have this logic here, make table engines return
11637 appropriate flags instead.
11638 */
11639 seq.is_ror_scan= FALSE;
11640 }
11641 else if (param->table->file->is_clustering_key(keynr))
11642 {
11643 /* Clustered PK scan is always a ROR scan (TODO: same as above) */
11644 seq.is_ror_scan= TRUE;
11645 }
11646 else if (param->range_count > 1)
11647 {
11648 /*
11649 Scanning multiple key values in the index: the records are ROR
11650 for each value, but not between values. E.g., "SELECT ... x IN
11651 (1,3)" returns ROR order for all records with x=1, then ROR
11652 order for records with x=3
11653 */
11654 seq.is_ror_scan= FALSE;
11655 }
11656 *is_ror_scan= seq.is_ror_scan;
11657
11658 DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
11659 DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
11660 }
11661
11662
11663 /*
11664 Check if key scan on given index with equality conditions on first n key
11665 parts is a ROR scan.
11666
11667 SYNOPSIS
11668 is_key_scan_ror()
11669 param Parameter from test_quick_select
11670 keynr Number of key in the table. The key must not be a clustered
11671 primary key.
11672 nparts Number of first key parts for which equality conditions
11673 are present.
11674
11675 NOTES
11676 ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
11677 ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
11678
11679 This function is needed to handle a practically-important special case:
11680 an index scan is a ROR scan if it is done using a condition in form
11681
11682 "key1_1=c_1 AND ... AND key1_n=c_n"
11683
11684 where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
11685
11686 and the table has a clustered Primary Key defined as
11687 PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
11688
11689 i.e. the first key parts of it are identical to uncovered parts of the
11690 key being scanned. This function assumes that the index flags do not
11691 include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
11692
11693 Check (1) is made in quick_range_seq_next()
11694
11695 RETURN
11696 TRUE The scan is ROR-scan
11697 FALSE Otherwise
11698 */
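
/*
  Example (hypothetical InnoDB table):

    CREATE TABLE t1 (a INT, b INT, PRIMARY KEY(a), KEY k1(b)) ENGINE=InnoDB;

  A scan of k1 with the condition "b=5" is a ROR scan: within b=5 the index
  entries are ordered by the appended PK column a, i.e. by rowid.
*/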
11699
11700 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
11701 {
11702 KEY *table_key= param->table->key_info + keynr;
11703 KEY_PART_INFO *key_part= table_key->key_part + nparts;
11704 KEY_PART_INFO *key_part_end= (table_key->key_part +
11705 table_key->user_defined_key_parts);
11706 uint pk_number;
11707
11708 if (param->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
11709 return false;
11710
11711 for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
11712 {
11713 uint16 fieldnr= param->table->key_info[keynr].
11714 key_part[kp - table_key->key_part].fieldnr - 1;
11715 if (param->table->field[fieldnr]->key_length() != kp->length)
11716 return FALSE;
11717 }
11718
11719 /*
11720 If there are equalities for all key parts, it is a ROR scan. If there are
11721 equalities on all keyparts and even on some key parts from the "Extended Key"
11722 index suffix, it is a ROR-scan, too.
11723 */
11724 if (key_part >= key_part_end)
11725 return TRUE;
11726
11727 key_part= table_key->key_part + nparts;
11728 pk_number= param->table->s->primary_key;
11729 if (!param->table->file->pk_is_clustering_key(pk_number))
11730 return FALSE;
11731
11732 KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
11733 KEY_PART_INFO *pk_part_end= pk_part +
11734 param->table->key_info[pk_number].user_defined_key_parts;
11735 for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
11736 ++key_part, ++pk_part)
11737 {
11738 if ((key_part->field != pk_part->field) ||
11739 (key_part->length != pk_part->length))
11740 return FALSE;
11741 }
11742 return (key_part == key_part_end);
11743 }
11744
11745
11746 /*
11747 Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
11748
11749 SYNOPSIS
11750 get_quick_select()
11751 param
11752 idx Index of used key in param->key.
11753 key_tree SEL_ARG tree for the used key
11754 mrr_flags MRR parameter for quick select
11755 mrr_buf_size MRR parameter for quick select
11756 parent_alloc If not NULL, use it to allocate memory for
11757 quick select data. Otherwise use quick->alloc.
11758 NOTES
11759 The caller must call QUICK_SELECT::init for the returned quick select.
11760
11761 CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
11762 deallocated when the returned quick select is deleted.
11763
11764 RETURN
11765 NULL on error
11766 otherwise created quick select
11767 */
11768
11769 QUICK_RANGE_SELECT *
11770 get_quick_select(PARAM *param, uint idx, SEL_ARG *key_tree, uint mrr_flags,
11771 uint mrr_buf_size, MEM_ROOT *parent_alloc)
11772 {
11773 QUICK_RANGE_SELECT *quick;
11774 bool create_err= FALSE;
11775 DBUG_ENTER("get_quick_select");
11776
11777 if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
11778 quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
11779 param->real_keynr[idx],
11780 MY_TEST(parent_alloc),
11781 parent_alloc, &create_err);
11782 else
11783 quick=new QUICK_RANGE_SELECT(param->thd, param->table,
11784 param->real_keynr[idx],
11785 MY_TEST(parent_alloc), NULL, &create_err);
11786
11787 if (quick)
11788 {
11789 if (create_err ||
11790 get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
11791 param->max_key,0))
11792 {
11793 delete quick;
11794 quick=0;
11795 }
11796 else
11797 {
11798 KEY *keyinfo= param->table->key_info+param->real_keynr[idx];
11799 quick->mrr_flags= mrr_flags;
11800 quick->mrr_buf_size= mrr_buf_size;
11801 quick->key_parts=(KEY_PART*)
11802 memdup_root(parent_alloc? parent_alloc : &quick->alloc,
11803 (char*) param->key[idx],
11804 sizeof(KEY_PART)*
11805 param->table->actual_n_key_parts(keyinfo));
11806 }
11807 }
11808 DBUG_RETURN(quick);
11809 }
11810
11811
11812 /*
11813 ** Fix this to get all possible sub_ranges
11814 */
11815 bool
11816 get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick, KEY_PART *key,
11817 SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
11818 uchar *max_key, uint max_key_flag)
11819 {
11820 QUICK_RANGE *range;
11821 uint flag;
11822 int min_part= key_tree->part-1, // # of keypart values in min_key buffer
11823 max_part= key_tree->part-1; // # of keypart values in max_key buffer
11824
11825 if (key_tree->left != &null_element)
11826 {
11827 if (get_quick_keys(param,quick,key,key_tree->left,
11828 min_key,min_key_flag, max_key, max_key_flag))
11829 return 1;
11830 }
11831 uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
11832 min_part+= key_tree->store_min(key[key_tree->part].store_length,
11833 &tmp_min_key,min_key_flag);
11834 max_part+= key_tree->store_max(key[key_tree->part].store_length,
11835 &tmp_max_key,max_key_flag);
11836
11837 if (key_tree->next_key_part &&
11838 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
11839 key_tree->next_key_part->part == key_tree->part+1)
11840 { // const key as prefix
11841 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
11842 memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
11843 key_tree->min_flag==0 && key_tree->max_flag==0)
11844 {
11845 if (get_quick_keys(param,quick,key,key_tree->next_key_part,
11846 tmp_min_key, min_key_flag | key_tree->min_flag,
11847 tmp_max_key, max_key_flag | key_tree->max_flag))
11848 return 1;
11849 goto end; // Ugly, but efficient
11850 }
11851 {
11852 uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
11853 if (!tmp_min_flag)
11854 min_part+= key_tree->next_key_part->store_min_key(key,
11855 &tmp_min_key,
11856 &tmp_min_flag,
11857 MAX_KEY);
11858 if (!tmp_max_flag)
11859 max_part+= key_tree->next_key_part->store_max_key(key,
11860 &tmp_max_key,
11861 &tmp_max_flag,
11862 MAX_KEY);
11863 flag=tmp_min_flag | tmp_max_flag;
11864 }
11865 }
11866 else
11867 {
11868 flag = (key_tree->min_flag & GEOM_FLAG) ?
11869 key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
11870 }
11871
11872 /*
11873 Ensure that some part of min_key and max_key is used. If not,
11874 regard this as no lower/upper range.
11875 */
11876 if ((flag & GEOM_FLAG) == 0)
11877 {
11878 if (tmp_min_key != param->min_key)
11879 flag&= ~NO_MIN_RANGE;
11880 else
11881 flag|= NO_MIN_RANGE;
11882 if (tmp_max_key != param->max_key)
11883 flag&= ~NO_MAX_RANGE;
11884 else
11885 flag|= NO_MAX_RANGE;
11886 }
11887 if (flag == 0)
11888 {
11889 uint length= (uint) (tmp_min_key - param->min_key);
11890 if (length == (uint) (tmp_max_key - param->max_key) &&
11891 !memcmp(param->min_key,param->max_key,length))
11892 {
11893 KEY *table_key=quick->head->key_info+quick->index;
11894 flag=EQ_RANGE;
11895 if ((table_key->flags & HA_NOSAME) &&
11896 min_part == key_tree->part &&
11897 key_tree->part == table_key->user_defined_key_parts-1)
11898 {
11899 DBUG_ASSERT(min_part == max_part);
11900 if ((table_key->flags & HA_NULL_PART_KEY) &&
11901 null_part_in_key(key,
11902 param->min_key,
11903 (uint) (tmp_min_key - param->min_key)))
11904 flag|= NULL_RANGE;
11905 else
11906 flag|= UNIQUE_RANGE;
11907 }
11908 }
11909 }
11910
11911 /* Get range for retrieving rows in QUICK_SELECT::get_next */
11912 if (!(range= new (param->thd->mem_root) QUICK_RANGE(
11913 param->thd,
11914 param->min_key,
11915 (uint) (tmp_min_key - param->min_key),
11916 min_part >=0 ? make_keypart_map(min_part) : 0,
11917 param->max_key,
11918 (uint) (tmp_max_key - param->max_key),
11919 max_part >=0 ? make_keypart_map(max_part) : 0,
11920 flag)))
11921 return 1; // out of memory
11922
11923 set_if_bigger(quick->max_used_key_length, range->min_length);
11924 set_if_bigger(quick->max_used_key_length, range->max_length);
11925 set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
11926 if (insert_dynamic(&quick->ranges, (uchar*) &range))
11927 return 1;
11928
11929 end:
11930 if (key_tree->right != &null_element)
11931 return get_quick_keys(param,quick,key,key_tree->right,
11932 min_key,min_key_flag,
11933 max_key,max_key_flag);
11934 return 0;
11935 }
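
/*
  Illustrative example of the traversal above: for an index on (a,b) and the
  condition "a=1 AND (b<5 OR b>10)", the SEL_ARG tree holds one interval for
  keypart a whose next_key_part contains two intervals for b. get_quick_keys()
  then emits the two ranges

    (a=1, b<5)  and  (a=1, b>10)

  i.e. the constant prefix a=1 is merged into each sub-range by the
  "const key as prefix" branch.
*/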
11936
11937 /*
11938 Return 1 if there is only one range and this uses the whole unique key
11939 */
11940
11941 bool QUICK_RANGE_SELECT::unique_key_range()
11942 {
11943 if (ranges.elements == 1)
11944 {
11945 QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
11946 if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
11947 {
11948 KEY *key=head->key_info+index;
11949 return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
11950 }
11951 }
11952 return 0;
11953 }
11954
11955
11956
11957 /*
11958 Return TRUE if any part of the key is NULL
11959
11960 SYNOPSIS
11961 null_part_in_key()
11962 key_part Array of key parts (index description)
11963 key Key values tuple
11964 length Length of key values tuple in bytes.
11965
11966 RETURN
11967 TRUE The tuple has at least one "keypartX is NULL"
11968 FALSE Otherwise
11969 */
11970
11971 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
11972 {
11973 for (const uchar *end=key+length ;
11974 key < end;
11975 key+= key_part++->store_length)
11976 {
11977 if (key_part->null_bit && *key)
11978 return 1;
11979 }
11980 return 0;
11981 }
11982
11983
11984 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
11985 {
11986 return is_key_used(head, index, fields);
11987 }
11988
11989 bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields)
11990 {
11991 QUICK_RANGE_SELECT *quick;
11992 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11993 while ((quick= it++))
11994 {
11995 if (is_key_used(head, quick->index, fields))
11996 return 1;
11997 }
11998 return 0;
11999 }
12000
12001 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
12002 {
12003 QUICK_SELECT_WITH_RECORD *qr;
12004 List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
12005 while ((qr= it++))
12006 {
12007 if (is_key_used(head, qr->quick->index, fields))
12008 return 1;
12009 }
12010 return 0;
12011 }
12012
12013 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
12014 {
12015 QUICK_SELECT_I *quick;
12016 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
12017 while ((quick= it++))
12018 {
12019 if (quick->is_keys_used(fields))
12020 return 1;
12021 }
12022 return 0;
12023 }
12024
12025
12026 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
12027 {
12028 bool create_err= FALSE;
12029 FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
12030 if (create_err)
12031 {
12032 delete fts;
12033 return NULL;
12034 }
12035 else
12036 return fts;
12037 }
12038
12039 /*
12040 Create quick select from ref/ref_or_null scan.
12041
12042 SYNOPSIS
12043 get_quick_select_for_ref()
12044 thd Thread handle
12045 table Table to access
12046 ref ref[_or_null] scan parameters
12047 records Estimate of number of records (needed only to construct
12048 quick select)
12049 NOTES
12050 This allocates things in a new memory root, as this may be called many
12051 times during a query.
12052
12053 RETURN
12054 Quick select that retrieves the same rows as passed ref scan
12055 NULL on error.
12056 */
12057
12058 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
12059 TABLE_REF *ref, ha_rows records)
12060 {
12061 MEM_ROOT *old_root, *alloc;
12062 QUICK_RANGE_SELECT *quick;
12063 KEY *key_info = &table->key_info[ref->key];
12064 KEY_PART *key_part;
12065 QUICK_RANGE *range;
12066 uint part;
12067 bool create_err= FALSE;
12068 Cost_estimate cost;
12069 uint max_used_key_len;
12070
12071 old_root= thd->mem_root;
12072 /* The following call may change thd->mem_root */
12073 quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
12074 /* save mem_root set by QUICK_RANGE_SELECT constructor */
12075 alloc= thd->mem_root;
12076 /*
12077 restore the default mem_root (thd->mem_root) that was changed by the
12078 QUICK_RANGE_SELECT constructor
12079 */
12080 thd->mem_root= old_root;
12081
12082 if (!quick || create_err || quick->init())
12083 goto err;
12084 quick->records= records;
12085
12086 if ((cp_buffer_from_ref(thd, table, ref) &&
12087 unlikely(thd->is_fatal_error)) ||
12088 unlikely(!(range= new(alloc) QUICK_RANGE())))
12089 goto err; // out of memory
12090
12091 range->min_key= range->max_key= ref->key_buff;
12092 range->min_length= range->max_length= ref->key_length;
12093 range->min_keypart_map= range->max_keypart_map=
12094 make_prev_keypart_map(ref->key_parts);
12095 range->flag= EQ_RANGE;
12096
12097 if (unlikely(!(quick->key_parts=key_part=(KEY_PART *)
12098 alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts))))
12099 goto err;
12100
12101 max_used_key_len=0;
12102 for (part=0 ; part < ref->key_parts ;part++,key_part++)
12103 {
12104 key_part->part=part;
12105 key_part->field= key_info->key_part[part].field;
12106 key_part->length= key_info->key_part[part].length;
12107 key_part->store_length= key_info->key_part[part].store_length;
12108 key_part->null_bit= key_info->key_part[part].null_bit;
12109 key_part->flag= (uint8) key_info->key_part[part].key_part_flag;
12110
12111 max_used_key_len +=key_info->key_part[part].store_length;
12112 }
12113
12114 quick->max_used_key_length= max_used_key_len;
12115
12116 if (insert_dynamic(&quick->ranges,(uchar*)&range))
12117 goto err;
12118
12119 /*
12120 Add a NULL range if REF_OR_NULL optimization is used.
12121 For example:
12122 if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
12123 and have ref->null_ref_key set. Will create a new NULL range here.
12124 */
12125 if (ref->null_ref_key)
12126 {
12127 QUICK_RANGE *null_range;
12128
12129 *ref->null_ref_key= 1; // Set null byte then create a range
12130 if (!(null_range= new (alloc)
12131 QUICK_RANGE(thd, ref->key_buff, ref->key_length,
12132 make_prev_keypart_map(ref->key_parts),
12133 ref->key_buff, ref->key_length,
12134 make_prev_keypart_map(ref->key_parts), EQ_RANGE)))
12135 goto err;
12136 *ref->null_ref_key= 0; // Clear null byte
12137 if (insert_dynamic(&quick->ranges,(uchar*)&null_range))
12138 goto err;
12139 }
12140
12141 /* Call multi_range_read_info() to get the MRR flags and buffer size */
12142 quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
12143 (table->file->keyread_enabled() ? HA_MRR_INDEX_ONLY : 0);
12144 if (thd->lex->sql_command != SQLCOM_SELECT)
12145 quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
12146
12147 quick->mrr_buf_size= thd->variables.mrr_buff_size;
12148 if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
12149 ~0,
12150 &quick->mrr_buf_size,
12151 &quick->mrr_flags, &cost))
12152 goto err;
12153
12154 return quick;
12155 err:
12156 delete quick;
12157 return 0;
12158 }
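
/*
  Example (illustrative): for "WHERE a=2 OR a IS NULL" executed as a
  ref_or_null scan, the quick select built above holds two EQ_RANGE entries
  over the same key buffer: one with the NULL indicator byte cleared (a=2)
  and one with it set (a IS NULL), the latter added in the
  ref->null_ref_key branch.
*/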
12159
12160
12161 /*
12162 Perform key scans for all used indexes (except CPK), get rowids and merge
12163 them into an ordered, duplicate-free sequence of rowids.
12164
12165 The merge/duplicate removal is performed using Unique class. We put all
12166 rowids into Unique, get the sorted sequence and destroy the Unique.
12167
12168 If the table has a clustered primary key that covers all rows (TRUE for bdb
12169 and innodb currently) and one of the index_merge scans is a scan on the PK,
12170 then rows that would be retrieved by the PK scan are not put into the Unique,
12171 and the primary key scan is not performed here; it is performed later, separately.
12172
12173 RETURN
12174 0 OK
12175 other error
12176 */
12177
12178 int read_keys_and_merge_scans(THD *thd,
12179 TABLE *head,
12180 List<QUICK_RANGE_SELECT> quick_selects,
12181 QUICK_RANGE_SELECT *pk_quick_select,
12182 READ_RECORD *read_record,
12183 bool intersection,
12184 key_map *filtered_scans,
12185 Unique **unique_ptr)
12186 {
12187 List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
12188 QUICK_RANGE_SELECT* cur_quick;
12189 int result;
12190 Unique *unique= *unique_ptr;
12191 handler *file= head->file;
12192 bool with_cpk_filter= pk_quick_select != NULL;
12193 DBUG_ENTER("read_keys_and_merge");
12194
12195 /* We're going to just read rowids. */
12196 head->prepare_for_position();
12197
12198 cur_quick_it.rewind();
12199 cur_quick= cur_quick_it++;
12200 bool first_quick= TRUE;
12201 DBUG_ASSERT(cur_quick != 0);
12202 head->file->ha_start_keyread(cur_quick->index);
12203
12204 /*
12205 We reuse the same instance of handler so we need to call both init and
12206 reset here.
12207 */
12208 if (cur_quick->init() || cur_quick->reset())
12209 goto err;
12210
12211 if (unique == NULL)
12212 {
12213 DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_SUICIDE(); );
12214 DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
12215 DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );
12216
12217 unique= new Unique(refpos_order_cmp, (void *)file,
12218 file->ref_length,
12219 (size_t)thd->variables.sortbuff_size,
12220 intersection ? quick_selects.elements : 0);
12221 if (!unique)
12222 goto err;
12223 *unique_ptr= unique;
12224 }
12225 else
12226 {
12227 unique->reset();
12228 }
12229
12230 DBUG_ASSERT(file->ref_length == unique->get_size());
12231 DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
12232
12233 for (;;)
12234 {
12235 while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
12236 {
12237 if (intersection)
12238 with_cpk_filter= filtered_scans->is_set(cur_quick->index);
12239 if (first_quick)
12240 {
12241 first_quick= FALSE;
12242 if (intersection && unique->is_in_memory())
12243 unique->close_for_expansion();
12244 }
12245 cur_quick->range_end();
12246 cur_quick= cur_quick_it++;
12247 if (!cur_quick)
12248 break;
12249
12250 if (cur_quick->file->inited != handler::NONE)
12251 cur_quick->file->ha_index_end();
12252 if (cur_quick->init() || cur_quick->reset())
12253 goto err;
12254 }
12255
12256 if (result)
12257 {
12258 if (result != HA_ERR_END_OF_FILE)
12259 {
12260 cur_quick->range_end();
12261 goto err;
12262 }
12263 break;
12264 }
12265
12266 if (thd->killed)
12267 goto err;
12268
12269 if (with_cpk_filter &&
12270 pk_quick_select->row_in_ranges() != intersection )
12271 continue;
12272
12273 cur_quick->file->position(cur_quick->record);
12274 if (unique->unique_add((char*)cur_quick->file->ref))
12275 goto err;
12276 }
12277
12278 /*
12279 OK, all rowids are in the Unique now. The next call will initialize
12280 the Unique structure so it can be used to iterate through the rowid
12281 sequence.
12282 */
12283 result= unique->get(head);
12284 /*
12285 index merge currently doesn't support "using index" at all
12286 */
12287 head->file->ha_end_keyread();
12288 if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0,
12289 &unique->sort, 1 , 1, TRUE))
12290 result= 1;
12291 DBUG_RETURN(result);
12292
12293 err:
12294 head->file->ha_end_keyread();
12295 DBUG_RETURN(1);
12296 }
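
/*
  Conceptual sketch of the merge above (rowid values are made up for
  illustration):

    scan on key1 yields rowids 7, 3, 9
    scan on key2 yields rowids 3, 5
    Unique returns             3, 5, 7, 9   (sorted, duplicates removed)

  Rowids that fall within the ranges of pk_quick_select are filtered out
  before being added; the resulting sequence is then read via read_record.
*/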
12297
12298
12299 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
12300
12301 {
12302 int result;
12303 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
12304 result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
12305 &read_record, FALSE, NULL, &unique);
12306 doing_pk_scan= FALSE;
12307 DBUG_RETURN(result);
12308 }
12309
12310 /*
12311 Get next row for index_merge.
12312 NOTES
12313 The rows are read from
12314 1. rowids stored in Unique.
12315 2. QUICK_RANGE_SELECT with clustered primary key (if any).
12316 The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
12317 */
12318
12319 int QUICK_INDEX_MERGE_SELECT::get_next()
12320 {
12321 int result;
12322 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
12323
12324 if (doing_pk_scan)
12325 DBUG_RETURN(pk_quick_select->get_next());
12326
12327 if ((result= read_record.read_record()) == -1)
12328 {
12329 result= HA_ERR_END_OF_FILE;
12330 end_read_record(&read_record);
12331 // Free things used by sort early. Shouldn't be strictly necessary
12332 unique->sort.reset();
12333 /* All rows from Unique have been retrieved, do a clustered PK scan */
12334 if (pk_quick_select)
12335 {
12336 doing_pk_scan= TRUE;
12337 if ((result= pk_quick_select->init()) ||
12338 (result= pk_quick_select->reset()))
12339 DBUG_RETURN(result);
12340 DBUG_RETURN(pk_quick_select->get_next());
12341 }
12342 }
12343
12344 DBUG_RETURN(result);
12345 }
12346
12347 int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge()
12348
12349 {
12350 int result;
12351 DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge");
12352 result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
12353 &read_record, TRUE, &filtered_scans,
12354 &unique);
12355 DBUG_RETURN(result);
12356 }
12357
12358 int QUICK_INDEX_INTERSECT_SELECT::get_next()
12359 {
12360 int result;
12361 DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next");
12362
12363 if ((result= read_record.read_record()) == -1)
12364 {
12365 result= HA_ERR_END_OF_FILE;
12366 end_read_record(&read_record);
12367 unique->sort.reset(); // Free things early
12368 }
12369
12370 DBUG_RETURN(result);
12371 }
12372
12373
12374 /*
12375 Retrieve next record.
12376 SYNOPSIS
12377 QUICK_ROR_INTERSECT_SELECT::get_next()
12378
12379 NOTES
12380 Invariant on enter/exit: all intersected selects have retrieved all index
12381 records with rowid <= some_rowid_val and no intersected select has
12382 retrieved any index records with rowid > some_rowid_val.
12383 We start fresh and loop until we have retrieved the same rowid in each of
12384 the key scans or we got an error.
12385
12386 If a Clustered PK scan is present, it is used only to check if row
12387 satisfies its condition (and never used for row retrieval).
12388
12389 Locking: to ensure that exclusive locks are only set on records that
12390 are included in the final result we must release the lock
12391 on all rows we read but do not include in the final result. This
12392 must be done on each index that reads the record and the lock
12393 must be released using the same handler (the same quick object) as
12394 used when reading the record.
12395
12396 RETURN
12397 0 - Ok
12398 other - Error code if any error occurred.
12399 */
12400
12401 int QUICK_ROR_INTERSECT_SELECT::get_next()
12402 {
12403 List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
12404 QUICK_SELECT_WITH_RECORD *qr;
12405 QUICK_RANGE_SELECT* quick;
12406
12407 /* The quick select that reads the given rowid first. This is needed in
12408 order to be able to unlock the row using the same handler object that
12409 locked it */
12410 QUICK_RANGE_SELECT* quick_with_last_rowid;
12411
12412 int error, cmp;
12413 uint last_rowid_count=0;
12414 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");
12415
12416 /* Get a rowid for first quick and save it as a 'candidate' */
12417 qr= quick_it++;
12418 quick= qr->quick;
12419 error= quick->get_next();
12420 if (cpk_quick)
12421 {
12422 while (!error && !cpk_quick->row_in_ranges())
12423 {
12424 quick->file->unlock_row(); /* row not in range; unlock */
12425 error= quick->get_next();
12426 }
12427 }
12428 if (unlikely(error))
12429 DBUG_RETURN(error);
12430
12431 /* Save the read key tuple */
12432 key_copy(qr->key_tuple, record, head->key_info + quick->index,
12433 quick->max_used_key_length);
12434
12435 quick->file->position(quick->record);
12436 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
12437 last_rowid_count= 1;
12438 quick_with_last_rowid= quick;
12439
12440 while (last_rowid_count < quick_selects.elements)
12441 {
12442 if (!(qr= quick_it++))
12443 {
12444 quick_it.rewind();
12445 qr= quick_it++;
12446 }
12447 quick= qr->quick;
12448
12449 do
12450 {
12451 DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
12452 DBUG_SET("+d,innodb_report_deadlock"););
12453 if (unlikely((error= quick->get_next())))
12454 {
12455 /* On certain errors like deadlock, trx might be rolled back.*/
12456 if (!thd->transaction_rollback_request)
12457 quick_with_last_rowid->file->unlock_row();
12458 DBUG_RETURN(error);
12459 }
12460 quick->file->position(quick->record);
12461 cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
12462 if (cmp < 0)
12463 {
12464 /* This row is being skipped. Release lock on it. */
12465 quick->file->unlock_row();
12466 }
12467 } while (cmp < 0);
12468
12469 key_copy(qr->key_tuple, record, head->key_info + quick->index,
12470 quick->max_used_key_length);
12471
12472 /* Ok, current select 'caught up' and returned ref >= cur_ref */
12473 if (cmp > 0)
12474 {
12475 /* Found a row with ref > cur_ref. Make it a new 'candidate' */
12476 if (cpk_quick)
12477 {
12478 while (!cpk_quick->row_in_ranges())
12479 {
12480 quick->file->unlock_row(); /* row not in range; unlock */
12481 if (unlikely((error= quick->get_next())))
12482 {
12483 /* On certain errors like deadlock, trx might be rolled back.*/
12484 if (!thd->transaction_rollback_request)
12485 quick_with_last_rowid->file->unlock_row();
12486 DBUG_RETURN(error);
12487 }
12488 }
12489 quick->file->position(quick->record);
12490 }
12491 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
12492 quick_with_last_rowid->file->unlock_row();
12493 last_rowid_count= 1;
12494 quick_with_last_rowid= quick;
12495
12496 //save the fields here
12497 key_copy(qr->key_tuple, record, head->key_info + quick->index,
12498 quick->max_used_key_length);
12499 }
12500 else
12501 {
12502 /* current 'candidate' row confirmed by this select */
12503 last_rowid_count++;
12504 }
12505 }
12506
12507 /* We get here if we got the same row ref in all scans. */
12508 if (need_to_fetch_row)
12509 error= head->file->ha_rnd_pos(head->record[0], last_rowid);
12510
12511 if (!need_to_fetch_row)
12512 {
12513 /* Restore the columns we've read/saved with other quick selects */
12514 quick_it.rewind();
12515 while ((qr= quick_it++))
12516 {
12517 if (qr->quick != quick)
12518 {
12519 key_restore(record, qr->key_tuple, head->key_info + qr->quick->index,
12520 qr->quick->max_used_key_length);
12521 }
12522 }
12523 }
12524
12525 DBUG_RETURN(error);
12526 }
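
/*
  Illustrative trace of the loop above with two index scans:

    scan A returns rowids: 1, 3, 4, ...
    scan B returns rowids: 3, 4, ...

  A yields 1 as the first candidate; B advances past 1 and returns 3, which
  becomes the new candidate (cmp > 0); A then catches up and also returns 3
  (cmp == 0), so last_rowid_count reaches quick_selects.elements and rowid 3
  is the first row of the intersection.
*/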
12527
12528
12529 /*
12530 Retrieve next record.
12531 SYNOPSIS
12532 QUICK_ROR_UNION_SELECT::get_next()
12533
12534 NOTES
12535 Enter/exit invariant:
12536 For each quick select in the queue a {key,rowid} tuple has been
12537 retrieved but the corresponding row hasn't been passed to output.
12538
12539 RETURN
12540 0 - Ok
12541 other - Error code if any error occurred.
12542 */
12543
12544 int QUICK_ROR_UNION_SELECT::get_next()
12545 {
12546 int error, dup_row;
12547 QUICK_SELECT_I *quick;
12548 uchar *tmp;
12549 DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
12550
12551 do
12552 {
12553 if (!queue.elements)
12554 DBUG_RETURN(HA_ERR_END_OF_FILE);
12555 /* Ok, we have a queue with >= 1 scans */
12556
12557 quick= (QUICK_SELECT_I*)queue_top(&queue);
12558 memcpy(cur_rowid, quick->last_rowid, rowid_length);
12559
12560 /* put into queue rowid from the same stream as top element */
12561 if ((error= quick->get_next()))
12562 {
12563 if (error != HA_ERR_END_OF_FILE)
12564 DBUG_RETURN(error);
12565 queue_remove_top(&queue);
12566 }
12567 else
12568 {
12569 quick->save_last_pos();
12570 queue_replace_top(&queue);
12571 }
12572
12573 if (!have_prev_rowid)
12574 {
12575 /* No rows have been returned yet */
12576 dup_row= FALSE;
12577 have_prev_rowid= TRUE;
12578 }
12579 else
12580 dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
12581 } while (dup_row);
12582
12583 tmp= cur_rowid;
12584 cur_rowid= prev_rowid;
12585 prev_rowid= tmp;
12586
12587 error= head->file->ha_rnd_pos(quick->record, prev_rowid);
12588 DBUG_RETURN(error);
12589 }
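
/*
  Sketch of the union above: a k-way merge over rowid-ordered streams driven
  by the priority queue. E.g. the streams {1,4} and {1,2} produce 1, 2, 4;
  the duplicate 1 is dropped by the cmp_ref() check against prev_rowid.
*/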
12590
12591
12592 int QUICK_RANGE_SELECT::reset()
12593 {
12594 uint buf_size;
12595 uchar *mrange_buff;
12596 int error;
12597 HANDLER_BUFFER empty_buf;
12598 MY_BITMAP * const save_read_set= head->read_set;
12599 MY_BITMAP * const save_write_set= head->write_set;
12600 DBUG_ENTER("QUICK_RANGE_SELECT::reset");
12601 last_range= NULL;
12602 cur_range= (QUICK_RANGE**) ranges.buffer;
12603 RANGE_SEQ_IF seq_funcs= {NULL, quick_range_seq_init, quick_range_seq_next, 0, 0};
12604
12605 if (file->inited == handler::RND)
12606 {
12607 /* Handler could be left in this state by MRR */
12608 if (unlikely((error= file->ha_rnd_end())))
12609 DBUG_RETURN(error);
12610 }
12611
12612 if (in_ror_merged_scan)
12613 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
12614
12615 if (file->inited == handler::NONE)
12616 {
12617 DBUG_EXECUTE_IF("bug14365043_2",
12618 DBUG_SET("+d,ha_index_init_fail"););
12619 if (unlikely((error= file->ha_index_init(index,1))))
12620 {
12621 file->print_error(error, MYF(0));
12622 goto err;
12623 }
12624 }
12625
12626 /* Allocate buffer if we need one but haven't allocated it yet */
12627 if (mrr_buf_size && !mrr_buf_desc)
12628 {
12629 buf_size= mrr_buf_size;
12630 while (buf_size && !my_multi_malloc(key_memory_QUICK_RANGE_SELECT_mrr_buf_desc,
12631 MYF(MY_WME),
12632 &mrr_buf_desc, sizeof(*mrr_buf_desc),
12633 &mrange_buff, buf_size,
12634 NullS))
12635 {
12636 /* Allocation failed; retry with half the buffer size, down to zero. */
12637 buf_size/= 2;
12638 }
12639 if (!mrr_buf_desc)
12640 {
12641 error= HA_ERR_OUT_OF_MEM;
12642 goto err;
12643 }
12644
12645 /* Initialize the handler buffer. */
12646 mrr_buf_desc->buffer= mrange_buff;
12647 mrr_buf_desc->buffer_end= mrange_buff + buf_size;
12648 mrr_buf_desc->end_of_used_area= mrange_buff;
12649 }
12650
12651 if (!mrr_buf_desc)
12652 empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
12653
12654 error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
12655 mrr_flags, mrr_buf_desc? mrr_buf_desc:
12656 &empty_buf);
12657 err:
12658 /* Restore bitmaps set on entry */
12659 if (in_ror_merged_scan)
12660 head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
12661 DBUG_RETURN(error);
12662 }
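
/*
  Example of the allocation fallback above: if mrr_buf_size is 256K and the
  allocation fails, the loop retries with 128K, 64K, ... and HA_ERR_OUT_OF_MEM
  is returned only if no size down to 1 byte could be allocated.
*/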
12663
12664
12665 /*
12666 Get next possible record using quick-struct.
12667
12668 SYNOPSIS
12669 QUICK_RANGE_SELECT::get_next()
12670
12671 NOTES
12672 Record is read into table->record[0]
12673
12674 RETURN
12675 0 Found row
12676 HA_ERR_END_OF_FILE No (more) rows in range
12677 # Error code
12678 */
12679
12680 int QUICK_RANGE_SELECT::get_next()
12681 {
12682 range_id_t dummy;
12683 int result;
12684 DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
12685
12686 if (!in_ror_merged_scan)
12687 DBUG_RETURN(file->multi_range_read_next(&dummy));
12688
12689 MY_BITMAP * const save_read_set= head->read_set;
12690 MY_BITMAP * const save_write_set= head->write_set;
12691 /*
12692 We don't need to signal the bitmap change as the bitmap is always the
12693 same for this head->file
12694 */
12695 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
12696 result= file->multi_range_read_next(&dummy);
12697 head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
12698 DBUG_RETURN(result);
12699 }
12700
12701
12702 /*
12703 Get the next record with a different prefix.
12704
12705 @param prefix_length length of cur_prefix
12706 @param group_key_parts The number of key parts in the group prefix
12707 @param cur_prefix prefix of a key to be searched for
12708
12709 Each subsequent call to the method retrieves the first record that has a
12710 prefix with length prefix_length and which is different from cur_prefix,
12711 such that the record with the new prefix is within the ranges described by
12712 this->ranges. The record found is stored into the buffer pointed by
12713 this->record. The method is useful for GROUP-BY queries with range
12714 conditions to discover the prefix of the next group that satisfies the range
12715 conditions.
12716
12717 @todo
12718
12719 This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
12720 methods should be unified into a more general one to reduce code
12721 duplication.
12722
12723 @retval 0 on success
12724 @retval HA_ERR_END_OF_FILE if returned all keys
12725 @retval other if some error occurred
12726 */
12727
12728 int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
12729 uint group_key_parts,
12730 uchar *cur_prefix)
12731 {
12732 DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
12733 const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);
12734
12735 for (;;)
12736 {
12737 int result;
12738 if (last_range)
12739 {
12740 /* Read the next record in the same range with prefix after cur_prefix. */
12741 DBUG_ASSERT(cur_prefix != NULL);
12742 result= file->ha_index_read_map(record, cur_prefix, keypart_map,
12743 HA_READ_AFTER_KEY);
12744 if (result || last_range->max_keypart_map == 0) {
12745 /*
12746 Only return if actual failure occurred. For HA_ERR_KEY_NOT_FOUND
12747 or HA_ERR_END_OF_FILE, we just want to continue to reach the next
12748 set of ranges. It is possible for the storage engine to return
12749 HA_ERR_KEY_NOT_FOUND/HA_ERR_END_OF_FILE even when there are more
12750 keys if it respects the end range set by the read_range_first call
12751 below.
12752 */
12753 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
12754 DBUG_RETURN(result);
12755 } else {
12756 /*
12757 For storage engines that don't respect end range, check if we've
12758 moved past the current range.
12759 */
12760 key_range previous_endpoint;
12761 last_range->make_max_endpoint(&previous_endpoint, prefix_length,
12762 keypart_map);
12763 if (file->compare_key(&previous_endpoint) <= 0)
12764 DBUG_RETURN(0);
12765 }
12766 }
12767
12768 uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
12769 if (count == 0)
12770 {
12771 /* Ranges have already been used up before. None is left for read. */
12772 last_range= 0;
12773 DBUG_RETURN(HA_ERR_END_OF_FILE);
12774 }
12775 last_range= *(cur_range++);
12776
12777 key_range start_key, end_key;
12778 last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
12779 last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
12780
12781 result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
12782 last_range->max_keypart_map ? &end_key : 0,
12783 MY_TEST(last_range->flag & EQ_RANGE),
12784 TRUE);
12785 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
12786 last_range= 0; // Stop searching
12787
12788 if (result != HA_ERR_END_OF_FILE)
12789 DBUG_RETURN(result);
12790 last_range= 0; // No matching rows; go to next range
12791 }
12792 }
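
/*
  Illustrative example (hypothetical table t1 with an index on (a,b)):

    SELECT a, MIN(b) FROM t1 WHERE a BETWEEN 2 AND 7 GROUP BY a;

  Here get_next_prefix() is called with the (a) prefix of the previous group,
  so the handler jumps directly to the first record of the next group that
  still lies within this->ranges instead of scanning all rows in between.
*/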
12793
12794
12795 /* Get next for geometrical indexes */
12796
12797 int QUICK_RANGE_SELECT_GEOM::get_next()
12798 {
12799 DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
12800
12801 for (;;)
12802 {
12803 int result;
12804 if (last_range)
12805 {
12806 // Already read through key
12807 result= file->ha_index_next_same(record, last_range->min_key,
12808 last_range->min_length);
12809 if (result != HA_ERR_END_OF_FILE)
12810 DBUG_RETURN(result);
12811 }
12812
12813 uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
12814 if (count == 0)
12815 {
12816 /* Ranges have already been used up before. None is left for read. */
12817 last_range= 0;
12818 DBUG_RETURN(HA_ERR_END_OF_FILE);
12819 }
12820 last_range= *(cur_range++);
12821
12822 result= file->ha_index_read_map(record, last_range->min_key,
12823 last_range->min_keypart_map,
12824 (ha_rkey_function)(last_range->flag ^
12825 GEOM_FLAG));
12826 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
12827 DBUG_RETURN(result);
12828 last_range= 0; // Not found, to next range
12829 }
12830 }
12831
12832
12833 /*
12834 Check if current row will be retrieved by this QUICK_RANGE_SELECT
12835
12836 NOTES
12837 It is assumed that currently a scan is being done on another index
12838 which reads all necessary parts of the index that is scanned by this
12839 quick select.
12840 The implementation does a binary search on sorted array of disjoint
12841 ranges, without taking size of range into account.
12842
12843 This function is used to filter out clustered PK scan rows in
12844 index_merge quick select.
12845
12846 RETURN
12847 TRUE if current row will be retrieved by this quick select
12848 FALSE if not
12849 */
12850
12851 bool QUICK_RANGE_SELECT::row_in_ranges()
12852 {
12853 QUICK_RANGE *res;
12854 uint min= 0;
12855 uint max= ranges.elements - 1;
12856 uint mid= (max + min)/2;
12857
12858 while (min != max)
12859 {
12860 if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
12861 {
12862 /* current row value > mid->max */
12863 min= mid + 1;
12864 }
12865 else
12866 max= mid;
12867 mid= (min + max) / 2;
12868 }
12869 res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
12870 return (!cmp_next(res) && !cmp_prev(res));
12871 }
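
/*
  Example of the binary search above: with the sorted disjoint ranges

    R0=[1..5]  R1=[8..9]  R2=[12..20]

  and a current row whose key value is 10, cmp_next(R1) reports that the row
  is above R1's max, so the search narrows to R2; cmp_prev(R2) then rejects
  the row because 10 < 12, and FALSE is returned.
*/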
12872
12873 /*
12874 This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
12875 get_next() interface, but we have to hold a pointer to the original
12876 QUICK_RANGE_SELECT because its data are used all over the place. What
12877 should be done is to factor out the data that is needed into a base
12878 class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
12879 which handle the ranges and implement the get_next() function. But
12880 for now, this at least seems to work correctly.
12881 */
12882
12883 QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
12884 uint used_key_parts_arg)
12885 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
12886 used_key_parts (used_key_parts_arg)
12887 {
12888 QUICK_RANGE *r;
12889 /*
12890 Use default MRR implementation for reverse scans. No table engine
12891 currently can do an MRR scan with output in reverse index order.
12892 */
12893 mrr_buf_desc= NULL;
12894 mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
12895 mrr_buf_size= 0;
12896
12897 QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
12898 QUICK_RANGE **end_range= pr + ranges.elements;
12899 for (; pr!=end_range; pr++)
12900 rev_ranges.push_front(*pr);
12901
12902 /* Remove EQ_RANGE flag for keys that are not using the full key */
12903 for (r = rev_it++; r; r = rev_it++)
12904 {
12905 if ((r->flag & EQ_RANGE) &&
12906 head->key_info[index].key_length != r->max_length)
12907 r->flag&= ~EQ_RANGE;
12908 }
12909 rev_it.rewind();
12910 q->dont_free=1; // Don't free shared mem
12911 }
12912
12913
12914 int QUICK_SELECT_DESC::get_next()
12915 {
12916 DBUG_ENTER("QUICK_SELECT_DESC::get_next");
12917
12918 /* The max key is handled as follows:
12919 * - if there is NO_MAX_RANGE, start at the end and move backwards
12920 * - if it is an EQ_RANGE, which means that max key covers the entire
12921 * key, go directly to the key and read through it (sorting backwards is
12922 * same as sorting forwards)
12923 * - if it is NEAR_MAX, go to the key or next, step back once, and
12924 * move backwards
12925 * - otherwise (not NEAR_MAX == include the key), go after the key,
12926 * step back once, and move backwards
12927 */
12928
12929 for (;;)
12930 {
12931 int result;
12932 if (last_range)
12933 { // Already read through key
12934 result = ((last_range->flag & EQ_RANGE &&
12935 used_key_parts <= head->key_info[index].user_defined_key_parts) ?
12936 file->ha_index_next_same(record, last_range->min_key,
12937 last_range->min_length) :
12938 file->ha_index_prev(record));
12939 if (!result)
12940 {
12941 if (cmp_prev(*rev_it.ref()) == 0)
12942 DBUG_RETURN(0);
12943 }
12944 else if (result != HA_ERR_END_OF_FILE)
12945 DBUG_RETURN(result);
12946 }
12947
12948 if (!(last_range= rev_it++))
12949 DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used
12950
12951 key_range start_key;
12952 start_key.key= (const uchar*) last_range->min_key;
12953 start_key.length= last_range->min_length;
12954 start_key.flag= ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
12955 (last_range->flag & EQ_RANGE) ?
12956 HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
12957 start_key.keypart_map= last_range->min_keypart_map;
12958 key_range end_key;
12959 end_key.key= (const uchar*) last_range->max_key;
12960 end_key.length= last_range->max_length;
12961 end_key.flag= (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
12962 HA_READ_AFTER_KEY);
12963 end_key.keypart_map= last_range->max_keypart_map;
12964 result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &start_key,
12965 (last_range->flag & NO_MAX_RANGE) ? NULL : &end_key);
12966 if (result)
12967 {
12968 DBUG_RETURN(result);
12969 }
12970
12971 if (last_range->flag & NO_MAX_RANGE) // Read last record
12972 {
12973 int local_error;
12974 if (unlikely((local_error= file->ha_index_last(record))))
12975 DBUG_RETURN(local_error); // Empty table
12976 if (cmp_prev(last_range) == 0)
12977 DBUG_RETURN(0);
12978 last_range= 0; // No match; go to next range
12979 continue;
12980 }
12981
12982 if (last_range->flag & EQ_RANGE &&
12983 used_key_parts <= head->key_info[index].user_defined_key_parts)
12984
12985 {
12986 result= file->ha_index_read_map(record, last_range->max_key,
12987 last_range->max_keypart_map,
12988 HA_READ_KEY_EXACT);
12989 }
12990 else
12991 {
12992 DBUG_ASSERT(last_range->flag & NEAR_MAX ||
12993 (last_range->flag & EQ_RANGE &&
12994 used_key_parts > head->key_info[index].user_defined_key_parts) ||
12995 range_reads_after_key(last_range));
12996 result= file->ha_index_read_map(record, last_range->max_key,
12997 last_range->max_keypart_map,
12998 ((last_range->flag & NEAR_MAX) ?
12999 HA_READ_BEFORE_KEY :
13000 HA_READ_PREFIX_LAST_OR_PREV));
13001 }
13002 if (result)
13003 {
13004 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
13005 DBUG_RETURN(result);
13006 last_range= 0; // Not found, to next range
13007 continue;
13008 }
13009 if (cmp_prev(last_range) == 0)
13010 {
13011 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
13012 last_range= 0; // Stop searching
13013 DBUG_RETURN(0); // Found key is in range
13014 }
13015 last_range= 0; // To next range
13016 }
13017 }
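
/*
  Example of the max-key handling above (hypothetical single-part index on
  a): for the range 5 < a < 10, NEAR_MAX is set, so the scan positions with
  HA_READ_BEFORE_KEY on max_key=10 and then walks backwards with
  ha_index_prev() until cmp_prev() reports a key below min_key=5.
*/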
13018
13019
13020 /**
13021 Create a compatible quick select with the result ordered in an opposite way
13022
13023 @param used_key_parts_arg Number of used key parts
13024
13025 @retval NULL in case of errors (OOM etc)
13026 @retval pointer to a newly created QUICK_SELECT_DESC if success
13027 */
13028
13029 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
13030 {
13031 QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg);
13032 if (new_quick == NULL)
13033 {
13034 delete new_quick;
13035 return NULL;
13036 }
13037 return new_quick;
13038 }
13039
13040
13041 /*
13042 Compare whether the found key is over the max value.
13043 Returns 0 if key <= range->max_key.
13044 TODO: Figure out why this function can't be as simple as cmp_prev().
13045 */
13046
13047 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
13048 {
13049 if (range_arg->flag & NO_MAX_RANGE)
13050 return 0; /* key can't be too large */
13051
13052 KEY_PART *key_part=key_parts;
13053 uint store_length;
13054
13055 for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
13056 key < end;
13057 key+= store_length, key_part++)
13058 {
13059 int cmp;
13060 store_length= key_part->store_length;
13061 if (key_part->null_bit)
13062 {
13063 if (*key)
13064 {
13065 if (!key_part->field->is_null())
13066 return 1;
13067 continue;
13068 }
13069 else if (key_part->field->is_null())
13070 return 0;
13071 key++; // Skip null byte
13072 store_length--;
13073 }
13074 if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
13075 return 0;
13076 if (cmp > 0)
13077 return 1;
13078 }
13079 return (range_arg->flag & NEAR_MAX) ? 1 : 0; // Exact match
13080 }
13081
13082
13083 /*
13084 Returns 0 if found key is inside range (found key >= range->min_key).
13085 */
13086
13087 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
13088 {
13089 int cmp;
13090 if (range_arg->flag & NO_MIN_RANGE)
13091 return 0; /* key can't be too small */
13092
13093 cmp= key_cmp(key_part_info, range_arg->min_key,
13094 range_arg->min_length);
13095 if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
13096 return 0;
13097 return 1; // outside of range
13098 }
13099
13100
13101 /*
13102 TRUE if this range will require using HA_READ_AFTER_KEY.
13103 See the comment in get_next() about this.
13104 */
13105
13106 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
13107 {
13108 return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
13109 !(range_arg->flag & EQ_RANGE) ||
13110 head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
13111 }
13112
13113
13114 void QUICK_SELECT_I::add_key_name(String *str, bool *first)
13115 {
13116 KEY *key_info= head->key_info + index;
13117
13118 if (*first)
13119 *first= FALSE;
13120 else
13121 str->append(',');
13122 str->append(&key_info->name);
13123 }
13124
13125
13126 Explain_quick_select* QUICK_RANGE_SELECT::get_explain(MEM_ROOT *local_alloc)
13127 {
13128 Explain_quick_select *res;
13129 if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_RANGE)))
13130 res->range.set(local_alloc, &head->key_info[index], max_used_key_length);
13131 return res;
13132 }
13133
13134
13135 Explain_quick_select*
13136 QUICK_GROUP_MIN_MAX_SELECT::get_explain(MEM_ROOT *local_alloc)
13137 {
13138 Explain_quick_select *res;
13139 if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_GROUP_MIN_MAX)))
13140 res->range.set(local_alloc, &head->key_info[index], max_used_key_length);
13141 return res;
13142 }
13143
13144
13145 Explain_quick_select*
13146 QUICK_INDEX_SORT_SELECT::get_explain(MEM_ROOT *local_alloc)
13147 {
13148 Explain_quick_select *res;
13149 if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13150 return NULL;
13151
13152 QUICK_RANGE_SELECT *quick;
13153 Explain_quick_select *child_explain;
13154 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13155 while ((quick= it++))
13156 {
13157 if ((child_explain= quick->get_explain(local_alloc)))
13158 res->children.push_back(child_explain);
13159 else
13160 return NULL;
13161 }
13162
13163 if (pk_quick_select)
13164 {
13165 if ((child_explain= pk_quick_select->get_explain(local_alloc)))
13166 res->children.push_back(child_explain);
13167 else
13168 return NULL;
13169 }
13170 return res;
13171 }
13172
13173
13174 /*
13175 Same as QUICK_INDEX_SORT_SELECT::get_explain(), but primary key is printed
13176 first
13177 */
13178
13179 Explain_quick_select*
13180 QUICK_INDEX_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc)
13181 {
13182 Explain_quick_select *res;
13183 Explain_quick_select *child_explain;
13184
13185 if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13186 return NULL;
13187
13188 if (pk_quick_select)
13189 {
13190 if ((child_explain= pk_quick_select->get_explain(local_alloc)))
13191 res->children.push_back(child_explain);
13192 else
13193 return NULL;
13194 }
13195
13196 QUICK_RANGE_SELECT *quick;
13197 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13198 while ((quick= it++))
13199 {
13200 if ((child_explain= quick->get_explain(local_alloc)))
13201 res->children.push_back(child_explain);
13202 else
13203 return NULL;
13204 }
13205 return res;
13206 }
13207
13208
13209 Explain_quick_select*
13210 QUICK_ROR_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc)
13211 {
13212 Explain_quick_select *res;
13213 Explain_quick_select *child_explain;
13214
13215 if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13216 return NULL;
13217
13218 QUICK_SELECT_WITH_RECORD *qr;
13219 List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13220 while ((qr= it++))
13221 {
13222 if ((child_explain= qr->quick->get_explain(local_alloc)))
13223 res->children.push_back(child_explain);
13224 else
13225 return NULL;
13226 }
13227
13228 if (cpk_quick)
13229 {
13230 if ((child_explain= cpk_quick->get_explain(local_alloc)))
13231 res->children.push_back(child_explain);
13232 else
13233 return NULL;
13234 }
13235 return res;
13236 }
13237
13238
13239 Explain_quick_select*
13240 QUICK_ROR_UNION_SELECT::get_explain(MEM_ROOT *local_alloc)
13241 {
13242 Explain_quick_select *res;
13243 Explain_quick_select *child_explain;
13244
13245 if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13246 return NULL;
13247
13248 QUICK_SELECT_I *quick;
13249 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13250 while ((quick= it++))
13251 {
13252 if ((child_explain= quick->get_explain(local_alloc)))
13253 res->children.push_back(child_explain);
13254 else
13255 return NULL;
13256 }
13257
13258 return res;
13259 }
13260
13261
13262 void QUICK_SELECT_I::add_key_and_length(String *key_names,
13263 String *used_lengths,
13264 bool *first)
13265 {
13266 char buf[64];
13267 size_t length;
13268 KEY *key_info= head->key_info + index;
13269
13270 if (*first)
13271 *first= FALSE;
13272 else
13273 {
13274 key_names->append(',');
13275 used_lengths->append(',');
13276 }
13277 key_names->append(&key_info->name);
13278 length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
13279 used_lengths->append(buf, length);
13280 }
13281
13282
13283 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
13284 String *used_lengths)
13285 {
13286 bool first= TRUE;
13287
13288 add_key_and_length(key_names, used_lengths, &first);
13289 }
13290
13291 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
13292 String *used_lengths)
13293 {
13294 QUICK_RANGE_SELECT *quick;
13295 bool first= TRUE;
13296
13297 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13298
13299 while ((quick= it++))
13300 {
13301 quick->add_key_and_length(key_names, used_lengths, &first);
13302 }
13303
13304 if (pk_quick_select)
13305 pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
13306 }
13307
13308
13309 void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
13310 String *used_lengths)
13311 {
13312 QUICK_RANGE_SELECT *quick;
13313 bool first= TRUE;
13314
13315 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13316
13317 if (pk_quick_select)
13318 pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
13319
13320 while ((quick= it++))
13321 {
13322 quick->add_key_and_length(key_names, used_lengths, &first);
13323 }
13324 }
13325
13326 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
13327 String *used_lengths)
13328 {
13329 QUICK_SELECT_WITH_RECORD *qr;
13330 bool first= TRUE;
13331
13332 List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13333
13334 while ((qr= it++))
13335 {
13336 qr->quick->add_key_and_length(key_names, used_lengths, &first);
13337 }
13338 if (cpk_quick)
13339 cpk_quick->add_key_and_length(key_names, used_lengths, &first);
13340 }
13341
13342 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
13343 String *used_lengths)
13344 {
13345 QUICK_SELECT_I *quick;
13346 bool first= TRUE;
13347
13348 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13349
13350 while ((quick= it++))
13351 {
13352 if (first)
13353 first= FALSE;
13354 else
13355 {
13356 used_lengths->append(',');
13357 key_names->append(',');
13358 }
13359 quick->add_keys_and_lengths(key_names, used_lengths);
13360 }
13361 }
13362
13363
13364 void QUICK_RANGE_SELECT::add_used_key_part_to_set()
13365 {
13366 uint key_len;
13367 KEY_PART *part= key_parts;
13368 for (key_len=0; key_len < max_used_key_length;
13369 key_len += (part++)->store_length)
13370 {
13371 /*
13372 We have to use field_index instead of part->field
13373 as for partial fields, part->field points to
13374 a temporary field that is only part of the original
13375 field. field_index always points to the original field
13376 */
13377 Field *field= head->field[part->field->field_index];
13378 field->register_field_in_read_map();
13379 }
13380 }
13381
13382
13383 void QUICK_GROUP_MIN_MAX_SELECT::add_used_key_part_to_set()
13384 {
13385 uint key_len;
13386 KEY_PART_INFO *part= index_info->key_part;
13387 for (key_len=0; key_len < max_used_key_length;
13388 key_len += (part++)->store_length)
13389 {
13390 /*
13391 We have to use field_index instead of part->field
13392 as for partial fields, part->field points to
13393 a temporary field that is only part of the original
13394 field. field_index always points to the original field
13395 */
13396 Field *field= head->field[part->field->field_index];
13397 field->register_field_in_read_map();
13398 }
13399 }
13400
13401
13402 void QUICK_ROR_INTERSECT_SELECT::add_used_key_part_to_set()
13403 {
13404 List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13405 QUICK_SELECT_WITH_RECORD *quick;
13406 while ((quick= it++))
13407 {
13408 quick->quick->add_used_key_part_to_set();
13409 }
13410 }
13411
13412
13413 void QUICK_INDEX_SORT_SELECT::add_used_key_part_to_set()
13414 {
13415 QUICK_RANGE_SELECT *quick;
13416 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13417 while ((quick= it++))
13418 {
13419 quick->add_used_key_part_to_set();
13420 }
13421 if (pk_quick_select)
13422 pk_quick_select->add_used_key_part_to_set();
13423 }
13424
13425
13426 void QUICK_ROR_UNION_SELECT::add_used_key_part_to_set()
13427 {
13428 QUICK_SELECT_I *quick;
13429 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13430
13431 while ((quick= it++))
13432 {
13433 quick->add_used_key_part_to_set();
13434 }
13435 }
13436
13437
13438 /*******************************************************************************
13439 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
13440 *******************************************************************************/
13441
13442 static inline uint get_field_keypart(KEY *index, Field *field);
13443 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
13444 SEL_ARG **cur_range);
13445 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
13446 KEY_PART_INFO *first_non_group_part,
13447 KEY_PART_INFO *min_max_arg_part,
13448 KEY_PART_INFO *last_part, THD *thd,
13449 uchar *key_infix, uint *key_infix_len,
13450 KEY_PART_INFO **first_non_infix_part);
13451 static bool
13452 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
13453 Field::imagetype image_type,
13454 bool *has_min_max_fld, bool *has_other_fld);
13455
13456 static void
13457 cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
13458 uint group_key_parts, SEL_TREE *range_tree,
13459 SEL_ARG *index_tree, ha_rows quick_prefix_records,
13460 bool have_min, bool have_max,
13461 double *read_cost, ha_rows *records);
13462
13463
13464 /**
13465 Test if this access method is applicable to a GROUP query with MIN/MAX
13466 functions, and if so, construct a new TRP object.
13467
13468 DESCRIPTION
13469 Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
13470 Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
13471 following conditions:
13472 A) Table T has at least one compound index I of the form:
13473 I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
13474 B) Query conditions:
13475 B0. Q is over a single table T.
13476 B1. The attributes referenced by Q are a subset of the attributes of I.
13477 B2. All attributes QA in Q can be divided into 3 overlapping groups:
13478 - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
13479 referenced by any number of MIN and/or MAX functions if present.
13480 - WA = {W_1, ..., W_p} - from the WHERE clause
13481 - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
13482 = SA - if Q is a DISTINCT query (based on the
13483 equivalence of DISTINCT and GROUP queries).
13484 - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
13485 GROUP BY and not referenced by MIN/MAX functions.
13486 with the following properties specified below.
13487 B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
13488 applicable.
13489
13490 SA1. There is at most one attribute in SA referenced by any number of
13491 MIN and/or MAX functions which, if present, is denoted as C.
13492 SA2. The position of the C attribute in the index is after the last A_k.
13493 SA3. The attribute C can be referenced in the WHERE clause only in
13494 predicates of the forms:
13495 - (C {< | <= | > | >= | =} const)
13496 - (const {< | <= | > | >= | =} C)
13497 - (C between const_i and const_j)
13498 - C IS NULL
13499 - C IS NOT NULL
13500 - C != const
13501 SA4. If Q has a GROUP BY clause, there are no other aggregate functions
13502 except MIN and MAX. For queries with DISTINCT, aggregate functions
13503 are allowed.
13504 SA5. The select list in DISTINCT queries should not contain expressions.
13505 SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
13506 for AGG_FUNC(DISTINCT ...) optimization because cursor position is
13507 never stored after a unique key lookup in the clustered index and
13508 further index_next/prev calls can not be used. So loose index scan
13509 optimization can not be used in this case.
13510 SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
13511 access method is not used.
13512 For the above queries MIN/MAX() aggregation has to be done at
13513 nested_loops_join (end_send_group). But with the current design MIN/MAX()
13514 is always set as part of loose index scan. Because of this mismatch
13515 MIN() and MAX() values will be set incorrectly. For such queries to
13516 work we need a new interface for loose index scan. This new interface
13517 should only fetch records with min and max values and let
13518 end_send_group do the aggregation. Until then do not use
13519 loose_index_scan.
13520 GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
13521 G_i = A_j => i = j.
13522 GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
13523 forms a prefix of I. This permutation is used as the GROUP clause
13524 when the DISTINCT query is converted to a GROUP query.
13525 GA3. The attributes in GA may participate in arbitrary predicates, divided
13526 into two groups:
13527 - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
13528 attributes of a prefix of GA
13529 - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
13530 of GA. Since P is applied to only GROUP attributes it filters some
13531 groups, and thus can be applied after the grouping.
13532 GA4. There are no expressions among G_i, just direct column references.
13533 NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
13534 and the MIN/MAX attribute C, then NGA must consist of exactly the
13535 index attributes that constitute the gap. As a result there is a
13536 permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
13537 in the index.
13538 NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
13539 equality conditions for all NG_i of the form (NG_i = const) or
13540 (const = NG_i), such that each NG_i is referenced in exactly one
13541 conjunct. Informally, the predicates provide constants to fill the
13542 gap in the index.
13543 NGA3.If BA <> {}, there can only be one range. TODO: This is a code
13544 limitation and is not strictly needed. See BUG#15947433
13545 WA1. There are no other attributes in the WHERE clause except the ones
13546 referenced in predicates RNG, PA, PC, EQ defined above. Therefore
13547 WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
13548 above tests. By transitivity then it also follows that each WA_i
13549 participates in the index I (if this was already tested for GA, NGA
13550 and C).
13551 WA2. If there is a predicate on C, then it must be in conjunction
13552 to all predicates on all earlier keyparts in I.
13553
13554 C) Overall query form:
13555 SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
13556 FROM T
13557 WHERE [RNG(A_1,...,A_p ; where p <= k)]
13558 [AND EQ(B_1,...,B_m)]
13559 [AND PC(C)]
13560 [AND PA(A_i1,...,A_iq)]
13561 GROUP BY A_1,...,A_k
13562 [HAVING PH(A_1, ..., B_1,..., C)]
13563 where EXPR(...) is an arbitrary expression over some or all SELECT fields,
13564 or:
13565 SELECT DISTINCT A_i1,...,A_ik
13566 FROM T
13567 WHERE [RNG(A_1,...,A_p ; where p <= k)]
13568 [AND PA(A_i1,...,A_iq)];
13569
13570 NOTES
13571 If the current query satisfies the conditions above, and if
13572 (mem_root != NULL), then the function constructs and returns a new TRP
13573 object that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
13574 If (mem_root == NULL), then the function only tests whether the current
13575 query satisfies the conditions above, and, if so, sets
13576 is_applicable = TRUE.
13577
13578 Queries with DISTINCT for which index access can be used are transformed
13579 into equivalent group-by queries of the form:
13580
13581 SELECT A_1,...,A_k FROM T
13582 WHERE [RNG(A_1,...,A_p ; where p <= k)]
13583 [AND PA(A_i1,...,A_iq)]
13584 GROUP BY A_1,...,A_k;
13585
13586 The group-by list is a permutation of the select attributes, according
13587 to their order in the index.
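
      For example (illustrative, assuming an index on t(a1, a2)):

      SELECT DISTINCT a2, a1 FROM t;

      is treated as:

      SELECT a2, a1 FROM t GROUP BY a1, a2;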
13588
13589 TODO
13590 - What happens if the query groups by the MIN/MAX field, and there is no
13591 other field as in: "select MY_MIN(a) from t1 group by a" ?
13592 - We assume that the general correctness of the GROUP-BY query was checked
13593 before this point. Is this correct, or do we have to check it completely?
13594 - Lift the limitation in condition (B3), that is, make this access method
13595 applicable to ROLLUP queries.
13596
13597 @param param Parameter from test_quick_select
13598 @param sel_tree Range tree generated by get_mm_tree
13599 @param read_time Best read time so far of table or index scan time
13600 @return table read plan
13601 @retval NULL Loose index scan not applicable or mem_root == NULL
13602 @retval !NULL Loose index scan table read plan
13603 */
13604
13605 static TRP_GROUP_MIN_MAX *
13606 get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
13607 {
13608 THD *thd= param->thd;
13609 JOIN *join= thd->lex->current_select->join;
13610 TABLE *table= param->table;
13611 bool have_min= FALSE; /* TRUE if there is a MIN function. */
13612 bool have_max= FALSE; /* TRUE if there is a MAX function. */
13613 Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
13614 KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
13615 uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
13616 KEY *index_info= NULL; /* The index chosen for data access. */
13617 uint index= 0; /* The id of the chosen index. */
13618 uint group_key_parts= 0; // Number of index key parts in the group prefix.
13619 uint used_key_parts= 0; /* Number of index key parts used for access. */
13620 uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
13621 uint key_infix_len= 0; /* Length of key_infix. */
13622 TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
13623 uint key_part_nr;
13624 uint elements_in_group;
13625 ORDER *tmp_group;
13626 Item *item;
13627 Item_field *item_field;
13628 bool is_agg_distinct;
13629 List<Item_field> agg_distinct_flds;
13630 DBUG_ENTER("get_best_group_min_max");
13631
13632 Json_writer_object trace_group(thd, "group_index_range");
13633 const char* cause= NULL;
13634
13635 /* Perform a few 'cheap' tests of whether this access method is applicable. */
13636 if (!join) /* This is not a select statement. */
13637 cause= "no join";
13638 else if (join->table_count != 1) /* The query must reference one table. */
13639 cause= "not single_table";
13640 else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
13641 cause= "rollup";
13642 else if (table->s->keys == 0) /* There are no indexes to use. */
13643 cause= "no index";
13644 else if (join->conds && join->conds->used_tables()
13645 & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */
13646 cause= "correlated conditions";
13647
13648 if (cause)
13649 {
13650 trace_group.add("chosen", false).add("cause", cause);
13651 DBUG_RETURN(NULL);
13652 }
13653
13654 is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
13655
13656 if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
13657 (!join->select_distinct) &&
13658 !is_agg_distinct)
13659 {
13660 trace_group.add("chosen", false).add("cause","no group by or distinct");
13661 DBUG_RETURN(NULL);
13662 }
13663 /* Analyze the query in more detail. */
13664
13665 /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
13666 List_iterator<Item> select_items_it(join->fields_list);
13667
13668 if (join->sum_funcs[0])
13669 {
13670 Item_sum *min_max_item;
13671 Item_sum **func_ptr= join->sum_funcs;
13672 while ((min_max_item= *(func_ptr++)))
13673 {
13674 if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
13675 have_min= TRUE;
13676 else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
13677 have_max= TRUE;
13678 else if (is_agg_distinct &&
13679 (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
13680 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
13681 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
13682 continue;
13683 else
13684 {
13685 trace_group.add("chosen", false)
13686 .add("cause", "not applicable aggregate function");
13687 DBUG_RETURN(NULL);
13688 }
13689
13690 /* The argument of MIN/MAX. */
13691 Item *expr= min_max_item->get_arg(0)->real_item();
13692 if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
13693 {
13694 if (! min_max_arg_item)
13695 min_max_arg_item= (Item_field*) expr;
13696 else if (! min_max_arg_item->eq(expr, 1))
13697 {
13698 trace_group.add("chosen", false)
13699 .add("cause", "arguments different in min max function");
13700 DBUG_RETURN(NULL);
13701 }
13702 }
13703 else
13704 {
13705 trace_group.add("chosen", false)
13706 .add("cause", "no field item in min max function");
13707 DBUG_RETURN(NULL);
13708 }
13709 }
13710 }
13711
13712 /* Check (SA7). */
13713 if (is_agg_distinct && (have_max || have_min))
13714 {
13715 trace_group.add("chosen", false)
13716 .add("cause", "have both agg distinct and min max");
13717 DBUG_RETURN(NULL);
13718 }
13719
13720 /* Check (SA5). */
13721 if (join->select_distinct)
13722 {
13723 trace_group.add("distinct_query", true);
13724 while ((item= select_items_it++))
13725 {
13726 if (item->real_item()->type() != Item::FIELD_ITEM)
13727 {
13728 trace_group.add("chosen", false)
13729 .add("cause", "distinct field is expression");
13730 DBUG_RETURN(NULL);
13731 }
13732 }
13733 }
13734
13735 /* Check (GA4) - that there are no expressions among the group attributes. */
13736 elements_in_group= 0;
13737 for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
13738 {
13739 if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
13740 {
13741 trace_group.add("chosen", false)
13742 .add("cause", "group field is expression");
13743 DBUG_RETURN(NULL);
13744 }
13745 elements_in_group++;
13746 }
13747
13748 /*
13749 Check that the table has at least one compound index such that the conditions
13750 (GA1,GA2) are all TRUE. If there is more than one such index, select the
13751 first one. Here we set the variables: group_prefix_len and index_info.
13752 */
13753 /* Cost-related variables for the best index so far. */
13754 double best_read_cost= DBL_MAX;
13755 ha_rows best_records= 0;
13756 SEL_ARG *best_index_tree= NULL;
13757 ha_rows best_quick_prefix_records= 0;
13758 uint best_param_idx= 0;
13759
13760 const uint pk= param->table->s->primary_key;
13761 uint max_key_part;
13762 SEL_ARG *cur_index_tree= NULL;
13763 ha_rows cur_quick_prefix_records= 0;
13764
13765 // We go through allowed indexes
13766 Json_writer_array trace_indexes(thd, "potential_group_range_indexes");
13767
13768 for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx)
13769 {
13770 const uint cur_index= param->real_keynr[cur_param_idx];
13771 KEY *const cur_index_info= &table->key_info[cur_index];
13772
13773 Json_writer_object trace_idx(thd);
13774 trace_idx.add("index", cur_index_info->name);
13775
13776 KEY_PART_INFO *cur_part;
13777 KEY_PART_INFO *end_part; /* Last part for loops. */
13778 /* Last index part. */
13779 KEY_PART_INFO *last_part;
13780 KEY_PART_INFO *first_non_group_part;
13781 KEY_PART_INFO *first_non_infix_part;
13782 uint key_parts;
13783 uint key_infix_parts;
13784 uint cur_group_key_parts= 0;
13785 uint cur_group_prefix_len= 0;
13786 double cur_read_cost;
13787 ha_rows cur_records;
13788 key_map used_key_parts_map;
13789 uint cur_key_infix_len= 0;
13790 uchar cur_key_infix[MAX_KEY_LENGTH];
13791 uint cur_used_key_parts;
13792
13793 /*
13794 Check (B1) - if current index is covering.
13795 (was also: "Exclude UNIQUE indexes ..." but this was removed because
13796 there are cases where Loose Scan over a multi-part index is useful).
13797 */
13798 if (!table->covering_keys.is_set(cur_index) ||
13799 !table->keys_in_use_for_group_by.is_set(cur_index))
13800 {
13801 cause= "not covering";
13802 goto next_index;
13803 }
13804
13805 /*
13806 This function is called on the precondition that the index is covering.
13807 Therefore if the GROUP BY list contains more elements than the index,
13808 these are duplicates, and the GROUP BY list cannot be a prefix of the index.
13809 */
13810 if (elements_in_group > table->actual_n_key_parts(cur_index_info))
13811 {
13812 cause= "group key parts greater than index key parts";
13813 goto next_index;
13814 }
13815
13816 /*
13817 Unless extended keys can be used for cur_index:
13818 If the current storage manager is such that it appends the primary key to
13819 each index, then the above condition is insufficient to check if the
13820 index is covering. In such cases it may happen that some fields are
13821 covered by the PK index, but not by the current index. Since we can't
13822 use the concatenation of both indexes for index lookup, such an index
13823 does not qualify as covering in our case. If this is the case, below
13824 we check that all query fields are indeed covered by 'cur_index'.
13825 */
13826 if (cur_index_info->user_defined_key_parts == table->actual_n_key_parts(cur_index_info)
13827 && pk < MAX_KEY && cur_index != pk &&
13828 (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
13829 {
13830 /* For each table field */
13831 for (uint i= 0; i < table->s->fields; i++)
13832 {
13833 Field *cur_field= table->field[i];
13834 /*
13835 If the field is used in the current query ensure that it's
13836 part of 'cur_index'
13837 */
13838 if (bitmap_is_set(table->read_set, cur_field->field_index) &&
13839 !cur_field->part_of_key_not_clustered.is_set(cur_index))
13840 {
13841 cause= "not covering";
13842 goto next_index; // Field was not part of key
13843 }
13844 }
13845 }
13846
13847 trace_idx.add("covering", true);
13848
13849 max_key_part= 0;
13850 used_key_parts_map.clear_all();
13851
13852 /*
13853 Check (GA1) for GROUP BY queries.
13854 */
13855 if (join->group_list)
13856 {
13857 cur_part= cur_index_info->key_part;
13858 end_part= cur_part + table->actual_n_key_parts(cur_index_info);
13859 /* Iterate in parallel over the GROUP list and the index parts. */
13860 for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
13861 tmp_group= tmp_group->next, cur_part++)
13862 {
13863 /*
13864 TODO:
13865 tmp_group::item is an array of Item, is it OK to consider only the
13866 first Item? If so, then why? What is the array for?
13867 */
13868 /* Above we already checked that all group items are fields. */
13869 DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
13870 Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
13871 if (group_field->field->eq(cur_part->field))
13872 {
13873 cur_group_prefix_len+= cur_part->store_length;
13874 ++cur_group_key_parts;
13875 max_key_part= (uint)(cur_part - cur_index_info->key_part) + 1;
13876 used_key_parts_map.set_bit(max_key_part);
13877 }
13878 else
13879 {
13880 cause= "group attribute not prefix in index";
13881 goto next_index;
13882 }
13883 }
13884 }
13885 /*
13886 Check (GA2) if this is a DISTINCT query.
13887 If GA2 holds, then store a new ORDER object in group_fields_array at
13888 the position of the key part of item_field->field. Thus we get the
13889 ORDER objects for each field ordered as the corresponding key parts.
13890 Later the group_fields_array of ORDER objects is used to convert the
13891 query to a GROUP query.
13892 */
13893 if ((!join->group && join->select_distinct) ||
13894 is_agg_distinct)
13895 {
13896 if (!is_agg_distinct)
13897 {
13898 select_items_it.rewind();
13899 }
13900
13901 List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
13902 while (NULL != (item = (is_agg_distinct ?
13903 (Item *) agg_distinct_flds_it++ : select_items_it++)))
13904 {
13905 /* (SA5) already checked above. */
13906 item_field= (Item_field*) item->real_item();
13907 DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
13908
13909 /* not doing loose index scan for derived tables */
13910 if (!item_field->field)
13911 {
13912 cause= "derived table";
13913 goto next_index;
13914 }
13915
13916 /* Find the order of the key part in the index. */
13917 key_part_nr= get_field_keypart(cur_index_info, item_field->field);
13918 /*
13919 Check if this attribute was already present in the select list.
13920 If it was present, then its corresponding key part was already used.
13921 */
13922 if (used_key_parts_map.is_set(key_part_nr))
13923 continue;
13924 if (key_part_nr < 1 ||
13925 (!is_agg_distinct && key_part_nr > join->fields_list.elements))
13926 {
13927 cause= "select attribute not prefix in index";
13928 goto next_index;
13929 }
13930 cur_part= cur_index_info->key_part + key_part_nr - 1;
13931 cur_group_prefix_len+= cur_part->store_length;
13932 used_key_parts_map.set_bit(key_part_nr);
13933 ++cur_group_key_parts;
13934 max_key_part= MY_MAX(max_key_part,key_part_nr);
13935 }
13936 /*
13937 Check that the used key parts form a prefix of the index.
13938 To check this we compare bits in all_parts and cur_parts.
13939 all_parts has all bits set from 0 to (max_key_part-1);
13940 cur_parts has bits set only for the used keyparts.
13941 */
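 /*
 Illustrative example: if max_key_part == 3 and keyparts {1,2,3} are
 used, both all_parts and cur_parts equal binary 111 and the check
 passes. If only keyparts {1,3} were used, cur_parts would be binary
 101 and the index would be rejected, as keypart 2 is missing from
 the prefix.
 */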
13942 ulonglong all_parts, cur_parts;
13943 all_parts= (1ULL << max_key_part) - 1;
13944 cur_parts= used_key_parts_map.to_ulonglong() >> 1;
13945 if (all_parts != cur_parts)
13946 goto next_index;
13947 }
13948
13949 /* Check (SA2). */
13950 if (min_max_arg_item)
13951 {
13952 key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
13953 if (key_part_nr <= cur_group_key_parts)
13954 {
13955 cause= "aggregate column not suffix in idx";
13956 goto next_index;
13957 }
13958 min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
13959 }
13960
13961 /*
13962 Apply a heuristic: there is no point in using loose index scan when
13963 we're using the whole unique index.
13964 */
13965 if (cur_index_info->flags & HA_NOSAME &&
13966 cur_group_key_parts == cur_index_info->user_defined_key_parts)
13967 {
13968 cause= "using unique index";
13969 goto next_index;
13970 }
13971
13972 /*
13973 Check (NGA1, NGA2) and extract a sequence of constants to be used as part
13974 of all search keys.
13975 */
13976
13977 /*
13978 If there is MIN/MAX, each keypart between the last group part and the
13979 MIN/MAX part must participate in one equality with constants, and all
13980 keyparts after the MIN/MAX part must not be referenced in the query.
13981
13982 If there is no MIN/MAX, the keyparts after the last group part can be
13983 referenced only in equalities with constants, and the referenced keyparts
13984 must form a sequence without any gaps that starts immediately after the
13985 last group keypart.
13986 */
13987 key_parts= table->actual_n_key_parts(cur_index_info);
13988 last_part= cur_index_info->key_part + key_parts;
13989 first_non_group_part= (cur_group_key_parts < key_parts) ?
13990 cur_index_info->key_part + cur_group_key_parts :
13991 NULL;
13992 first_non_infix_part= min_max_arg_part ?
13993 (min_max_arg_part < last_part) ?
13994 min_max_arg_part :
13995 NULL :
13996 NULL;
13997 if (first_non_group_part &&
13998 (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
13999 {
14000 if (tree)
14001 {
14002 SEL_ARG *index_range_tree= tree->keys[cur_param_idx];
14003 if (!get_constant_key_infix(cur_index_info, index_range_tree,
14004 first_non_group_part, min_max_arg_part,
14005 last_part, thd, cur_key_infix,
14006 &cur_key_infix_len,
14007 &first_non_infix_part))
14008 {
14009 cause= "nonconst equality gap attribute";
14010 goto next_index;
14011 }
14012 }
14013 else if (min_max_arg_part &&
14014 (min_max_arg_part - first_non_group_part > 0))
14015 {
14016 /*
14017 There is a gap but no range tree, thus no predicates at all for the
14018 non-group keyparts.
14019 */
14020 cause= "no nongroup keypart predicate";
14021 goto next_index;
14022 }
14023 else if (first_non_group_part && join->conds)
14024 {
14025 /*
14026 If there is no MIN/MAX function in the query, but some index
14027 key part is referenced in the WHERE clause, then this index
14028 cannot be used because the WHERE condition over the keypart's
14029 field cannot be 'pushed' to the index (because there is no
14030 range 'tree'), and the WHERE clause must be evaluated before
14031 GROUP BY/DISTINCT.
14032 */
14033 /*
14034 Store the first and last keyparts that need to be analyzed
14035 into one array that can be passed as parameter.
14036 */
14037 KEY_PART_INFO *key_part_range[2];
14038 key_part_range[0]= first_non_group_part;
14039 key_part_range[1]= last_part;
14040
14041 /* Check if cur_part is referenced in the WHERE clause. */
14042 if (join->conds->walk(&Item::find_item_in_field_list_processor, true,
14043 key_part_range))
14044 {
14045 cause= "keypart reference from where clause";
14046 goto next_index;
14047 }
14048 }
14049 }
14050
14051 /*
14052 Test (WA1) partially - that no other keypart after the last infix part is
14053 referenced in the query.
14054 */
14055 if (first_non_infix_part)
14056 {
14057 cur_part= first_non_infix_part +
14058 (min_max_arg_part && (min_max_arg_part < last_part));
14059 for (; cur_part != last_part; cur_part++)
14060 {
14061 if (bitmap_is_set(table->read_set, cur_part->field->field_index))
14062 {
14063 cause= "keypart after infix in query";
14064 goto next_index;
14065 }
14066 }
14067 }
14068
14069 /**
14070 Test WA2: If there are conditions on the column C participating in
14071 MIN/MAX, those conditions must be in conjunction with all predicates
14072 on the earlier keyparts in I. Otherwise, Loose Index Scan cannot be used.
14073 */
14074 if (tree && min_max_arg_item)
14075 {
14076 SEL_ARG *index_range_tree= tree->keys[cur_param_idx];
14077 SEL_ARG *cur_range= NULL;
14078 if (get_sel_arg_for_keypart(min_max_arg_part->field,
14079 index_range_tree, &cur_range) ||
14080 (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
14081 {
14082 cause= "minmax keypart in disjunctive query";
14083 goto next_index;
14084 }
14085 }
14086
14087 /* If we got to this point, cur_index_info passes the test. */
14088 key_infix_parts= cur_key_infix_len ? (uint)
14089 (first_non_infix_part - first_non_group_part) : 0;
14090 cur_used_key_parts= cur_group_key_parts + key_infix_parts;
14091
14092 /* Compute the cost of using this index. */
14093 if (tree)
14094 {
14095 if ((cur_index_tree= tree->keys[cur_param_idx]))
14096 {
14097 cur_quick_prefix_records= param->quick_rows[cur_index];
14098 if (unlikely(cur_index_tree && thd->trace_started()))
14099 {
14100 Json_writer_array trace_range(thd, "ranges");
14101 trace_ranges(&trace_range, param, cur_param_idx,
14102 cur_index_tree, cur_index_info->key_part);
14103 }
14104 }
14105 else
14106 cur_quick_prefix_records= HA_POS_ERROR;
14107 }
14108 cost_group_min_max(table, cur_index_info, cur_used_key_parts,
14109 cur_group_key_parts, tree, cur_index_tree,
14110 cur_quick_prefix_records, have_min, have_max,
14111 &cur_read_cost, &cur_records);
14112 /*
14113 If cur_read_cost is lower than best_read_cost use cur_index.
14114 Do not compare doubles directly because they may have different
14115 representations (64 vs. 80 bits).
14116 */
14117 trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
14118
14119 if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
14120 {
14121 index_info= cur_index_info;
14122 index= cur_index;
14123 best_read_cost= cur_read_cost;
14124 best_records= cur_records;
14125 best_index_tree= cur_index_tree;
14126 best_quick_prefix_records= cur_quick_prefix_records;
14127 best_param_idx= cur_param_idx;
14128 group_key_parts= cur_group_key_parts;
14129 group_prefix_len= cur_group_prefix_len;
14130 key_infix_len= cur_key_infix_len;
14131 if (key_infix_len)
14132 memcpy (key_infix, cur_key_infix, sizeof (key_infix));
14133 used_key_parts= cur_used_key_parts;
14134 }
14135
14136 next_index:
14137 if (cause)
14138 {
14139 trace_idx.add("usable", false).add("cause", cause);
14140 cause= NULL;
14141 }
14142 }
14143
14144 trace_indexes.end();
14145
14146 if (!index_info) /* No usable index found. */
14147 DBUG_RETURN(NULL);
14148
14149 /* Check (SA3) for the where clause. */
14150 bool has_min_max_fld= false, has_other_fld= false;
14151 if (join->conds && min_max_arg_item &&
14152 !check_group_min_max_predicates(join->conds, min_max_arg_item,
14153 (index_info->flags & HA_SPATIAL) ?
14154 Field::itMBR : Field::itRAW,
14155 &has_min_max_fld, &has_other_fld))
14156 {
14157 trace_group.add("usable", false)
14158 .add("cause", "unsupported predicate on agg attribute");
14159 DBUG_RETURN(NULL);
14160 }
14161
14162 /*
14163 Check (SA6) if clustered key is used
14164 */
14165 if (is_agg_distinct && table->file->is_clustering_key(index))
14166 {
14167 trace_group.add("usable", false)
14168 .add("cause", "index is clustered");
14169 DBUG_RETURN(NULL);
14170 }
14171
14172 /* The query passes all tests, so construct a new TRP object. */
14173 read_plan= new (param->mem_root)
14174 TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
14175 min_max_arg_part,
14176 group_prefix_len, used_key_parts,
14177 group_key_parts, index_info, index,
14178 key_infix_len,
14179 (key_infix_len > 0) ? key_infix : NULL,
14180 tree, best_index_tree, best_param_idx,
14181 best_quick_prefix_records);
14182 if (read_plan)
14183 {
14184 if (tree && read_plan->quick_prefix_records == 0)
14185 DBUG_RETURN(NULL);
14186
14187 read_plan->read_cost= best_read_cost;
14188 read_plan->records= best_records;
14189 if (read_time < best_read_cost && is_agg_distinct)
14190 {
14191 trace_group.add("index_scan", true);
14192 read_plan->read_cost= 0;
14193 read_plan->use_index_scan();
14194 }
14195
14196 DBUG_PRINT("info",
14197 ("Returning group min/max plan: cost: %g, records: %lu",
14198 read_plan->read_cost, (ulong) read_plan->records));
14199 }
14200
14201 DBUG_RETURN(read_plan);
14202 }
14203
14204
14205 /*
14206 Check that the MIN/MAX attribute participates only in range predicates
14207 with constants.
14208
14209 SYNOPSIS
14210 check_group_min_max_predicates()
14211 cond [in] the expression tree being analyzed
14212 min_max_arg [in] the field referenced by the MIN/MAX function(s)
14213 image_type [in] field image type used for comparisons (itMBR for spatial indexes, itRAW otherwise)
14214 has_min_max_arg [out] true if the subtree being analyzed references
14215 min_max_arg
14216 has_other_arg [out] true if the subtree being analyzed references a
14217 column other than min_max_arg
14218
14219 DESCRIPTION
14220 The function walks recursively over the cond tree representing a WHERE
14221 clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
14222 aggregate function, it is referenced only by one of the following
14223 predicates $FUNC$:
14224 {=, !=, <, <=, >, >=, between, is [not] null, multiple equal}.
14225 In addition the function checks that the WHERE condition is equivalent to
14226 "cond1 AND cond2" where :
14227 cond1 - does not use min_max_column at all.
14228 cond2 - is an AND/OR tree with leaves in form
14229 "$FUNC$(min_max_column[, const])".
14230
14231 RETURN
14232 TRUE if cond passes the test
14233 FALSE o/w
14234 */
14235
14236 static bool
14237 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
14238 Field::imagetype image_type,
14239 bool *has_min_max_arg, bool *has_other_arg)
14240 {
14241 DBUG_ENTER("check_group_min_max_predicates");
14242 DBUG_ASSERT(cond && min_max_arg_item);
14243
14244 cond= cond->real_item();
14245 Item::Type cond_type= cond->real_type();
14246 if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
14247 {
14248 DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
14249 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
14250 Item *and_or_arg;
14251 Item_func::Functype func_type= ((Item_cond*) cond)->functype();
14252 bool has_min_max= false, has_other= false;
14253 while ((and_or_arg= li++))
14254 {
14255 /*
14256 The WHERE clause doesn't pass the condition if:
14257 (1) any subtree doesn't pass the condition or
14258 (2) the subtree passes the test, but it is an OR and it references both
14259 the min/max argument and other columns.
14260 */
14261 if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item, //1
14262 image_type,
14263 &has_min_max, &has_other) ||
14264 (func_type == Item_func::COND_OR_FUNC && has_min_max && has_other))//2
14265 DBUG_RETURN(FALSE);
14266 }
14267 *has_min_max_arg= has_min_max || *has_min_max_arg;
14268 *has_other_arg= has_other || *has_other_arg;
14269 DBUG_RETURN(TRUE);
14270 }
14271
14272 /*
14273 Disallow loose index scan if the MIN/MAX argument field is referenced by
14274 a subquery in the WHERE clause.
14275 */
14276
14277 if (unlikely(cond_type == Item::SUBSELECT_ITEM))
14278 {
14279 Item_subselect *subs_cond= (Item_subselect*) cond;
14280 if (subs_cond->is_correlated)
14281 {
14282 DBUG_ASSERT(subs_cond->upper_refs.elements > 0);
14283 List_iterator_fast<Item_subselect::Ref_to_outside>
14284 li(subs_cond->upper_refs);
14285 Item_subselect::Ref_to_outside *dep;
14286 while ((dep= li++))
14287 {
14288 if (dep->item->eq(min_max_arg_item, FALSE))
14289 DBUG_RETURN(FALSE);
14290 }
14291 }
14292 DBUG_RETURN(TRUE);
14293 }
14294 /*
14295 Subquery with IS [NOT] NULL
14296 TODO: Look into the cache_item and optimize it like we do for
14297 subselects above
14298 */
14299 if (unlikely(cond_type == Item::CACHE_ITEM))
14300 DBUG_RETURN(cond->const_item());
14301
14302 /*
14303 Condition of the form 'field' is equivalent to 'field <> 0' and thus
14304 satisfies the SA3 condition.
14305 */
14306 if (cond_type == Item::FIELD_ITEM)
14307 {
14308 DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
14309 if (min_max_arg_item->eq((Item_field*)cond, 1))
14310 *has_min_max_arg= true;
14311 else
14312 *has_other_arg= true;
14313 DBUG_RETURN(TRUE);
14314 }
14315
14316 /* We presume that at this point there are no other Items than functions. */
14317 DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
14318 if (unlikely(cond_type != Item::FUNC_ITEM)) /* Safety */
14319 DBUG_RETURN(FALSE);
14320
14321 /* Test if cond references only group-by or non-group fields. */
14322 Item_func *pred= (Item_func*) cond;
14323 Item_func::Functype pred_type= pred->functype();
14324 DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
14325 if (pred_type == Item_func::MULT_EQUAL_FUNC)
14326 {
14327 /*
14328 Check that each field in a multiple equality is either a constant or
14329 it is a reference to the min/max argument, or it doesn't contain the
14330 min/max argument at all.
14331 */
14332 Item_equal_fields_iterator eq_it(*((Item_equal*)pred));
14333 Item *eq_item;
14334 bool has_min_max= false, has_other= false;
14335 while ((eq_item= eq_it++))
14336 {
14337 if (min_max_arg_item->eq(eq_item->real_item(), 1))
14338 has_min_max= true;
14339 else
14340 has_other= true;
14341 }
14342 *has_min_max_arg= has_min_max || *has_min_max_arg;
14343 *has_other_arg= has_other || *has_other_arg;
14344 DBUG_RETURN(!(has_min_max && has_other));
14345 }
14346
14347 Item **arguments= pred->arguments();
14348 Item *cur_arg;
14349 bool has_min_max= false, has_other= false;
14350 for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
14351 {
14352 cur_arg= arguments[arg_idx]->real_item();
14353 DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
14354 if (cur_arg->type() == Item::FIELD_ITEM)
14355 {
14356 if (min_max_arg_item->eq(cur_arg, 1))
14357 {
14358 has_min_max= true;
14359 /*
14360 If pred references the MIN/MAX argument, check whether pred is a range
14361 condition that compares the MIN/MAX argument with a constant.
14362 */
14363 if (pred_type != Item_func::EQUAL_FUNC &&
14364 pred_type != Item_func::LT_FUNC &&
14365 pred_type != Item_func::LE_FUNC &&
14366 pred_type != Item_func::GT_FUNC &&
14367 pred_type != Item_func::GE_FUNC &&
14368 pred_type != Item_func::BETWEEN &&
14369 pred_type != Item_func::ISNULL_FUNC &&
14370 pred_type != Item_func::ISNOTNULL_FUNC &&
14371 pred_type != Item_func::EQ_FUNC &&
14372 pred_type != Item_func::NE_FUNC)
14373 DBUG_RETURN(FALSE);
14374
14375 /* Check that pred compares min_max_arg_item with a constant. */
14376 Item *args[3];
14377 bzero(args, 3 * sizeof(Item*));
14378 bool inv;
14379 /* Test if this is a comparison of a field and a constant. */
14380 if (!simple_pred(pred, args, &inv))
14381 DBUG_RETURN(FALSE);
14382
14383 if (args[0] && args[1]) // this is a binary function or BETWEEN
14384 {
14385 DBUG_ASSERT(pred->fixed_type_handler());
14386 DBUG_ASSERT(pred->fixed_type_handler()->is_bool_type());
14387 Item_bool_func *bool_func= (Item_bool_func*) pred;
14388 Field *field= min_max_arg_item->field;
14389 if (!args[2]) // this is a binary function
14390 {
14391 if (!field->can_optimize_group_min_max(bool_func, args[1]))
14392 DBUG_RETURN(FALSE);
14393 }
14394 else // this is BETWEEN
14395 {
14396 if (!field->can_optimize_group_min_max(bool_func, args[1]) ||
14397 !field->can_optimize_group_min_max(bool_func, args[2]))
14398 DBUG_RETURN(FALSE);
14399 }
14400 }
14401 }
14402 else
14403 has_other= true;
14404 }
14405 else if (cur_arg->type() == Item::FUNC_ITEM)
14406 {
14407 if (!check_group_min_max_predicates(cur_arg, min_max_arg_item, image_type,
14408 &has_min_max, &has_other))
14409 DBUG_RETURN(FALSE);
14410 }
14411 else if (cur_arg->const_item() && !cur_arg->is_expensive())
14412 {
14413 /*
14414 For predicates of the form "const OP expr" we also have to check 'expr'
14415 to make a decision.
14416 */
14417 continue;
14418 }
14419 else
14420 DBUG_RETURN(FALSE);
14421 if(has_min_max && has_other)
14422 DBUG_RETURN(FALSE);
14423 }
14424 *has_min_max_arg= has_min_max || *has_min_max_arg;
14425 *has_other_arg= has_other || *has_other_arg;
14426
14427 DBUG_RETURN(TRUE);
14428 }
14429
14430
14431 /*
14432 Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
14433 any. 'tree' must be a unique conjunction to ALL predicates in earlier
14434 keyparts of 'keypart_tree'.
14435
14436 E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
14437 covers 'field', all of these conditions satisfy the requirement:
14438
14439 1. "(kp1=2 OR kp1=3) AND kp2=10" => returns "kp2=10"
14440 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)" => returns "kp2=10"
14441 3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
14442 => returns "kp2=10 OR kp2=11"
14443
14444 whereas these do not:
14445 1. "(kp1=2 AND kp2=10) OR kp1=3"
14446 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
14447 3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
14448
14449 This function effectively tests requirement WA2. In combination with
14450 a test that the returned tree has no more than one range it is also
14451 a test of NGA3.
14452
14453 @param[in] field The field we want the SEL_ARG tree for
14454 @param[in] keypart_tree Root node of the SEL_ARG* tree for the index
14455 @param[out] cur_range The SEL_ARG tree, if any, for the keypart
14456 covering 'field'
14457 @retval true 'keypart_tree' contained a predicate for 'field' that
14458 is not conjunction to all predicates on earlier keyparts
14459 @retval false otherwise
14460 */
14461
14462 static bool
14463 get_sel_arg_for_keypart(Field *field,
14464 SEL_ARG *keypart_tree,
14465 SEL_ARG **cur_range)
14466 {
14467 if (keypart_tree == NULL)
14468 return false;
14469 if (keypart_tree->field->eq(field))
14470 {
14471 *cur_range= keypart_tree;
14472 return false;
14473 }
14474
14475 SEL_ARG *tree_first_range= NULL;
14476 SEL_ARG *first_kp= keypart_tree->first();
14477
14478 for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
14479 {
14480 SEL_ARG *curr_tree= NULL;
14481 if (cur_kp->next_key_part)
14482 {
14483 if (get_sel_arg_for_keypart(field,
14484 cur_kp->next_key_part,
14485 &curr_tree))
14486 return true;
14487 }
14488 /*
14489 Check if the SEL_ARG tree for 'field' is identical for all ranges in
14490 'keypart_tree'.
14491 */
14492 if (cur_kp == first_kp)
14493 tree_first_range= curr_tree;
14494 else if (!all_same(tree_first_range, curr_tree))
14495 return true;
14496 }
14497 *cur_range= tree_first_range;
14498 return false;
14499 }
14500
14501 /*
14502 Extract a sequence of constants from a conjunction of equality predicates.
14503
14504 SYNOPSIS
14505 get_constant_key_infix()
14506 index_info [in] Descriptor of the chosen index.
14507 index_range_tree [in] Range tree for the chosen index
14508 first_non_group_part [in] First index part after group attribute parts
14509 min_max_arg_part [in] The keypart of the MIN/MAX argument if any
14510 last_part [in] Last keypart of the index
14511 thd [in] Current thread
14512 key_infix [out] Infix of constants to be used for index lookup
14513 key_infix_len [out] Length of the infix
14514 first_non_infix_part [out] The first keypart after the infix (if any)
14515
14516 DESCRIPTION
14517 Test conditions (NGA1, NGA2, NGA3) from get_best_group_min_max(). Namely,
14518 for each keypart field NG_i not in GROUP-BY, check that there is exactly one
14519 constant equality predicate among conds with the form (NG_i = const_ci) or
14520 (const_ci = NG_i). In addition, there can only be one range when there is
14521 such a gap.
14522 Thus all the NG_i attributes must fill the 'gap' between the last group-by
14523 attribute and the MIN/MAX attribute in the index (if present). Also ensure
14524 that there is only a single range on each NG_i (NGA3). If these
14525 conditions hold, copy each constant from its corresponding predicate into
14526 key_infix, in the order its NG_i attribute appears in the index, and update
14527 key_infix_len with the total length of the key parts in key_infix.
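 For example (illustrative): given an index on (a, b, c), the query
 SELECT a, MIN(c) FROM t WHERE b = 5 GROUP BY a
 has a gap consisting of keypart b between the last group attribute a
 and the MIN/MAX attribute c. The constant 5 from "b = 5" is copied
 into key_infix, and key_infix_len becomes the store_length of b.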
14528
14529 RETURN
14530 TRUE if the index passes the test
14531 FALSE o/w
14532 */
14533 static bool
14534 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
14535 KEY_PART_INFO *first_non_group_part,
14536 KEY_PART_INFO *min_max_arg_part,
14537 KEY_PART_INFO *last_part, THD *thd,
14538 uchar *key_infix, uint *key_infix_len,
14539 KEY_PART_INFO **first_non_infix_part)
14540 {
14541 KEY_PART_INFO *cur_part;
14542 /* End part for the first loop below. */
14543 KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
14544
14545 *key_infix_len= 0;
14546 uchar *key_ptr= key_infix;
14547 for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
14548 {
14549 SEL_ARG *cur_range= NULL;
14550 /*
14551 Check NGA3:
14552 1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
14553 checks for a unique conjunction of this tree with all the predicates
14554 on the earlier keyparts in the index.
14555 2. Check for multiple ranges on the found keypart tree.
14556
14557 We assume that index_range_tree points to the leftmost keypart in
14558 the index.
14559 */
14560 if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
14561 &cur_range))
14562 return false;
14563
14564 if (cur_range && cur_range->elements > 1)
14565 return false;
14566
14567 if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
14568 {
14569 if (min_max_arg_part)
14570 return false; /* The current keypart has no range predicates at all. */
14571 else
14572 {
14573 *first_non_infix_part= cur_part;
14574 return true;
14575 }
14576 }
14577
14578 if ((cur_range->min_flag & NO_MIN_RANGE) ||
14579 (cur_range->max_flag & NO_MAX_RANGE) ||
14580 (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
14581 return false;
14582
14583 uint field_length= cur_part->store_length;
14584 if (cur_range->maybe_null &&
14585 cur_range->min_value[0] && cur_range->max_value[0])
14586 {
14587 /*
14588 cur_range specifies 'IS NULL'. In this case the argument points
14589 to a "null value" (is_null_string) that may not always be long
14590 enough for a direct memcpy to a field.
14591 */
14592 DBUG_ASSERT (field_length > 0);
14593 *key_ptr= 1;
14594 bzero(key_ptr+1,field_length-1);
14595 key_ptr+= field_length;
14596 *key_infix_len+= field_length;
14597 }
14598 else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
14599 { /* cur_range specifies an equality condition. */
14600 memcpy(key_ptr, cur_range->min_value, field_length);
14601 key_ptr+= field_length;
14602 *key_infix_len+= field_length;
14603 }
14604 else
14605 return false;
14606 }
14607
14608 if (!min_max_arg_part && (cur_part == last_part))
14609 *first_non_infix_part= last_part;
14610
14611 return TRUE;
14612 }
14613
14614
14615 /*
14616 Find the key part referenced by a field.
14617
14618 SYNOPSIS
14619 get_field_keypart()
14620 index descriptor of an index
14621 field field that possibly references some key part in index
14622
14623 NOTES
14624 The return value can be used to get a KEY_PART_INFO pointer by
14625 part= index->key_part + get_field_keypart(...) - 1;
14626
14627 RETURN
14628 Positive number which is the consecutive number of the key part, or
14629 0 if field does not reference any index field.
14630 */
14631
14632 static inline uint
14633 get_field_keypart(KEY *index, Field *field)
14634 {
14635 KEY_PART_INFO *part, *end;
14636
14637 for (part= index->key_part,
14638 end= part + field->table->actual_n_key_parts(index);
14639 part < end; part++)
14640 {
14641 if (field->eq(part->field))
14642 return (uint)(part - index->key_part + 1);
14643 }
14644 return 0;
14645 }
14646
14647
14648 /*
14649 Compute the cost of a quick_group_min_max_select for a particular index.
14650
14651 SYNOPSIS
14652 cost_group_min_max()
14653 table [in] The table being accessed
14654 index_info [in] The index used to access the table
14655 used_key_parts [in] Number of key parts used to access the index
14656 group_key_parts [in] Number of index key parts in the group prefix
14657 range_tree [in] Tree of ranges for all indexes
14658 index_tree [in] The range tree for the current index
14659 quick_prefix_records [in] Number of records retrieved by the internally
14660 used quick range select if any
14661 have_min [in] True if there is a MIN function
14662 have_max [in] True if there is a MAX function
14663 read_cost [out] The cost to retrieve rows via this quick select
14664 records [out] The number of rows retrieved
14665
14666 DESCRIPTION
14667 This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
14668 the number of rows returned.
14669
14670 NOTES
14671 The cost computation distinguishes several cases:
14672 1) No equality predicates over non-group attributes (thus no key_infix).
14673 If groups are bigger than blocks on average, then we assume that it
14674 is very unlikely that block ends are aligned with group ends, thus even
14675 if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
14676 keys, except for the first MIN and the last MAX keys, will be in the
14677 same block. If groups are smaller than blocks, then we are going to
14678 read all blocks.
14679 2) There are equality predicates over non-group attributes.
14680 In this case the group prefix is extended by additional constants, and
14681 as a result the min/max values are inside sub-groups of the original
14682 groups. The number of blocks that will be read depends on whether the
14683 ends of these sub-groups will be contained in the same or in different
14684 blocks. We compute the probability for the two ends of a subgroup to be
14685 in two different blocks as the ratio of:
14686 - the number of positions of the left-end of a subgroup inside a group,
14687 such that the right end of the subgroup is past the end of the buffer
14688 containing the left-end, and
14689 - the total number of possible positions for the left-end of the
14690 subgroup, which is the number of keys in the containing group.
14691 We assume it is very unlikely that two ends of subsequent subgroups are
14692 in the same block.
14693 3) There are range predicates over the group attributes.
14694 Then some groups may be filtered by the range predicates. We use the
14695 selectivity of the range predicates to decide how many groups will be
14696 filtered.
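 A rough worked example (illustrative numbers only): with
 stats.block_size = 16384 and key_length + ref_length = 60, the 75%
 fill factor assumed below gives keys_per_block = 12288/60 + 1 = 205.
 For a table of 1,000,000 rows that is about 4880 blocks. With 10,000
 groups of 100 keys each and no key infix, groups are smaller than
 blocks, so by case 1) all ~4880 blocks are read.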
14697
14698 TODO
14699 - Take into account the optional range predicates over the MIN/MAX
14700 argument.
14701 - Check if we have a PK index and we use all cols - then each key is a
14702 group, and it will be better to use an index scan.
14703
14704 RETURN
14705 None
14706 */
14707
14708 void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
14709 uint group_key_parts, SEL_TREE *range_tree,
14710 SEL_ARG *index_tree, ha_rows quick_prefix_records,
14711 bool have_min, bool have_max,
14712 double *read_cost, ha_rows *records)
14713 {
14714 ha_rows table_records;
14715 ha_rows num_groups;
14716 ha_rows num_blocks;
14717 uint keys_per_block;
14718 ha_rows keys_per_group;
14719 ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */
14720 /* formed by a key infix. */
14721 double p_overlap; /* Probability that a sub-group overlaps two blocks. */
14722 double quick_prefix_selectivity;
14723 double io_cost;
14724 DBUG_ENTER("cost_group_min_max");
14725
14726 table_records= table->stat_records();
14727 /* Assume the block is 75% full */
14728 keys_per_block= (uint) (table->file->stats.block_size * 3 / 4 /
14729 (index_info->key_length + table->file->ref_length)
14730 + 1);
14731 num_blocks= (ha_rows)(table_records / keys_per_block) + 1;
14732
14733 /* Compute the number of keys in a group. */
14734 if (!group_key_parts)
14735 {
14736 /* Summary over the whole table */
14737 keys_per_group= table_records;
14738 }
14739 else
14740 {
14741 keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts -
14742 1);
14743 }
14744
14745 if (keys_per_group == 0) /* If there are no statistics, try to guess: */
14746 /* each group contains 10% of all records */
14747 keys_per_group= (table_records / 10) + 1;
14748 num_groups= (table_records / keys_per_group) + 1;
14749
14750 /* Apply the selectivity of the quick select for group prefixes. */
14751 if (range_tree && (quick_prefix_records != HA_POS_ERROR))
14752 {
14753 quick_prefix_selectivity= (double) quick_prefix_records /
14754 (double) table_records;
14755 num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
14756 set_if_bigger(num_groups, 1);
14757 }
14758
14759 if (used_key_parts > group_key_parts)
14760 { /*
14761 Compute the probability that two ends of a subgroup are inside
14762 different blocks.
14763 */
14764 keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts - 1);
14765 if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
14766 p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
14767 else
14768 {
14769 double blocks_per_group= (double) num_blocks / (double) num_groups;
14770 p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
14771 p_overlap= MY_MIN(p_overlap, 1.0);
14772 }
14773 io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
14774 }
14775 else
14776 io_cost= (keys_per_group > keys_per_block) ?
14777 (have_min && have_max) ? (double) (num_groups + 1) :
14778 (double) num_groups :
14779 (double) num_blocks;
14780
14781 /*
14782 CPU cost must be comparable to that of an index scan as computed
14783 in SQL_SELECT::test_quick_select(). When the groups are small,
14784 e.g. for a unique index, using index scan will be cheaper since it
14785 reads the next record without having to re-position to it on every
14786 group. To make the CPU cost reflect this, we estimate the CPU cost
14787 as the sum of:
14788 1. Cost for evaluating the condition (similarly as for index scan).
14789 2. Cost for navigating the index structure (assuming a b-tree).
14790 Note: We only add the cost for one comparison per block. For a
14791 b-tree the number of comparisons will be larger.
14792 TODO: This cost should be provided by the storage engine.
14793 */
14794 const double tree_traversal_cost=
14795 ceil(log(static_cast<double>(table_records))/
14796 log(static_cast<double>(keys_per_block))) *
14797 1/(2*TIME_FOR_COMPARE);
14798
14799 const double cpu_cost= num_groups *
14800 (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX);
14801
14802 *read_cost= io_cost + cpu_cost;
14803 *records= num_groups;
14804
14805 DBUG_PRINT("info",
14806 ("table rows: %lu keys/block: %u keys/group: %lu "
14807 "result rows: %lu blocks: %lu",
14808 (ulong) table_records, keys_per_block, (ulong) keys_per_group,
14809 (ulong) *records, (ulong) num_blocks));
14810 DBUG_VOID_RETURN;
14811 }
14812
14813
14814 /*
14815 Construct a new quick select object for queries with group by with min/max.
14816
14817 SYNOPSIS
14818 TRP_GROUP_MIN_MAX::make_quick()
14819 param Parameter from test_quick_select
14820 retrieve_full_rows ignored
14821 parent_alloc Memory pool to use, if any.
14822
14823 NOTES
14824 Make_quick ignores the retrieve_full_rows parameter because
14825 QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
14826 The other parameters are ignored as well because all necessary
14827 data to create the QUICK object is computed at this TRP creation
14828 time.
14829
14830 RETURN
14831 New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
14832 NULL otherwise.
14833 */
14834
14835 QUICK_SELECT_I *
14836 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
14837 MEM_ROOT *parent_alloc)
14838 {
14839 QUICK_GROUP_MIN_MAX_SELECT *quick;
14840 DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
14841
14842 quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
14843 param->thd->lex->current_select->join,
14844 have_min, have_max,
14845 have_agg_distinct, min_max_arg_part,
14846 group_prefix_len, group_key_parts,
14847 used_key_parts, index_info, index,
14848 read_cost, records, key_infix_len,
14849 key_infix, parent_alloc, is_index_scan);
14850 if (!quick)
14851 DBUG_RETURN(NULL);
14852
14853 if (quick->init())
14854 {
14855 delete quick;
14856 DBUG_RETURN(NULL);
14857 }
14858
14859 if (range_tree)
14860 {
14861 DBUG_ASSERT(quick_prefix_records > 0);
14862 if (quick_prefix_records == HA_POS_ERROR)
14863 quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
14864 else
14865 /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
14866 quick->quick_prefix_select= get_quick_select(param, param_idx,
14867 index_tree,
14868 HA_MRR_USE_DEFAULT_IMPL, 0,
14869 &quick->alloc);
14870
14871 /*
14872 Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
14873 attribute, and create an array of QUICK_RANGES to be used by the
14874 new quick select.
14875 */
14876 if (min_max_arg_part)
14877 {
14878 SEL_ARG *min_max_range= index_tree;
14879 while (min_max_range) /* Find the tree for the MIN/MAX key part. */
14880 {
14881 if (min_max_range->field->eq(min_max_arg_part->field))
14882 break;
14883 min_max_range= min_max_range->next_key_part;
14884 }
14885 /* Scroll to the leftmost interval for the MIN/MAX argument. */
14886 while (min_max_range && min_max_range->prev)
14887 min_max_range= min_max_range->prev;
14888 /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
14889 while (min_max_range)
14890 {
14891 if (quick->add_range(min_max_range))
14892 {
14893 delete quick;
14894 quick= NULL;
14895 DBUG_RETURN(NULL);
14896 }
14897 min_max_range= min_max_range->next;
14898 }
14899 }
14900 }
14901 else
14902 quick->quick_prefix_select= NULL;
14903
14904 quick->update_key_stat();
14905 quick->adjust_prefix_ranges();
14906
14907 DBUG_RETURN(quick);
14908 }
14909
14910
14911 /*
14912 Construct new quick select for group queries with min/max.
14913
14914 SYNOPSIS
14915 QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
14916 table The table being accessed
14917 join Descriptor of the current query
14918 have_min TRUE if the query selects a MIN function
14919 have_max TRUE if the query selects a MAX function
14920 min_max_arg_part The only argument field of all MIN/MAX functions
14921 group_prefix_len Length of all key parts in the group prefix
14922 prefix_key_parts All key parts in the group prefix
14923 index_info The index chosen for data access
14924 use_index The id of index_info
14925 read_cost Cost of this access method
14926 records Number of records returned
14927 key_infix_len Length of the key infix appended to the group prefix
14928 key_infix Infix of constants from equality predicates
14929 parent_alloc Memory pool for this and quick_prefix_select data
14930 is_index_scan if TRUE, get the next different key not by jumping
14931 to it via an index read, but by scanning until the
14932 end of the rows with an equal key value.
14933
14934 RETURN
14935 None
14936 */
14937
14938 QUICK_GROUP_MIN_MAX_SELECT::
14939 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
14940 bool have_max_arg, bool have_agg_distinct_arg,
14941 KEY_PART_INFO *min_max_arg_part_arg,
14942 uint group_prefix_len_arg, uint group_key_parts_arg,
14943 uint used_key_parts_arg, KEY *index_info_arg,
14944 uint use_index, double read_cost_arg,
14945 ha_rows records_arg, uint key_infix_len_arg,
14946 uchar *key_infix_arg, MEM_ROOT *parent_alloc,
14947 bool is_index_scan_arg)
14948 :file(table->file), join(join_arg), index_info(index_info_arg),
14949 group_prefix_len(group_prefix_len_arg),
14950 group_key_parts(group_key_parts_arg), have_min(have_min_arg),
14951 have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
14952 seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
14953 key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
14954 min_functions_it(NULL), max_functions_it(NULL),
14955 is_index_scan(is_index_scan_arg)
14956 {
14957 head= table;
14958 index= use_index;
14959 record= head->record[0];
14960 tmp_record= head->record[1];
14961 read_time= read_cost_arg;
14962 records= records_arg;
14963 used_key_parts= used_key_parts_arg;
14964 real_key_parts= used_key_parts_arg;
14965 real_prefix_len= group_prefix_len + key_infix_len;
14966 group_prefix= NULL;
14967 min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
14968
14969 /*
14970 We can't have parent_alloc set as the init function can't handle this case
14971 yet.
14972 */
14973 DBUG_ASSERT(!parent_alloc);
14974 if (!parent_alloc)
14975 {
14976 THD *thd= join->thd;
14977 init_sql_alloc(key_memory_quick_range_select_root, &alloc,
14978 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
14979 thd->mem_root= &alloc;
14980 }
14981 else
14982 bzero(&alloc, sizeof(MEM_ROOT)); // ensure that it's not used
14983 }
14984
14985
14986 /*
14987 Do post-constructor initialization.
14988
14989 SYNOPSIS
14990 QUICK_GROUP_MIN_MAX_SELECT::init()
14991
14992 DESCRIPTION
14993 The method performs initialization that cannot be done in the constructor
14994 such as memory allocations that may fail. It allocates memory for the
14995 group prefix and infix buffers, and for the lists of MIN/MAX items to be
14996 updated during execution.
14997
14998 RETURN
14999 0 OK
15000 other Error code
15001 */
15002
15003 int QUICK_GROUP_MIN_MAX_SELECT::init()
15004 {
15005 if (group_prefix) /* Already initialized. */
15006 return 0;
15007
15008 /*
15009 We allocate one byte more to serve the case when the last field in
15010 the buffer is compared using uint3korr (e.g. a Field_newdate field)
15011 */
15012 if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len+1)))
15013 return 1;
15014 /*
15015 We may use group_prefix to store keys with all select fields, so allocate
15016 enough space for it.
15017 We allocate one byte more to serve the case when the last field in
15018 the buffer is compared using uint3korr (e.g. a Field_newdate field)
15019 */
15020 if (!(group_prefix= (uchar*) alloc_root(&alloc,
15021 real_prefix_len+min_max_arg_len+1)))
15022 return 1;
15023
15024 if (key_infix_len > 0)
15025 {
15026 /*
15027 The memory location pointed to by key_infix will be deleted soon, so
15028 allocate a new buffer and copy the key_infix into it.
15029 */
15030 uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
15031 if (!tmp_key_infix)
15032 return 1;
15033 memcpy(tmp_key_infix, this->key_infix, key_infix_len);
15034 this->key_infix= tmp_key_infix;
15035 }
15036
15037 if (min_max_arg_part)
15038 {
15039 if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &min_max_ranges,
15040 sizeof(QUICK_RANGE*), 16, 16,
15041 MYF(MY_THREAD_SPECIFIC)))
15042 return 1;
15043
15044 if (have_min)
15045 {
15046 if (!(min_functions= new List<Item_sum>))
15047 return 1;
15048 }
15049 else
15050 min_functions= NULL;
15051 if (have_max)
15052 {
15053 if (!(max_functions= new List<Item_sum>))
15054 return 1;
15055 }
15056 else
15057 max_functions= NULL;
15058
15059 Item_sum *min_max_item;
15060 Item_sum **func_ptr= join->sum_funcs;
15061 while ((min_max_item= *(func_ptr++)))
15062 {
15063 if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
15064 min_functions->push_back(min_max_item);
15065 else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
15066 max_functions->push_back(min_max_item);
15067 }
15068
15069 if (have_min)
15070 {
15071 if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
15072 return 1;
15073 }
15074
15075 if (have_max)
15076 {
15077 if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
15078 return 1;
15079 }
15080 }
15081 else
15082 min_max_ranges.elements= 0;
15083
15084 return 0;
15085 }
15086
15087
15088 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
15089 {
15090 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
15091 if (file->inited != handler::NONE)
15092 {
15093 DBUG_ASSERT(file == head->file);
15094 head->file->ha_end_keyread();
15095 /*
15096 There may be a code path when the same table was first accessed by index,
15097 then the index is closed, and the table is scanned (order by + loose scan).
15098 */
15099 file->ha_index_or_rnd_end();
15100 }
15101 if (min_max_arg_part)
15102 delete_dynamic(&min_max_ranges);
15103 free_root(&alloc,MYF(0));
15104 delete min_functions_it;
15105 delete max_functions_it;
15106 delete quick_prefix_select;
15107 DBUG_VOID_RETURN;
15108 }
15109
15110
15111 /*
15112 Create and add a new quick range object, unless the range is infinite.
15113
15114 SYNOPSIS
15115 QUICK_GROUP_MIN_MAX_SELECT::add_range()
15116 sel_range Range object from which a new QUICK_RANGE object is created
15117
15118 NOTES
15119 Construct a new QUICK_RANGE object from a SEL_ARG object, and
15120 add it to the array min_max_ranges. If sel_arg is an infinite
15121 range, e.g. (x < 5 or x > 4), then skip it and do not construct
15122 a quick range.
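 For example (illustrative): for a MIN/MAX argument c, the predicate
 "c = 5" yields an EQ_RANGE quick range and "c IS NULL" a NULL_RANGE,
 while (c < 5 OR c > 4) merges into the infinite range and no quick
 range is constructed.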
15123
15124 RETURN
15125 FALSE on success
15126 TRUE otherwise
15127 */
15128
15129 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
15130 {
15131 QUICK_RANGE *range;
15132 uint range_flag= sel_range->min_flag | sel_range->max_flag;
15133
15134 /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
15135 if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
15136 return FALSE;
15137
15138 if (!(sel_range->min_flag & NO_MIN_RANGE) &&
15139 !(sel_range->max_flag & NO_MAX_RANGE))
15140 {
15141 if (sel_range->maybe_null &&
15142 sel_range->min_value[0] && sel_range->max_value[0])
15143 range_flag|= NULL_RANGE; /* IS NULL condition */
15144 else if (memcmp(sel_range->min_value, sel_range->max_value,
15145 min_max_arg_len) == 0)
15146 range_flag|= EQ_RANGE; /* equality condition */
15147 }
15148 range= new QUICK_RANGE(join->thd, sel_range->min_value, min_max_arg_len,
15149 make_keypart_map(sel_range->part),
15150 sel_range->max_value, min_max_arg_len,
15151 make_keypart_map(sel_range->part),
15152 range_flag);
15153 if (!range)
15154 return TRUE;
15155 if (insert_dynamic(&min_max_ranges, (uchar*)&range))
15156 return TRUE;
15157 return FALSE;
15158 }


/*
  Make the ranges in quick_prefix_select inclusive if there are more
  conditions in quick_prefix_select than the ones used for jumping through
  the prefixes.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()

  NOTES
    quick_prefix_select is made over the conditions on the whole key.
    It defines a number of ranges of length x.
    However, when jumping through the prefixes we use only the first
    few most significant keyparts in the range key. If there are more
    keyparts to follow the ones we are using, we must make the condition
    on the key inclusive (because x < "ab" means
    x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b')).
    To achieve the above we must turn off the NEAR_MIN/NEAR_MAX flags.
*/
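
/*
  Illustration of the NOTES above (hypothetical key values): assume a
  two-keypart range key with the condition (k1,k2) < ('a','b') while the
  jumps use only k1. Rows with k1 = 'a' and k2 < 'b' still qualify, so the
  prefix range must be widened to k1 <= 'a', i.e. NEAR_MAX is cleared.
*/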
void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
{
  if (quick_prefix_select &&
      group_prefix_len < quick_prefix_select->max_used_key_length)
  {
    DYNAMIC_ARRAY *arr;
    uint inx;

    for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
    {
      QUICK_RANGE *range;

      get_dynamic(arr, (uchar*)&range, inx);
      range->flag &= ~(NEAR_MIN | NEAR_MAX);
    }
  }
}


/*
  Determine the total number and length of the keys that will be used for
  index lookup.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()

  DESCRIPTION
    The total length of the keys used for index lookup depends on whether
    there are any predicates referencing the min/max argument, and/or if
    the min/max argument field can be NULL.
    This function does an optimistic analysis whether the search key might
    be extended by a constant for the min/max keypart. It is 'optimistic'
    because during actual execution it may happen that a particular range
    is skipped, and then a shorter key will be used. However this is data
    dependent and can't be easily estimated here.

  RETURN
    None
*/
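
/*
  A hypothetical example of the analysis: with an index (a,b), GROUP BY a
  and WHERE b > 10, the right-most MIN range has a lower boundary, so
  max_used_key_length grows by min_max_arg_len and used_key_parts by one.
  With no predicate on a non-nullable 'b', only the group prefix is counted.
*/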

void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
{
  max_used_key_length= real_prefix_len;
  if (min_max_ranges.elements > 0)
  {
    QUICK_RANGE *cur_range;
    if (have_min)
    { /* Check if the right-most range has a lower boundary. */
      get_dynamic(&min_max_ranges, (uchar*)&cur_range,
                  min_max_ranges.elements - 1);
      if (!(cur_range->flag & NO_MIN_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
        used_key_parts++;
        return;
      }
    }
    if (have_max)
    { /* Check if the left-most range has an upper boundary. */
      get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
      if (!(cur_range->flag & NO_MAX_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
        used_key_parts++;
        return;
      }
    }
  }
  else if (have_min && min_max_arg_part &&
           min_max_arg_part->field->real_maybe_null())
  {
    /*
      If a MIN/MAX argument value is NULL, we can quickly determine
      that we're in the beginning of the next group, because NULLs
      are always < any other value. This allows us to quickly
      determine the end of the current group and jump to the next
      group (see next_min()) and thus effectively increases the
      usable key length.
    */
    max_used_key_length+= min_max_arg_len;
    used_key_parts++;
  }
}


/*
  Initialize a quick group min/max select for key retrieval.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::reset()

  DESCRIPTION
    Initialize the index chosen for access and find and store the prefix
    of the last group. The method is expensive since it performs disk access.

  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");

  seen_first_key= FALSE;
  head->file->ha_start_keyread(index); /* We need only the key attributes */

  if ((result= file->ha_index_init(index,1)))
  {
    head->file->print_error(result, MYF(0));
    DBUG_RETURN(result);
  }
  if (quick_prefix_select && quick_prefix_select->reset())
    DBUG_RETURN(1);
  result= file->ha_index_last(record);
  if (result == HA_ERR_END_OF_FILE)
    DBUG_RETURN(0);
  /* Save the prefix of the last group. */
  key_copy(last_prefix, record, index_info, group_prefix_len);

  DBUG_RETURN(0);
}



/*
  Get the next key containing the MIN and/or MAX key for the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::get_next()

  DESCRIPTION
    The method finds the next subsequent group of records that satisfies the
    query conditions and finds the keys that contain the MIN/MAX values for
    the key part referenced by the MIN/MAX function(s). Once a group and its
    MIN/MAX values are found, store these values in the Item_sum objects for
    the MIN/MAX functions. The rest of the values in the result row are stored
    in the Item_field::result_field of each select field. If the query does
    not contain MIN and/or MAX functions, then the function only finds the
    group prefix, which is a query answer itself.

  NOTES
    If both MIN and MAX are computed, then we use the fact that if there is
    no MIN key, there can't be a MAX key as well, so we can skip looking
    for a MAX key in this case.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/
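
/*
  A sketch of the jump pattern (hypothetical data): for an index (a,b) with
  key tuples (1,1) (1,5) (2,3) (2,7) and the query

    SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a;

  each iteration positions on the next group prefix (a=1, then a=2) and
  reads only the first and last keys of that group instead of scanning all
  of its rows.
*/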

int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
  int result;
#endif
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the
    last group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue;
      break;
    }

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= file->ha_index_read_map(record, group_prefix,
                                      make_prev_keypart_map(real_key_parts),
                                      HA_READ_KEY_EXACT);

    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}


/*
  Retrieve the minimal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min()

  DESCRIPTION
    Find the minimal key within this group such that the key satisfies the
    query conditions and NULL semantics. The found key is loaded into
    this->record.

  IMPLEMENTATION
    Depending on the values of min_max_ranges.elements, key_infix_len, and
    whether there is a NULL in the MIN field, this function may directly
    return without any data access. In this case we use the key loaded into
    this->record by the call to this->next_prefix() just before this call.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   - "" -
    other                if some error occurred
*/
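
/*
  NULL-handling sketch (hypothetical data): with an index (a,b), 'b'
  nullable, and keys (1,NULL) (1,NULL) (1,7) in one group, the initial
  positioning lands on (1,NULL) and next_min() jumps past the NULLs to
  (1,7). If the whole group contains only NULLs, the first key of the
  group is kept as the result.
*/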

int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result=
           file->ha_index_read_map(record, group_prefix,
                                   make_prev_keypart_map(real_key_parts),
                                   HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
        in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar *tmp_key_buff= (uchar*)my_alloca(max_used_key_length);
      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(tmp_key_buff, record, index_info, max_used_key_length);
      result= file->ha_index_read_map(record, tmp_key_buff,
                                      make_keypart_map(real_key_parts),
                                      HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, tmp_key_buff, index_info, 0);
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
      my_afree(tmp_key_buff);
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}


/*
  Retrieve the maximal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max()

  DESCRIPTION
    Lookup the maximal key of the group, and store it into this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   - "" -
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max()
{
  int result;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");

  /* Get the last key in the (possibly extended) group. */
  if (min_max_ranges.elements > 0)
    result= next_max_in_range();
  else
    result= file->ha_index_read_map(record, group_prefix,
                                    make_prev_keypart_map(real_key_parts),
                                    HA_READ_PREFIX_LAST);
  DBUG_RETURN(result);
}


/**
  Find the next different key value by skipping all the rows with the same
  key value.

  Implements a specialized loose index access method for queries
  containing aggregate functions with distinct of the form:
    SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
  This method replaces the "index scan + Unique class" approach (distinct
  selection), which visits all the rows of a covering index, by jumping to
  the beginning of each group instead.
  TODO: Placeholder function. To be replaced by a handler API call

  @param is_index_scan     hint to use index scan instead of random index read
                           to find the next different value.
  @param file              table handler
  @param key_part          group key to compare
  @param record            row data
  @param group_prefix      current key prefix data
  @param group_prefix_len  length of the current key prefix data
  @param group_key_parts   number of the current key prefix columns
  @return status
    @retval  0  success
    @retval !0  failure
*/
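
/*
  Illustration (hypothetical prefix value): with the current prefix a=5,
  either call ha_index_next() repeatedly until key_cmp() reports a != 5
  (is_index_scan == true, cheap when groups are small), or issue a single
  ha_index_read_map(..., HA_READ_AFTER_KEY) call that jumps directly to
  the first key after the a=5 prefix.
*/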

static int index_next_different(bool is_index_scan, handler *file,
                                KEY_PART_INFO *key_part, uchar *record,
                                const uchar *group_prefix,
                                uint group_prefix_len,
                                uint group_key_parts)
{
  if (is_index_scan)
  {
    int result= 0;

    while (!key_cmp(key_part, group_prefix, group_prefix_len))
    {
      result= file->ha_index_next(record);
      if (result)
        return(result);
    }
    return result;
  }
  else
    return file->ha_index_read_map(record, group_prefix,
                                   make_prev_keypart_map(group_key_parts),
                                   HA_READ_AFTER_KEY);
}


/*
  Determine the prefix of the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_prefix()

  DESCRIPTION
    Determine the prefix of the next group that satisfies the query
    conditions. If there is a range condition referencing the group
    attributes, use a QUICK_RANGE_SELECT object to retrieve the *first* key
    that satisfies the condition. If there is a key infix of constants,
    append this infix immediately after the group attributes. The possibly
    extended prefix is stored in this->group_prefix. The first key of the
    found group is stored in this->record, on which this->next_min() relies.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
    HA_ERR_END_OF_FILE   if there are no more keys
    other                if some error occurred
*/
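
/*
  Infix sketch (hypothetical schema): for an index (a,b,c), GROUP BY a and
  the equality b = 2 on the in-between keypart, key_infix holds the constant
  key image of b = 2. After each jump the prefix becomes (<a-value>, 2), so
  the subsequent MIN(c)/MAX(c) lookups are confined to the matching
  sub-group.
*/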
int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");

  if (quick_prefix_select)
  {
    uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
    if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
                                                      group_key_parts,
                                                      cur_prefix)))
      DBUG_RETURN(result);
    seen_first_key= TRUE;
  }
  else
  {
    if (!seen_first_key)
    {
      result= file->ha_index_first(record);
      if (result)
        DBUG_RETURN(result);
      seen_first_key= TRUE;
    }
    else
    {
      /* Load the first key in this group into record. */
      result= index_next_different(is_index_scan, file, index_info->key_part,
                                   record, group_prefix, group_prefix_len,
                                   group_key_parts);
      if (result)
        DBUG_RETURN(result);
    }
  }

  /* Save the prefix of this group for subsequent calls. */
  key_copy(group_prefix, record, index_info, group_prefix_len);
  /* Append key_infix to group_prefix. */
  if (key_infix_len > 0)
    memcpy(group_prefix + group_prefix_len,
           key_infix, key_infix_len);

  DBUG_RETURN(0);
}


/**
  Allocate a temporary buffer, populate the buffer using the group prefix key
  and the min/max field key, and compare the result to the current key pointed
  by index_info.

  @param key     - the min or max field key
  @param length  - length of "key"
*/
int
QUICK_GROUP_MIN_MAX_SELECT::cmp_min_max_key(const uchar *key, uint16 length)
{
  /*
    Allocate a buffer.
    Note, we allocate one extra byte, because some of Field_xxx::cmp(),
    e.g. Field_newdate::cmp(), use uint3korr() which actually reads four bytes
    and then bit-ands the read value with 0xFFFFFF.
    See "MDEV-7920 main.group_min_max fails ... with valgrind" for details.
  */
  uchar *buffer= (uchar*) my_alloca(real_prefix_len + min_max_arg_len + 1);
  /* Concatenate the group prefix key and the min/max field key */
  memcpy(buffer, group_prefix, real_prefix_len);
  memcpy(buffer + real_prefix_len, key, length);
  /* Compare the key pointed by key_info to the created key */
  int cmp_res= key_cmp(index_info->key_part, buffer,
                       real_prefix_len + min_max_arg_len);
  my_afree(buffer);
  return cmp_res;
}


/*
  Find the minimal key in a group that satisfies some range conditions for
  the min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the minimal key that is
    in the left-most possible range. If there is no such key, then the current
    group does not have a MIN key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   - "" -
    other                if some error
*/
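
/*
  Range-walk sketch (hypothetical predicate): for WHERE b = 2 OR b BETWEEN
  5 AND 10, min_max_ranges holds [2,2] and [5,10]. The loop probes the
  left-most range [2,2] first; only if no key of the current group falls
  into it does the search move on to [5,10].
*/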

int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
                 min_max_arg_len) == 1))
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_KEY_EXACT;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);
    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
      break;
    }

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
      result= HA_ERR_KEY_NOT_FOUND;
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if (!(cur_range->flag & NO_MAX_RANGE))
    {
      int cmp_res= cmp_min_max_key(cur_range->max_key, cur_range->max_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the maximum boundary
        or
        the key is greater than the maximum
      */
      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
          cmp_res > 0)
      {
        result= HA_ERR_KEY_NOT_FOUND;
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
    memcpy(record, tmp_record, head->s->rec_buff_length);
    result= 0;
  }
  return result;
}


/*
  Find the maximal key in a group that satisfies some range conditions for
  the min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the maximal key that is
    in the right-most possible range. If there is no such key, then the
    current group does not have a MAX key that satisfies the WHERE clause.
    If a key is found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   - "" -
    other                if some error
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
                 min_max_arg_len) == -1))
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_PREFIX_LAST;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);

    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

      /*
        If no key was found with this upper bound, there certainly are no
        keys in the ranges to the left.
      */
      return result;
    }
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      return 0; /* No need to perform the checks below for equal keys. */

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
      continue;                                 // Row not found

    /* If there is a lower limit, check if the found key is in the range. */
    if (!(cur_range->flag & NO_MIN_RANGE))
    {
      int cmp_res= cmp_min_max_key(cur_range->min_key, cur_range->min_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the minimum boundary
        or
        the key is less than the minimum
      */
      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
          cmp_res < 0)
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  return HA_ERR_KEY_NOT_FOUND;
}


/*
  Update all MIN function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_min_result()

  DESCRIPTION
    The method iterates through all MIN functions and updates the result
    value of each function by calling Item_sum::reset_and_add(), which in
    turn picks the new result value from this->head->record[0], previously
    updated by next_min(). The updated value is stored in a member variable
    of each of the Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_min() was called and before next_max() is called, because both MIN
    and MAX take their result value from the same buffer
    this->head->record[0] (i.e. this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
{
  Item_sum *min_func;

  min_functions_it->rewind();
  while ((min_func= (*min_functions_it)++))
    min_func->reset_and_add();
}


/*
  Update all MAX function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_max_result()

  DESCRIPTION
    The method iterates through all MAX functions and updates the result
    value of each function by calling Item_sum::reset_and_add(), which in
    turn picks the new result value from this->head->record[0], previously
    updated by next_max(). The updated value is stored in a member variable
    of each of the Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_max() was called, because both MIN and MAX take their result value
    from the same buffer this->head->record[0] (i.e. this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
{
  Item_sum *max_func;

  max_functions_it->rewind();
  while ((max_func= (*max_functions_it)++))
    max_func->reset_and_add();
}


/*
  Append comma-separated list of keys this quick select uses to key_names;
  append comma-separated list of corresponding used lengths to used_lengths.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
    key_names    [out] Names of used indexes
    used_lengths [out] Corresponding lengths of the used keys

  DESCRIPTION
    This method is used by select_describe to extract the names of the
    indexes used by a quick select.

*/

void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  bool first= TRUE;

  add_key_and_length(key_names, used_lengths, &first);
}


/* Check whether the number of equality ranges exceeds the set threshold */
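
/*
  For example (hypothetical query), a long IN list such as

    SELECT * FROM t1 WHERE key1 IN (1, 2, 3, ..., 10000);

  produces one equality range per list element; once 'limit' such ranges
  are counted, the caller can switch to index statistics instead of doing
  an index dive per range (the 'statistics instead of index dives' feature
  mentioned below).
*/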

bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param,
                             uint limit)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  uint count = 0;

  if (limit == 0)
  {
    /* 'Statistics instead of index dives' feature is turned off */
    return false;
  }
  seq_it= seq->init(seq_init_param, 0, 0);
  while (!seq->next(seq_it, &range))
  {
    if ((range.range_flag & EQ_RANGE) && !(range.range_flag & NULL_RANGE))
    {
      if (++count >= limit)
        return true;
    }
  }
  return false;
}

#ifndef DBUG_OFF

static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg)
{
  char buff[1024];
  DBUG_ENTER("print_sel_tree");

  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (uint idx= 0; idx < param->keys; idx++)
  {
    if (tree_map->is_set(idx))
    {
      uint keynr= param->real_keynr[idx];
      if (tmp.length())
        tmp.append(',');
      tmp.append(&param->table->key_info[keynr].name);
    }
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));

  DBUG_PRINT("info", ("SEL_TREE: %p (%s) scans: %s", tree, msg,
                      tmp.c_ptr_safe()));

  DBUG_VOID_RETURN;
}


static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end)
{
  DBUG_ENTER("print_ror_scans_arr");

  char buff[1024];
  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (;start != end; start++)
  {
    if (tmp.length())
      tmp.append(',');
    tmp.append(&table->key_info[(*start)->keynr].name);
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));
  DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.c_ptr()));
  DBUG_VOID_RETURN;
}

static String dbug_print_sel_arg_buf;

static void
print_sel_arg_key(Field *field, const uchar *key, String *out)
{
  TABLE *table= field->table;
  MY_BITMAP *old_sets[2];
  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);

  if (field->real_maybe_null())
  {
    if (*key)
    {
      out->append("NULL");
      goto end;
    }
    key++;                                      // Skip null byte
  }

  field->set_key_image(key, field->pack_length());

  if (field->type() == MYSQL_TYPE_BIT)
    (void) field->val_int_as_str(out, 1);
  else
    field->val_str(out);

end:
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
}


/*
  @brief
    Produce a string representation of an individual SEL_ARG and return
    a pointer to it

  @detail
    Intended usage:

      (gdb) p dbug_print_sel_arg(ptr)
*/

const char *dbug_print_sel_arg(SEL_ARG *sel_arg)
{
  StringBuffer<64> buf;
  String &out= dbug_print_sel_arg_buf;
  out.length(0);

  if (!sel_arg)
  {
    out.append("NULL");
    goto end;
  }

  out.append("SEL_ARG(");

  const char *stype;
  switch(sel_arg->type) {
  case SEL_ARG::IMPOSSIBLE:
    stype="IMPOSSIBLE";
    break;
  case SEL_ARG::MAYBE:
    stype="MAYBE";
    break;
  case SEL_ARG::MAYBE_KEY:
    stype="MAYBE_KEY";
    break;
  case SEL_ARG::KEY_RANGE:
  default:
    stype= NULL;
  }

  if (stype)
  {
    out.append("type=");
    out.append(stype);
    goto end;
  }

  if (sel_arg->min_flag & NO_MIN_RANGE)
    out.append("-inf");
  else
  {
    print_sel_arg_key(sel_arg->field, sel_arg->min_value, &buf);
    out.append(buf);
  }

  out.append((sel_arg->min_flag & NEAR_MIN)? "<" : "<=");

  out.append(sel_arg->field->field_name);

  out.append((sel_arg->max_flag & NEAR_MAX)? "<" : "<=");

  if (sel_arg->max_flag & NO_MAX_RANGE)
    out.append("+inf");
  else
  {
    print_sel_arg_key(sel_arg->field, sel_arg->max_value, &buf);
    out.append(buf);
  }

  out.append(")");

end:
  return dbug_print_sel_arg_buf.c_ptr_safe();
}


/*****************************************************************************
** Print a quick range for debugging
** TODO:
** This should be changed to use a String to store each row instead
** of locking the DEBUG stream !
*****************************************************************************/

static void
print_key(KEY_PART *key_part, const uchar *key, uint used_length)
{
  char buff[1024];
  const uchar *key_end= key+used_length;
  uint store_length;
  TABLE *table= key_part->field->table;
  MY_BITMAP *old_sets[2];

  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);

  for (; key < key_end; key+=store_length, key_part++)
  {
    String tmp(buff,sizeof(buff),&my_charset_bin);
    Field *field= key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      if (*key)
      {
        fwrite("NULL",sizeof(char),4,DBUG_FILE);
        continue;
      }
      key++;                                    // Skip null byte
      store_length--;
    }
    field->set_key_image(key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
}


static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
{
  char buf[MAX_KEY/8+1];
  TABLE *table;
  MY_BITMAP *old_sets[2];
  DBUG_ENTER("print_quick");
  if (!quick)
    DBUG_VOID_RETURN;
  DBUG_LOCK_FILE;

  table= quick->head;
  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
  quick->dbug_dump(0, TRUE);
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);

  fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));

  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}

void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
{
  /* purecov: begin inspected */
  fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
          indent, "", head->key_info[index].name.str, max_used_key_length);

  if (verbose)
  {
    QUICK_RANGE *range;
    QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
    QUICK_RANGE **end_range= pr + ranges.elements;
    for (; pr != end_range; ++pr)
    {
      fprintf(DBUG_FILE, "%*s", indent + 2, "");
      range= *pr;
      if (!(range->flag & NO_MIN_RANGE))
      {
        print_key(key_parts, range->min_key, range->min_length);
        if (range->flag & NEAR_MIN)
          fputs(" < ",DBUG_FILE);
        else
          fputs(" <= ",DBUG_FILE);
      }
      fputs("X",DBUG_FILE);

      if (!(range->flag & NO_MAX_RANGE))
      {
        if (range->flag & NEAR_MAX)
          fputs(" < ",DBUG_FILE);
        else
          fputs(" <= ",DBUG_FILE);
        print_key(key_parts, range->max_key, range->max_length);
      }
      fputs("\n",DBUG_FILE);
    }
  }
  /* purecov: end */
}

void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
  fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  if (pk_quick_select)
  {
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
    pk_quick_select->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
  QUICK_SELECT_WITH_RECORD *qr;
  fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
          indent, "", need_to_fetch_row? "":"non-");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((qr= it++))
    qr->quick->dbug_dump(indent+2, verbose);
  if (cpk_quick)
  {
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
    cpk_quick->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  QUICK_SELECT_I *quick;
  fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}


/*
  Print quick select information to DBUG_FILE.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
    indent  Indentation offset
    verbose If TRUE show more detailed output.

  DESCRIPTION
    Print the contents of this quick select to DBUG_FILE. The method also
    calls dbug_dump() for the used quick select if any.

  IMPLEMENTATION
    Caller is responsible for locking DBUG_FILE before this call and unlocking
    it afterwards.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
{
  fprintf(DBUG_FILE,
          "%*squick_group_min_max_select: index %s (%d), length: %d\n",
          indent, "", index_info->name.str, index, max_used_key_length);
  if (key_infix_len > 0)
  {
    fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
            indent, "", key_infix_len);
  }
  if (quick_prefix_select)
  {
    fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
    quick_prefix_select->dbug_dump(indent + 2, verbose);
  }
  if (min_max_ranges.elements > 0)
  {
    fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
            indent, "", min_max_ranges.elements);
  }
}

#endif /* !DBUG_OFF */


/*
  @brief Print the comparison operator for the min range
*/

static void print_min_range_operator(String *out, const ha_rkey_function flag)
{
  if (flag == HA_READ_AFTER_KEY)
    out->append(STRING_WITH_LEN(" < "));
  else if (flag == HA_READ_KEY_EXACT || flag == HA_READ_KEY_OR_NEXT)
    out->append(STRING_WITH_LEN(" <= "));
  else
    out->append(STRING_WITH_LEN(" ? "));
}


/*
  @brief Print the comparison operator for the max range
*/

static void print_max_range_operator(String *out, const ha_rkey_function flag)
{
  if (flag == HA_READ_BEFORE_KEY)
    out->append(STRING_WITH_LEN(" < "));
  else if (flag == HA_READ_AFTER_KEY)
    out->append(STRING_WITH_LEN(" <= "));
  else
    out->append(STRING_WITH_LEN(" ? "));
}


static
void print_range(String *out, const KEY_PART_INFO *key_part,
                 KEY_MULTI_RANGE *range, uint n_key_parts)
{
  uint flag= range->range_flag;
  String key_name;
  key_name.set_charset(system_charset_info);
  key_part_map keypart_map= range->start_key.keypart_map |
                            range->end_key.keypart_map;

  if (flag & GEOM_FLAG)
  {
    /*
      The flags of GEOM ranges do not work the same way as for other
      range types, so printing "col < some_geom" doesn't make sense.
      Just print the column name, not operator.
    */
    print_keyparts_name(out, key_part, n_key_parts, keypart_map);
    out->append(STRING_WITH_LEN(" "));
    print_key_value(out, key_part, range->start_key.key,
                    range->start_key.length);
    return;
  }

  if (range->start_key.length)
  {
    print_key_value(out, key_part, range->start_key.key,
                    range->start_key.length);
    print_min_range_operator(out, range->start_key.flag);
  }

  print_keyparts_name(out, key_part, n_key_parts, keypart_map);

  if (range->end_key.length)
  {
    print_max_range_operator(out, range->end_key.flag);
    print_key_value(out, key_part, range->end_key.key,
                    range->end_key.length);
  }
}


/*
  @brief Print range created for non-indexed columns

  @param
    out    output string
    field  field for which the range is printed
    range  range for the field
*/

static
void print_range_for_non_indexed_field(String *out, Field *field,
                                       KEY_MULTI_RANGE *range)
{
  TABLE *table= field->table;
  MY_BITMAP *old_sets[2];
  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);

  if (range->start_key.length)
  {
    field->print_key_part_value(out, range->start_key.key, field->key_length());
    print_min_range_operator(out, range->start_key.flag);
  }

  out->append(field->field_name);

  if (range->end_key.length)
  {
    print_max_range_operator(out, range->end_key.flag);
    field->print_key_part_value(out, range->end_key.key, field->key_length());
  }
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
}



/*
  Add ranges to the trace.
  For example, say we have the index a_b(a,b) and the query

    SELECT * FROM t1 WHERE a=2 AND b=4;

  Then we create the range

    (2,4) <= (a,b) <= (2,4)

  and add it to the trace.
*/

static void trace_ranges(Json_writer_array *range_trace,
                         PARAM *param, uint idx,
                         SEL_ARG *keypart,
                         const KEY_PART_INFO *key_parts)
{
  SEL_ARG_RANGE_SEQ seq;
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  uint flags= 0;
  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
                         sel_arg_range_seq_next, 0, 0};
  KEY *keyinfo= param->table->key_info + param->real_keynr[idx];
  uint n_key_parts= param->table->actual_n_key_parts(keyinfo);
  DBUG_ASSERT(range_trace->trace_started());
  seq.keyno= idx;
  seq.real_keyno= param->real_keynr[idx];
  seq.param= param;
  seq.start= keypart;
  /*
    is_ror_scan is set to FALSE here, because we are only interested
    in iterating over all the ranges and printing them.
  */
  seq.is_ror_scan= FALSE;
  const KEY_PART_INFO *cur_key_part= key_parts + keypart->part;
  seq_it= seq_if.init((void *) &seq, 0, flags);

  while (!seq_if.next(seq_it, &range))
  {
    StringBuffer<128> range_info(system_charset_info);
    print_range(&range_info, cur_key_part, &range, n_key_parts);
    range_trace->add(range_info.c_ptr_safe(), range_info.length());
  }
}

/**
  Print a key to a string

  @param[out] out          String the key is appended to
  @param[in]  key_part     Index components description
  @param[in]  key          Key tuple
  @param[in]  used_length  length of the key tuple
*/

static void print_key_value(String *out, const KEY_PART_INFO *key_part,
                            const uchar* key, uint used_length)
{
  out->append(STRING_WITH_LEN("("));
  Field *field= key_part->field;
  StringBuffer<128> tmp(system_charset_info);
  TABLE *table= field->table;
  uint store_length;
  MY_BITMAP *old_sets[2];
  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
  const uchar *key_end= key+used_length;

  for (; key < key_end; key+=store_length, key_part++)
  {
    field= key_part->field;
    store_length= key_part->store_length;

    field->print_key_part_value(out, key, key_part->length);

    if (key + store_length < key_end)
      out->append(STRING_WITH_LEN(","));
  }
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
  out->append(STRING_WITH_LEN(")"));
}

/**
  Print key parts involved in a range

  @param[out] out          String the key is appended to
  @param[in]  key_part     Index components description
  @param[in]  n_keypart    Number of keyparts in index
  @param[in]  keypart_map  map for keyparts involved in the range
*/

void print_keyparts_name(String *out, const KEY_PART_INFO *key_part,
                         uint n_keypart, key_part_map keypart_map)
{
  uint i;
  out->append(STRING_WITH_LEN("("));
  bool first_keypart= TRUE;
  for (i=0; i < n_keypart; key_part++, i++)
  {
    if (keypart_map & (1 << i))
    {
      if (first_keypart)
        first_keypart= FALSE;
      else
        out->append(STRING_WITH_LEN(","));
      out->append(key_part->field->field_name);
    }
    else
      break;
  }
  out->append(STRING_WITH_LEN(")"));
}