1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /*
24 TODO:
25 Fix so that MAYBE_KEY entries are stored in the tree, letting us detect use
26 of full hash keys for queries like:
27
28 select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
29
30 */
31
32 // Needed by the unit tests
33 #ifndef OPT_RANGE_CC_INCLUDED
34 #define OPT_RANGE_CC_INCLUDED
35
36 /*
37 This file contains:
38
39 RangeAnalysisModule
40 A module that accepts a condition, index (or partitioning) description,
41 and builds lists of intervals (in index/partitioning space), such that
42 all possible records that match the condition are contained within the
43 intervals.
44 The entry point for the range analysis module is get_mm_tree()
45 (mm=min_max) function.
46
47 The lists are returned in the form of a complicated structure of interlinked
48 SEL_TREE/SEL_IMERGE/SEL_ARG objects.
49 See quick_range_seq_next, find_used_partitions for examples of how to walk
50 this structure.
51 All direct "users" of this module are located within this file, too.
52
53
54 PartitionPruningModule
55 A module that accepts a partitioned table, condition, and finds which
56 partitions we will need to use in query execution. Search down for
57 "PartitionPruningModule" for description.
58 The module has a single entry point: the prune_partitions() function.
59
60
61 Range/index_merge/groupby-minmax optimizer module
62 A module that accepts a table and a condition, and returns
63 - a QUICK_*_SELECT object that can be used to retrieve rows that match
64 the specified condition, or a "no records will match the condition"
65 statement.
66
67 The module entry points are
68 test_quick_select()
69 get_quick_select_for_ref()
70
71
72 Record retrieval code for range/index_merge/groupby-min-max.
73 Implementations of QUICK_*_SELECT classes.
74
75 KeyTupleFormat
76 ~~~~~~~~~~~~~~
77 The code in this file (and elsewhere) performs operations on key value tuples.
78 Those tuples are stored in the following format:
79
80 The tuple is a sequence of key part values. The length of a key part value
81 depends only on its type (and not on the value being stored).
82
83 KeyTuple: keypart1-data, keypart2-data, ...
84
85 The value of each keypart is stored in the following format:
86
87 keypart_data: [isnull_byte] keypart-value-bytes
88
89 If a keypart may have a NULL value (key_part->field->real_maybe_null() can
90 be used to check this), then the first byte is a NULL indicator with the
91 following valid values:
92 1 - keypart has NULL value.
93 0 - keypart has non-NULL value.
94
95 <questionable-statement> If isnull_byte==1 (NULL value), then the following
96 keypart->length bytes must be 0.
97 </questionable-statement>
98
99 keypart-value-bytes holds the value. Its format depends on the field type.
100 The length of keypart-value-bytes may or may not depend on the value being
101 stored. The default is that length is static and equal to
102 KEY_PART_INFO::length.
103
104 Key parts with (key_part_flag & HA_BLOB_PART) have a length depending on the
105 value:
106
107 keypart-value-bytes: value_length value_bytes
108
109 The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
110
111 See key_copy() and key_restore() for code to move data between index tuple
112 and table record.
113
114 CAUTION: the above description is only sergefp's understanding of the
115 subject and may omit some details.
116 */
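/*
  A minimal sketch of reading one keypart from a KeyTuple laid out as
  described above. Illustrative only and excluded from the build: the
  function name and the 'nullable'/'is_blob_part' parameters are
  hypothetical; the server itself works through KEY_PART_INFO and
  helpers such as key_copy()/key_restore().
*/
#if 0
#include <cstdint>
#include <cstring>

// Returns a pointer just past the keypart that starts at 'p'.
static const unsigned char *read_keypart(const unsigned char *p,
                                         bool nullable, bool is_blob_part,
                                         size_t fixed_length, bool *is_null)
{
  *is_null= false;
  if (nullable)
  {
    *is_null= (*p == 1);          // 1 - keypart is NULL, 0 - non-NULL
    p++;                          // skip the NULL indicator byte
  }
  if (is_blob_part)
  {
    // Variable-length keypart: 2-byte length (HA_KEY_BLOB_LENGTH) + bytes.
    uint16_t value_length;
    memcpy(&value_length, p, 2);  // assumes little-endian; MySQL uses uint2korr()
    p+= 2 + value_length;
  }
  else
    p+= fixed_length;             // default: static KEY_PART_INFO::length
  return p;
}
#endif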
117
118 #include "opt_range.h"
119
120 #include "item_sum.h" // Item_sum
121 #include "key.h" // is_key_used
122 #include "log.h" // sql_print_error
123 #include "opt_statistics.h" // guess_rec_per_key
124 #include "opt_trace.h" // Opt_trace_array
125 #include "partition_info.h" // partition_info
126 #include "sql_partition.h" // HA_USE_AUTO_PARTITION
127 #include "sql_base.h" // free_io_cache
128 #include "sql_class.h" // THD
129 #include "sql_opt_exec_shared.h" // QEP_shared_owner
130 #include "sql_optimizer.h" // JOIN
131 #include "sql_parse.h" // check_stack_overrun
132 #include "uniques.h" // Unique
133 #include "opt_hints.h" // hint_key_state
134 #include "mysys_err.h" // EE_CAPACITY_EXCEEDED
135
136 using std::min;
137 using std::max;
138
139 /*
140 Convert double value to #rows. Currently this does floor(), and we
141 might consider using round() instead.
142 */
143 #define double2rows(x) ((ha_rows)(x))
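/* Example: double2rows(3.9) == (ha_rows) 3 - the cast truncates, which for
   non-negative row estimates is the same as floor(). */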
144
145 static int sel_cmp(Field *f,uchar *a,uchar *b,uint8 a_flag,uint8 b_flag);
146
147 static uchar is_null_string[2]= {1,0};
148
149 class RANGE_OPT_PARAM;
150
151 /**
152 Error handling class for the range optimizer. We handle only out-of-memory
153 errors here. This is to give the user a hint to
154 raise range_optimizer_max_mem_size if required.
155 The warning for the memory error is pushed only once; subsequent errors
156 are ignored.
157 */
158 class Range_optimizer_error_handler : public Internal_error_handler
159 {
160 public:
161 Range_optimizer_error_handler()
162 : m_has_errors(false), m_is_mem_error(false)
163 {}
164
165 virtual bool handle_condition(THD *thd,
166 uint sql_errno,
167 const char* sqlstate,
168 Sql_condition::enum_severity_level *level,
169 const char* msg)
170 {
171 if (*level == Sql_condition::SL_ERROR)
172 {
173 m_has_errors= true;
174 /* Out of memory error is reported only once. Return as handled */
175 if (m_is_mem_error && sql_errno == EE_CAPACITY_EXCEEDED)
176 return true;
177 if (sql_errno == EE_CAPACITY_EXCEEDED)
178 {
179 m_is_mem_error= true;
180 /* Convert the error into a warning. */
181 *level= Sql_condition::SL_WARNING;
182 push_warning_printf(
183 thd, Sql_condition::SL_WARNING,
184 ER_CAPACITY_EXCEEDED,
185 ER_THD(thd, ER_CAPACITY_EXCEEDED),
186 (ulonglong)thd->variables.range_optimizer_max_mem_size,
187 "range_optimizer_max_mem_size",
188 ER_THD(thd, ER_CAPACITY_EXCEEDED_IN_RANGE_OPTIMIZER));
189 return true;
190 }
191 }
192 return false;
193 }
194
195 bool has_errors() const { return m_has_errors; }
196 private:
197 bool m_has_errors;
198 bool m_is_mem_error;
199 };
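/*
  A minimal usage sketch (illustrative, excluded from the build), assuming
  the usual Internal_error_handler protocol: the handler is pushed on the
  THD before range analysis so that EE_CAPACITY_EXCEEDED surfaces as a
  single warning suggesting range_optimizer_max_mem_size.
*/
#if 0
static void analyze_ranges_guarded(THD *thd)
{
  Range_optimizer_error_handler error_handler;
  thd->push_internal_handler(&error_handler);  // intercept errors from here on
  /* ... run range analysis that may hit range_optimizer_max_mem_size ... */
  thd->pop_internal_handler();
  if (error_handler.has_errors())
  {
    /* A real (non-OOM) error was raised; give up on range optimization. */
  }
}
#endif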
200
201 /*
202 A construction block of the SEL_ARG-graph.
203
204 The following description only covers graphs of SEL_ARG objects with
205 sel_arg->type==KEY_RANGE:
206
207 One SEL_ARG object represents an "elementary interval" in form
208
209 min_value <=? table.keypartX <=? max_value
210
211 The interval is a non-empty interval of any kind: with[out] minimum/maximum
212 bound, [half]open/closed, single-point interval, etc.
213
214 1. SEL_ARG GRAPH STRUCTURE
215
216 SEL_ARG objects are linked together in a graph. The meaning of the graph
217 is better demonstrated by an example:
218
219 tree->keys[i]
220 |
221 | $ $
222 | part=1 $ part=2 $ part=3
223 | $ $
224 | +-------+ $ +-------+ $ +--------+
225 | | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
226 | +-------+ $ +-------+ $ +--------+
227 | | $ $ |
228 | | $ $ +--------+
229 | | $ $ | kp3=12 |
230 | | $ $ +--------+
231 | +-------+ $ $
232 \->| kp1=2 |--$--------------$-+
233 +-------+ $ $ | +--------+
234 | $ $ ==>| kp3=11 |
235 +-------+ $ $ | +--------+
236 | kp1=3 |--$--------------$-+ |
237 +-------+ $ $ +--------+
238 | $ $ | kp3=14 |
239 ... $ $ +--------+
240
241 The entire graph is partitioned into "interval lists".
242
243 An interval list is a sequence of ordered disjoint intervals over
244 the same key part. SEL_ARGs are linked via "next" and "prev" pointers
245 with NULL as sentinel.
246
247 In the example pic, there are 4 interval lists:
248 "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13".
249 The vertical lines represent SEL_ARG::next/prev pointers.
250
251 Additionally, all intervals in the list form a red-black (RB) tree,
252 linked via left/right/parent pointers with null_element as sentinel. The
253 red-black tree root SEL_ARG object will be further called "root of the
254 interval list".
255
256 A red-black tree with 7 SEL_ARGs will look similar to what is shown
257 below. Left/right/parent pointers are shown while next pointers go from a
258 node with number X to the node with number X+1 (and prev in the
259 opposite direction):
260
261 Root
262 +---+
263 | 4 |
264 +---+
265 left/ \ right
266 __/ \__
267 / \
268 +---+ +---+
269 | 2 | | 6 |
270 +---+ +---+
271 left / \ right left / \ right
272 | | | |
273 +---+ +---+ +---+ +---+
274 | 1 | | 3 | | 5 | | 7 |
275 +---+ +---+ +---+ +---+
276
277 In this tree,
278 * node1->prev == node7->next == NULL
279 * node1->left == node1->right ==
280 node3->left == ... node7->right == &null_element
281
282 In an interval list, each member X may have SEL_ARG::next_key_part pointer
283 pointing to the root of another interval list Y. The pointed interval list
284 must cover a key part with greater number (i.e. Y->part > X->part).
285
286 In the example pic, the next_key_part pointers are represented by
287 horizontal lines.
288
289 2. SEL_ARG GRAPH SEMANTICS
290
291 It represents a condition in a special form (we don't have a name for it ATM).
292 The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
293
294 For example, the picture represents the condition in form:
295 (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
296 (kp1=2 AND (kp3=11 OR kp3=14)) OR
297 (kp1=3 AND (kp3=11 OR kp3=14))
298
299 In red-black tree form:
300
301 +-------+ +--------+
302 | kp1=2 |.................| kp3=14 |
303 +-------+ +--------+
304 / \ /
305 +---------+ +-------+ +--------+
306 | kp1 < 1 | | kp1=3 | | kp3=11 |
307 +---------+ +-------+ +--------+
308 . .
309 ...... .......
310 . .
311 +-------+ +--------+
312 | kp2=5 | | kp3=14 |
313 +-------+ +--------+
314 . /
315 . +--------+
316 (root of R-B tree | kp3=11 |
317 for "kp3={10|12}") +--------+
318
319
320 Where / and \ denote left and right pointers and ... denotes
321 next_key_part pointers to the root of the R-B tree of intervals for
322 consecutive key parts.
323
324 3. SEL_ARG GRAPH USE
325
326 Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
327 Then walk the SEL_ARG graph and get a list of disjoint ordered key
328 intervals (i.e. intervals in the form
329
330 (constA_1, .., constA_K) < (keypart_1, .., keypart_K) < (constB_1, .., constB_K))
331
332 Those intervals can be used to access the index. The uses are in:
333 - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
334 how many table records are contained within all
335 intervals.
336 - get_quick_select() - Walk the SEL_ARG, materialize the key intervals,
337 and create QUICK_RANGE_SELECT object that will
338 read records within these intervals.
339
340 4. SPACE COMPLEXITY NOTES
341
342 SEL_ARG graph is a representation of an ordered disjoint sequence of
343 intervals over the ordered set of index tuple values.
344
345 For multi-part keys, one can construct a WHERE expression such that its
346 list of intervals will be of combinatorial size. Here is an example:
347
348 (keypart1 IN (1,2, ..., n1)) AND
349 (keypart2 IN (1,2, ..., n2)) AND
350 (keypart3 IN (1,2, ..., n3))
351
352 For this WHERE clause the list of intervals will have n1*n2*n3 intervals
353 of form
354
355 (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
356
357 SEL_ARG graph structure aims to reduce the amount of required space by
358 "sharing" the elementary intervals when possible (the pic at the
359 beginning of this comment has examples of such sharing). The sharing may
360 prevent combinatorial blowup:
361
362 There are WHERE clauses that have combinatorial-size interval lists but
363 will be represented by a compact SEL_ARG graph.
364 Example:
365 (keypartN IN (1,2, ..., n1)) AND
366 ...
367 (keypart2 IN (1,2, ..., n2)) AND
368 (keypart1 IN (1,2, ..., n3))
369
370 but not in all cases:
371
372 - There are WHERE clauses that do have a compact SEL_ARG-graph
373 representation but get_mm_tree() and its callees will construct a
374 graph of combinatorial size.
375 Example:
376 (keypart1 IN (1,2, ..., n1)) AND
377 (keypart2 IN (1,2, ..., n2)) AND
378 ...
379 (keypartN IN (1,2, ..., n3))
380
381 - There are WHERE clauses for which the minimal possible SEL_ARG graph
382 representation will have combinatorial size.
383 Example:
384 By induction: Let's take any interval on some keypart in the middle:
385
386 kp15=c0
387
388 Then let's AND it with this interval 'structure' from preceding and
389 following keyparts:
390
391 ((kp14=c1 AND kp16=c3) OR kp14=c2) (*)
392
393 We will obtain this SEL_ARG graph:
394
395 kp14 $ kp15 $ kp16
396 $ $
397 +---------+ $ +---------+ $ +---------+
398 | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
399 +---------+ $ +---------+ $ +---------+
400 | $ $
401 +---------+ $ +---------+ $
402 | kp14=c2 |--$-->| kp15=c0 | $
403 +---------+ $ +---------+ $
404 $ $
405
406 Note that we had to duplicate "kp15=c0" and there was no way to avoid
407 that.
408 The induction step: AND the obtained expression with another "wrapping"
409 expression like (*).
410 When the process ends because of the limit on max. number of keyparts
411 we'll have:
412
413 WHERE clause length is O(3*#max_keyparts)
414 SEL_ARG graph size is O(2^(#max_keyparts/2))
415
416 (it is also possible to construct a case where instead of 2 in 2^n we
417 have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
418 nodes)
419
420 We avoid consuming too much memory by setting a limit on the number of
421 SEL_ARG objects we can construct during one range analysis invocation.
422 */
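/*
  A small standalone sketch (not part of the build) of the combinatorial
  blowup described above: three IN-lists of sizes n1, n2, n3 flatten into
  n1*n2*n3 single-point intervals even though the WHERE clause only
  mentions n1+n2+n3 constants.
*/
#if 0
#include <cstdio>

int main()
{
  const unsigned n1= 10, n2= 10, n3= 10;
  unsigned long intervals= 0;
  for (unsigned k1= 1; k1 <= n1; k1++)      // (keypart1, keypart2, keypart3)
    for (unsigned k2= 1; k2 <= n2; k2++)    //   = (k1, k2, k3)
      for (unsigned k3= 1; k3 <= n3; k3++)
        intervals++;
  printf("%lu intervals from %u constants\n", intervals, n1 + n2 + n3);
  return 0;  // prints: 1000 intervals from 30 constants
}
#endif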
423
424 class SEL_ARG :public Sql_alloc
425 {
426 public:
427 uint8 min_flag,max_flag,maybe_flag;
428 uint8 part; // Which key part
429 uint8 maybe_null;
430 /**
431 The rtree index interval to scan, undefined unless
432 SEL_ARG::min_flag == GEOM_FLAG.
433 */
434 enum ha_rkey_function rkey_func_flag;
435 /*
436 Number of children of this element in the RB-tree, plus 1 for this
437 element itself.
438 */
439 uint16 elements;
440 /**
441 Valid only for elements which are RB-tree roots: Number of
442 references to this SEL_ARG tree. References may be from
443 SEL_ARG::next_key_part of SEL_ARGs from earlier keyparts or
444 SEL_TREE::keys[i].
445
446 The SEL_ARGs are re-used in a lazy-copy manner based on this
447 reference counting.
448 */
449 ulong use_count;
450
451 Field *field;
452 uchar *min_value,*max_value; // Pointer to range
453
454 /*
455 eq_tree(), first(), last() etc require that left == right == NULL
456 if the type is MAYBE_KEY. Todo: fix this so SEL_ARGs without R-B
457 children are handled consistently. See related WL#5894.
458 */
459 SEL_ARG *left,*right; /* R-B tree children */
460 SEL_ARG *next,*prev; /* Links for bi-directional interval list */
461 SEL_ARG *parent; /* R-B tree parent */
462 /*
463 R-B tree root of intervals covering keyparts consecutive to this
464 SEL_ARG. See documentation of SEL_ARG GRAPH semantics for details.
465 */
466 SEL_ARG *next_key_part;
467 enum leaf_color { BLACK,RED } color;
468
469 /**
470 Used to indicate if the range predicate for an index is always
471 true/false, depends on values from other tables or can be
472 evaluated as is.
473 */
474 enum Type {
475 /** The range predicate for this index is always false. */
476 IMPOSSIBLE,
477 /** The range predicate for this index is always true.*/
478 ALWAYS,
479 /**
480 There is a range predicate that refers to another table. The
481 range access method cannot be used on this index unless that
482 other table is earlier in the join sequence. The bit
483 representing the index is set in SQL_SELECT::needed_reg to
484 notify the join optimizer that there is a table dependency.
485 After deciding on join order, the optimizer may choose to rerun
486 the range optimizer for tables with such dependencies.
487 */
488 MAYBE_KEY,
489 /**
490 There is a range condition that can be used on this index. The
491 range conditions for this index are stored in the SEL_ARG tree.
492 */
493 KEY_RANGE
494 } type;
495
496 SEL_ARG() {}
497 SEL_ARG(SEL_ARG &);
498 SEL_ARG(Field *,const uchar *, const uchar *);
499 SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
500 uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
501 /*
502 Used to construct MAYBE_KEY and IMPOSSIBLE SEL_ARGs. left and
503 right are NULL, so this ctor must not be used to create other
504 SEL_ARG types. See todo for left/right pointers.
505 */
506 SEL_ARG(enum Type type_arg)
507 :min_flag(0), part(0), rkey_func_flag(HA_READ_INVALID), elements(1),
508 use_count(1), left(NULL), right(NULL),
509 next_key_part(0), color(BLACK), type(type_arg)
510 {
511 DBUG_ASSERT(type_arg == MAYBE_KEY || type_arg == IMPOSSIBLE);
512 }
513 /**
514 Returns true if this range predicate is equal to arg's. Use all_same()
515 to check for equality of all the predicates on this keypart.
516 */
517 inline bool is_same(const SEL_ARG *arg) const
518 {
519 if (type != arg->type || part != arg->part)
520 return false;
521 if (type != KEY_RANGE)
522 return true;
523 return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
524 }
525 /**
526 Returns true if all the predicates in the keypart tree are equal.
527 */
528 bool all_same(const SEL_ARG *arg) const
529 {
530 if (type != arg->type || part != arg->part)
531 return false;
532 if (type != KEY_RANGE)
533 return true;
534 if (arg == this)
535 return true;
536 const SEL_ARG *cmp_arg= arg->first();
537 const SEL_ARG *cur_arg= first();
538 for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
539 cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
540 if (cur_arg || cmp_arg)
541 return false;
542 return true;
543 }
544 inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
545 inline void maybe_smaller() { maybe_flag=1; }
546 /* Return true iff it's a single-point null interval */
547 inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
548 inline int cmp_min_to_min(const SEL_ARG* arg) const
549 {
550 return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
551 }
552 inline int cmp_min_to_max(const SEL_ARG* arg) const
553 {
554 return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
555 }
556 inline int cmp_max_to_max(const SEL_ARG* arg) const
557 {
558 return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
559 }
560 inline int cmp_max_to_min(const SEL_ARG* arg) const
561 {
562 return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
563 }
564 SEL_ARG *clone_and(SEL_ARG* arg, MEM_ROOT *mem_root)
565 { // Get overlapping range
566 uchar *new_min,*new_max;
567 uint8 flag_min,flag_max;
568 if (cmp_min_to_min(arg) >= 0)
569 {
570 new_min=min_value; flag_min=min_flag;
571 }
572 else
573 {
574 new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
575 }
576 if (cmp_max_to_max(arg) <= 0)
577 {
578 new_max=max_value; flag_max=max_flag;
579 }
580 else
581 {
582 new_max=arg->max_value; flag_max=arg->max_flag;
583 }
584 return new (mem_root) SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
585 MY_TEST(maybe_flag && arg->maybe_flag));
586 }
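/*
  Worked example: clone_and() of [5, 20) and (10, 30] yields (10, 20) -
  the larger of the two minimums and the smaller of the two maximums,
  keeping the NEAR_MIN/NEAR_MAX flag of whichever bound survived.
*/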
587 SEL_ARG *clone_first(SEL_ARG *arg, MEM_ROOT *mem_root)
588 { // min <= X < arg->min
589 return new (mem_root) SEL_ARG(field,part, min_value, arg->min_value,
590 min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
591 maybe_flag | arg->maybe_flag);
592 }
593 SEL_ARG *clone_last(SEL_ARG *arg, MEM_ROOT *mem_root)
594 { // min <= X <= key_max
595 return new (mem_root) SEL_ARG(field, part, min_value, arg->max_value,
596 min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
597 }
598 SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);
599
600 bool copy_min(SEL_ARG* arg)
601 { // Get overlapping range
602 if (cmp_min_to_min(arg) > 0)
603 {
604 min_value=arg->min_value; min_flag=arg->min_flag;
605 if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
606 return 1; // Full range
607 }
608 maybe_flag|=arg->maybe_flag;
609 return 0;
610 }
611 bool copy_max(SEL_ARG* arg)
612 { // Get overlapping range
613 if (cmp_max_to_max(arg) <= 0)
614 {
615 max_value=arg->max_value; max_flag=arg->max_flag;
616 if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
617 return 1; // Full range
618 }
619 maybe_flag|=arg->maybe_flag;
620 return 0;
621 }
622
623 void copy_min_to_min(SEL_ARG *arg)
624 {
625 min_value=arg->min_value; min_flag=arg->min_flag;
626 }
627 void copy_min_to_max(SEL_ARG *arg)
628 {
629 max_value=arg->min_value;
630 max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
631 }
632 void copy_max_to_min(SEL_ARG *arg)
633 {
634 min_value=arg->max_value;
635 min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
636 }
637
638 /**
639 Set spatial index range scan parameters. This object will be used to do
640 spatial index range scan after this call.
641
642 @param rkey_func The scan function to perform. It must be one of the
643 spatial index specific scan functions.
644 */
645 void set_gis_index_read_function(const enum ha_rkey_function rkey_func)
646 {
647 DBUG_ASSERT(rkey_func >= HA_READ_MBR_CONTAIN &&
648 rkey_func <= HA_READ_MBR_EQUAL);
649 min_flag= GEOM_FLAG;
650 rkey_func_flag= rkey_func;
651 max_flag= NO_MAX_RANGE;
652 }
653
654 /* Returns the number of keypart values (0 or 1) appended to the key buffer */
655 int store_min(uint length, uchar **min_key, uint min_key_flag)
656 {
657 /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
658 if ((min_flag & GEOM_FLAG) ||
659 (!(min_flag & NO_MIN_RANGE) &&
660 !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
661 {
662 if (maybe_null && *min_value)
663 {
664 **min_key=1;
665 memset(*min_key+1, 0, length-1);
666 }
667 else
668 memcpy(*min_key,min_value,length);
669 (*min_key)+= length;
670 return 1;
671 }
672 return 0;
673 }
674 /* Returns the number of keypart values (0 or 1) appended to the key buffer */
675 int store_max(uint length, uchar **max_key, uint max_key_flag)
676 {
677 if (!(max_flag & NO_MAX_RANGE) &&
678 !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
679 {
680 if (maybe_null && *max_value)
681 {
682 **max_key=1;
683 memset(*max_key+1, 0, length-1);
684 }
685 else
686 memcpy(*max_key,max_value,length);
687 (*max_key)+= length;
688 return 1;
689 }
690 return 0;
691 }
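/*
  Worked example for the two functions above, assuming a nullable 4-byte
  INT keypart (store_length 5): for "kp IS NULL", min_value[0] == 1, so
  store_min() appends 0x01 followed by four zero bytes; for "kp = 7" it
  appends the 0x00 NULL indicator followed by the 4-byte encoding of 7.
*/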
692
693 /*
694 Returns the number of keypart values appended to the key buffer
695 for min key and max key. This function is used by both Range
696 Analysis and Partition pruning. For partition pruning we have
697 to ensure that we don't also store subpartition fields. Thus
698 we have to stop at the last partition part and not step into
699 the subpartition fields. For Range Analysis we set last_part
700 to MAX_KEY, which we should never reach.
701 */
702 int store_min_key(KEY_PART *key,
703 uchar **range_key,
704 uint *range_key_flag,
705 uint last_part)
706 {
707 SEL_ARG *key_tree= first();
708 uint res= key_tree->store_min(key[key_tree->part].store_length,
709 range_key, *range_key_flag);
710 *range_key_flag|= key_tree->min_flag;
711
712 if (key_tree->next_key_part &&
713 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
714 key_tree->part != last_part &&
715 key_tree->next_key_part->part == key_tree->part+1 &&
716 !(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
717 res+= key_tree->next_key_part->store_min_key(key,
718 range_key,
719 range_key_flag,
720 last_part);
721 return res;
722 }
723
724 /* Returns the number of keypart values appended to the key buffer */
725 int store_max_key(KEY_PART *key,
726 uchar **range_key,
727 uint *range_key_flag,
728 uint last_part)
729 {
730 SEL_ARG *key_tree= last();
731 uint res=key_tree->store_max(key[key_tree->part].store_length,
732 range_key, *range_key_flag);
733 (*range_key_flag)|= key_tree->max_flag;
734 if (key_tree->next_key_part &&
735 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
736 key_tree->part != last_part &&
737 key_tree->next_key_part->part == key_tree->part+1 &&
738 !(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
739 res+= key_tree->next_key_part->store_max_key(key,
740 range_key,
741 range_key_flag,
742 last_part);
743 return res;
744 }
745
746 SEL_ARG *insert(SEL_ARG *key);
747 SEL_ARG *tree_delete(SEL_ARG *key);
748 SEL_ARG *find_range(SEL_ARG *key);
749 SEL_ARG *rb_insert(SEL_ARG *leaf);
750 friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
751 #ifndef DBUG_OFF
752 friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
753 #endif
754 bool test_use_count(SEL_ARG *root);
755 SEL_ARG *first();
756 const SEL_ARG *first() const;
757 SEL_ARG *last();
758 void make_root();
759 inline bool simple_key()
760 {
761 return !next_key_part && elements == 1;
762 }
763 /**
764 Update use_count of all SEL_ARG trees for later keyparts to
765 reflect that this SEL_ARG tree is now referred to 'count' more
766 times than it used to be (either through SEL_TREE::keys[] or
767 SEL_ARG::next_key_part pointers).
768
769 This function does NOT update use_count of the current SEL_ARG
770 object.
771
772 @param count The number of additional references to this SEL_ARG
773 tree.
774
775 @todo consider refactoring this function to also increase
776 use_count of 'this' instead of incrementing use_count only
777 on later keyparts.
778 */
779 void increment_use_count(long count)
780 {
781 /*
782 Increment use_count for all SEL_ARG trees referenced via
783 next_key_part from any SEL_ARG in this tree.
784 */
785 for (SEL_ARG *cur_selarg= first();
786 cur_selarg;
787 cur_selarg= cur_selarg->next)
788 {
789 if (cur_selarg->next_key_part)
790 {
791 cur_selarg->next_key_part->use_count+= count;
792 cur_selarg->next_key_part->increment_use_count(count);
793 }
794 }
795 }
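/*
  Example: when the tree for "kp2=5" gains one more referrer (via
  SEL_TREE::keys[] or a next_key_part pointer), increment_use_count(1)
  walks every interval in this list and bumps use_count of all trees
  reachable through next_key_part, keeping the counts of shared subtrees
  consistent with their number of referrers.
*/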
796
797 /**
798 Update use count for SEL_ARG's next_key_part.
799 This function does NOT update use_count of the current
800 SEL_ARG object.
801
802 Primarily used for reducing reference count of next_key_part of a
803 node when removed from SEL_ARG tree during tree merge operations.
804
805 @param count The number of additional references to this SEL_ARG
806 tree.
807 */
808 void increment_next_key_part_use_count(long count)
809 {
810 if (next_key_part)
811 {
812 next_key_part->use_count+= count;
813 next_key_part->increment_use_count(count);
814 }
815 }
816
817 void free_tree()
818 {
819 for (SEL_ARG *pos=first(); pos ; pos=pos->next)
820 if (pos->next_key_part)
821 {
822 pos->next_key_part->use_count--;
823 pos->next_key_part->free_tree();
824 }
825 }
826
827 inline SEL_ARG **parent_ptr()
828 {
829 return parent->left == this ? &parent->left : &parent->right;
830 }
831
832
833 /*
834 Check if this SEL_ARG object represents a single-point interval
835
836 SYNOPSIS
837 is_singlepoint()
838
839 DESCRIPTION
840 Check if this SEL_ARG object (not tree) represents a single-point
841 interval, i.e. if it represents a "keypart = const" or
842 "keypart IS NULL".
843
844 RETURN
845 TRUE This SEL_ARG object represents a singlepoint interval
846 FALSE Otherwise
847 */
848
849 bool is_singlepoint() const
850 {
851 /*
852 Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
853 flags, and the same for right edge.
854 */
855 if (min_flag || max_flag)
856 return FALSE;
857 uchar *min_val= min_value;
858 uchar *max_val= max_value;
859
860 if (maybe_null)
861 {
862 /* First byte is a NULL value indicator */
863 if (*min_val != *max_val)
864 return FALSE;
865
866 if (*min_val)
867 return TRUE; /* This "x IS NULL" */
868 min_val++;
869 max_val++;
870 }
871 return !field->key_cmp(min_val, max_val);
872 }
873 SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
874 };
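/*
  A minimal walking sketch (illustrative, excluded from the build) using
  the SEL_ARG API above: first() gives the leftmost interval of the R-B
  tree, 'next' visits the disjoint intervals in order ("OR"), and
  next_key_part descends to the interval list of the following keypart
  ("AND").
*/
#if 0
static void walk_interval_lists(SEL_ARG *root)
{
  for (SEL_ARG *cur= root->first(); cur; cur= cur->next)  // OR over intervals
  {
    /* ... emit the interval [cur->min_value, cur->max_value] ... */
    if (cur->next_key_part &&
        cur->next_key_part->type == SEL_ARG::KEY_RANGE)
      walk_interval_lists(cur->next_key_part);            // AND with next keypart
  }
}
#endif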
875
876 /**
877 Helper function to compare two SEL_ARGs.
878 */
879 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
880 {
881 if (sa1 == NULL && sa2 == NULL)
882 return true;
883 if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
884 return false;
885 return sa1->all_same(sa2);
886 }
887
888 class SEL_IMERGE;
889
890
891 class SEL_TREE :public Sql_alloc
892 {
893 public:
894 /**
895 Starting an effort to document this field:
896
897 IMPOSSIBLE: if keys[i]->type == SEL_ARG::IMPOSSIBLE for some i,
898 then type == SEL_TREE::IMPOSSIBLE. Rationale: if the predicate for
899 one of the indexes is always false, then the full predicate is also
900 always false.
901
902 ALWAYS: if either (keys[i]->type == SEL_ARG::ALWAYS) or
903 (keys[i] == NULL) for all i, then type == SEL_TREE::ALWAYS.
904 Rationale: the range access method will not be able to filter
905 out any rows when there are no range predicates that can be used
906 to filter on any index.
907
908 KEY: There are range predicates that can be used on at least one
909 index.
910
911 KEY_SMALLER: There are range predicates that can be used on at
912 least one index. In addition, there are predicates that cannot
913 be directly utilized by range access on key parts in the same
914 index. These unused predicates make it probable that the row
915 estimate for range access on this index is too pessimistic.
916 */
917 enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
918
919 SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
920 : type(type_arg), keys(root, num_keys), n_ror_scans(0)
921 { }
922 SEL_TREE(MEM_ROOT *root, size_t num_keys) :
923 type(KEY), keys(root, num_keys), n_ror_scans(0)
924 { }
925 /**
926 Constructor that performs deep-copy of the SEL_ARG trees in
927 'keys[]' and the index merge alternatives in 'merges'.
928
929 @param arg The SEL_TREE to copy
930 @param param Parameters for range analysis
931 */
932 SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
933 /*
934 Possible ways to read rows using a single index because the
935 conditions of the query consist of single-index conjunctions:
936
937 (ranges_for_idx_1) AND (ranges_for_idx_2) AND ...
938
939 The SEL_ARG graph for each non-NULL element in keys[] may consist
940 of many single-index ranges (disjunctions), so ranges_for_idx_1
941 may e.g. be:
942
943 "idx_field1 = 1 OR (idx_field1 > 5 AND idx_field2 = 10)"
944
945 assuming that index1 is a composite index covering
946 (idx_field1,...,idx_field2,..)
947
948 Index merge intersection intersects ranges on SEL_ARGs from two or
949 more indexes.
950
951 Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
952 keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
953 merit in range analyzer functions (e.g. get_mm_parts) returning a
954 pointer to such SEL_TREE instead of NULL)
955 */
956 Mem_root_array<SEL_ARG *, true> keys;
957 key_map keys_map; /* bitmask of non-NULL elements in keys */
958
959 /*
960 Possible ways to read rows using Index merge (sort) union.
961
962 Each element in 'merges' consists of multi-index disjunctions,
963 which means that Index merge (sort) union must be applied to read
964 rows. The nodes in the 'merges' list form a conjunction of such
965 multi-index disjunctions.
966
967 The list is non-empty only if type==KEY.
968 */
969 List<SEL_IMERGE> merges;
970
971 /* The members below are filled/used only after get_mm_tree is done */
972 key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */
973 uint n_ror_scans; /* number of set bits in ror_scans_map */
974
975 struct st_ror_scan_info **ror_scans; /* list of ROR key scans */
976 struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
977 /* Note that #records for each key scan is stored in table->quick_rows */
978 };
979
980 class RANGE_OPT_PARAM
981 {
982 public:
983 THD *thd; /* Current thread handle */
984 TABLE *table; /* Table being analyzed */
985 Item *cond; /* Used inside get_mm_tree(). */
986 table_map prev_tables;
987 table_map read_tables;
988 table_map current_table; /* Bit of the table being analyzed */
989
990 /* Array of parts of all keys for which range analysis is performed */
991 KEY_PART *key_parts;
992 KEY_PART *key_parts_end;
993 MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
994 MEM_ROOT *old_root; /* Memory that will last until the query end */
995 /*
996 Number of indexes used in range analysis (in SEL_TREE::keys only the first
997 #keys elements are not empty)
998 */
999 uint keys;
1000
1001 /*
1002 If true, the index descriptions describe real indexes (and it is ok to
1003 call field->optimize_range(real_keynr[...], ...)).
1004 Otherwise index description describes fake indexes, like a partitioning
1005 expression.
1006 */
1007 bool using_real_indexes;
1008
1009 /*
1010 Aggressively remove "scans" that do not have conditions on first
1011 keyparts. Such scans are usable when doing partition pruning but not
1012 regular range optimization.
1013 */
1014 bool remove_jump_scans;
1015
1016 /*
1017 used_key_no -> table_key_no translation table. Only makes sense if
1018 using_real_indexes==TRUE
1019 */
1020 uint real_keynr[MAX_KEY];
1021
1022 /*
1023 Used to store 'current key tuples', in both range analysis and
1024 partitioning (list) analysis
1025 */
1026 uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
1027 max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
1028
1029 bool force_default_mrr;
1030 /**
1031 Whether index statistics or index dives should be used when
1032 estimating the number of rows in an equality range. If true, index
1033 statistics is used for these indexes.
1034 */
1035 bool use_index_statistics;
1036
1037 /// Error handler for this param.
1038
1039 Range_optimizer_error_handler error_handler;
1040
1041 bool has_errors() const { return (error_handler.has_errors()); }
1042
1043 virtual ~RANGE_OPT_PARAM() {}
1044
1045 };
1046
1047 class PARAM : public RANGE_OPT_PARAM
1048 {
1049 public:
1050 KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
1051 longlong baseflag;
1052 uint max_key_part;
1053 /* Number of ranges in the last checked tree->key */
1054 uint range_count;
1055
1056 bool quick; // Don't calculate possible keys
1057
1058 uint fields_bitmap_size;
1059 MY_BITMAP needed_fields; /* bitmask of fields needed by the query */
1060 MY_BITMAP tmp_covered_fields;
1061
1062 key_map *needed_reg; /* ptr to needed_reg argument of test_quick_select() */
1063
1064 // Buffer for index_merge cost estimates.
1065 Unique::Imerge_cost_buf_type imerge_cost_buff;
1066
1067 /* TRUE if last checked tree->key can be used for ROR-scan */
1068 bool is_ror_scan;
1069 /* Number of ranges in the last checked tree->key */
1070 uint n_ranges;
1071
1072 /*
1073 The sort order the range access method must be able
1074 to provide. Three-value logic: asc/desc/don't care
1075 */
1076 ORDER::enum_order order_direction;
1077
1078 /// Control whether the various index merge strategies are allowed
1079 bool index_merge_allowed;
1080 bool index_merge_union_allowed;
1081 bool index_merge_sort_union_allowed;
1082 bool index_merge_intersect_allowed;
1083 };
1084
1085 class TABLE_READ_PLAN;
1086 class TRP_RANGE;
1087 class TRP_ROR_INTERSECT;
1088 class TRP_ROR_UNION;
1089 class TRP_INDEX_MERGE;
1090 class TRP_GROUP_MIN_MAX;
1091
1092 struct st_ror_scan_info;
1093
1094 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,
1095 Item_func *cond_func,Field *field,
1096 Item_func::Functype type,Item *value,
1097 Item_result cmp_type);
1098 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,Item *cond_func,Field *field,
1099 KEY_PART *key_part,
1100 Item_func::Functype type,Item *value);
1101 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond);
1102
1103 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts);
1104 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
1105 SEL_ARG *tree, bool update_tbl_stats,
1106 uint *mrr_flags, uint *bufsize,
1107 Cost_estimate *cost);
1108 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
1109 SEL_ARG *key_tree, uint mrr_flags,
1110 uint mrr_buf_size, MEM_ROOT *alloc);
1111 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
1112 bool index_read_must_be_used,
1113 bool update_tbl_stats,
1114 const Cost_estimate *cost_est);
1115 static
1116 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
1117 const Cost_estimate *cost_est);
1118 static
1119 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
1120 const Cost_estimate *cost_est);
1121 static
1122 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
1123 const Cost_estimate *cost_est);
1124 #ifndef DBUG_OFF
1125 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
1126 const char *msg);
1127 static void print_ror_scans_arr(TABLE *table, const char *msg,
1128 struct st_ror_scan_info **start,
1129 struct st_ror_scan_info **end);
1130 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
1131 #endif
1132
1133 static void append_range_all_keyparts(Opt_trace_array *range_trace,
1134 String *range_string,
1135 String *range_so_far,
1136 SEL_ARG *keypart_root,
1137 const KEY_PART_INFO *key_parts,
1138 const bool print_full);
1139 static inline void dbug_print_tree(const char *tree_name,
1140 SEL_TREE *tree,
1141 const RANGE_OPT_PARAM *param);
1142
1143 static inline void print_tree(String *out,
1144 const char *tree_name,
1145 SEL_TREE *tree,
1146 const RANGE_OPT_PARAM *param,
1147 const bool print_full) MY_ATTRIBUTE((unused));
1148
1149 void append_range(String *out,
1150 const KEY_PART_INFO *key_parts,
1151 const uchar *min_key, const uchar *max_key,
1152 const uint flag);
1153
1154 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
1155 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
1156 /*
1157 A null_sel_tree is used in get_func_mm_tree_from_in_predicate to pass
1158 as an argument to tree_or. It is used only to influence the return
1159 value from the tree_or function.
1160 */
1161
1162 static MEM_ROOT null_root;
1163 static SEL_TREE null_sel_tree(SEL_TREE::IMPOSSIBLE, &null_root, 0);
1164
1165
1166 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
1167 static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
1168 static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
1169 uint clone_flag);
1170 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
1171 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
1172 SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
1173 uchar *max_key,uint max_key_flag);
1174 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
1175 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count,
1176 uint limit);
1177
1178 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
1179 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
1180 uint length);
1181 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
1182
1183
1184 /*
1185 SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
1186 a condition in the following form:
1187 (t_1||t_2||...||t_N) && (next)
1188
1189 where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
1190 (t_i,t_j) contains SEL_ARGs for the same index.
1191
1192 SEL_TREE contained in SEL_IMERGE always has merges=NULL.
1193
1194 This class relies on memory manager to do the cleanup.
1195 */
1196
1197 class SEL_IMERGE : public Sql_alloc
1198 {
1199 enum { PREALLOCED_TREES= 10};
1200 public:
1201 SEL_TREE *trees_prealloced[PREALLOCED_TREES];
1202 SEL_TREE **trees; /* trees used to do index_merge */
1203 SEL_TREE **trees_next; /* last of these trees */
1204 SEL_TREE **trees_end; /* end of allocated space */
1205
1206 SEL_ARG ***best_keys; /* best keys to read in SEL_TREEs */
1207
1208 SEL_IMERGE() :
1209 trees(&trees_prealloced[0]),
1210 trees_next(trees),
1211 trees_end(trees + PREALLOCED_TREES)
1212 {}
1213 SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
1214 int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
1215 int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
1216 int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
1217 };
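/*
  Worked example, assuming indexes i1(a) and i2(b): the condition
  (a=1 OR b=2) becomes a SEL_IMERGE with two trees, one holding ranges on
  i1 and one on i2. OR-ing in a further tree with ranges on i1 does not
  grow the list; or_sel_tree_with_checks() below folds it into the first
  tree instead.
*/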
1218
1219
1220 /*
1221 Add SEL_TREE to this index_merge without any checks.
1222
1223 NOTES
1224 This function implements the following:
1225 (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
1226
1227 RETURN
1228 0 - OK
1229 -1 - Out of memory.
1230 */
1231
1232 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
1233 {
1234 if (trees_next == trees_end)
1235 {
1236 const int realloc_ratio= 2; /* Double size for next round */
1237 uint old_elements= static_cast<uint>(trees_end - trees);
1238 uint old_size= sizeof(SEL_TREE**) * old_elements;
1239 uint new_size= old_size * realloc_ratio;
1240 SEL_TREE **new_trees;
1241 if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
1242 return -1;
1243 memcpy(new_trees, trees, old_size);
1244 trees= new_trees;
1245 trees_next= trees + old_elements;
1246 trees_end= trees + old_elements * realloc_ratio;
1247 }
1248 *(trees_next++)= tree;
1249 return 0;
1250 }
1251
1252
1253 /*
1254 Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
1255 combining new_tree with one of the trees in this SEL_IMERGE if they both
1256 have SEL_ARGs for the same key.
1257
1258 SYNOPSIS
1259 or_sel_tree_with_checks()
1260 param PARAM from test_quick_select
1261 new_tree SEL_TREE with type KEY or KEY_SMALLER.
1262
1263 NOTES
1264 This does the following:
1265 (t_1||...||t_k)||new_tree =
1266 either
1267 = (t_1||...||t_k||new_tree)
1268 or
1269 = (t_1||....||(t_j|| new_tree)||...||t_k),
1270
1271 where t_i and new_tree are SEL_TREEs.
1272 new_tree is combined with the first t_j it has a SEL_ARG on common
1273 key with. As a consequence of this, choice of keys to do index_merge
1274 read may depend on the order of conditions in WHERE part of the query.
1275
1276 RETURN
1277 0 OK
1278 1 One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
1279 and (*this) should be discarded.
1280 -1 An error occurred.
1281 */
1282
1283 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
1284 {
1285 DBUG_ENTER("SEL_IMERGE::or_sel_tree_with_checks");
1286 for (SEL_TREE** tree = trees;
1287 tree != trees_next;
1288 tree++)
1289 {
1290 if (sel_trees_can_be_ored(*tree, new_tree, param))
1291 {
1292 *tree = tree_or(param, *tree, new_tree);
1293 if (!*tree)
1294 DBUG_RETURN(1);
1295 if (((*tree)->type == SEL_TREE::MAYBE) ||
1296 ((*tree)->type == SEL_TREE::ALWAYS))
1297 DBUG_RETURN(1);
1298 /* SEL_TREE::IMPOSSIBLE is impossible here */
1299 DBUG_RETURN(0);
1300 }
1301 }
1302
1303 /* New tree cannot be combined with any of existing trees. */
1304 const int ret= or_sel_tree(param, new_tree);
1305 DBUG_RETURN(ret);
1306 }
1307
1308
1309 /*
1310 Perform OR operation on this index_merge and supplied index_merge list.
1311
1312 RETURN
1313 0 - OK
1314 1 - One of conditions in result is always TRUE and this SEL_IMERGE
1315 should be discarded.
1316 -1 - An error occurred
1317 */
1318
1319 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
1320 {
1321 for (SEL_TREE** tree= imerge->trees;
1322 tree != imerge->trees_next;
1323 tree++)
1324 {
1325 if (or_sel_tree_with_checks(param, *tree))
1326 return 1;
1327 }
1328 return 0;
1329 }
1330
1331
1332 SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param):
1333 Sql_alloc(), keys(param->mem_root, param->keys), n_ror_scans(0)
1334 {
1335 keys_map= arg->keys_map;
1336 type= arg->type;
1337 for (uint idx= 0; idx < param->keys; idx++)
1338 {
1339 if (arg->keys[idx])
1340 {
1341 keys[idx]= arg->keys[idx]->clone_tree(param);
1342 if (!keys[idx])
1343 break;
1344 keys[idx]->use_count++;
1345 keys[idx]->increment_use_count(1);
1346 }
1347 else
1348 keys[idx]= NULL;
1349 }
1350
1351 List_iterator<SEL_IMERGE> it(arg->merges);
1352 for (SEL_IMERGE *el= it++; el; el= it++)
1353 {
1354 SEL_IMERGE *merge= new (param->mem_root) SEL_IMERGE(el, param);
1355 if (!merge || merge->trees == merge->trees_next ||
1356 param->has_errors())
1357 {
1358 merges.empty();
1359 return;
1360 }
1361 merges.push_back (merge);
1362 }
1363
1364 /*
1365 SEL_TREEs are only created by get_mm_tree() (and functions called
1366 by get_mm_tree()). Index intersection is checked after
1367 get_mm_tree() has constructed all ranges. In other words, there
1368 should not be any ROR scans to copy when this ctor is called.
1369 */
1370 DBUG_ASSERT(n_ror_scans == 0);
1371 }
1372
1373
1374 SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
1375 {
1376 uint elements= static_cast<uint>(arg->trees_end - arg->trees);
1377 if (elements > PREALLOCED_TREES)
1378 {
1379 uint size= elements * sizeof (SEL_TREE **);
1380 if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
1381 goto mem_err;
1382 }
1383 else
1384 trees= &trees_prealloced[0];
1385
1386 trees_next= trees;
1387 trees_end= trees + elements;
1388
1389 for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end;
1390 tree++, arg_tree++)
1391 {
1392 if (!(*tree= new (param->mem_root) SEL_TREE(*arg_tree, param)) ||
1393 param->has_errors())
1394 goto mem_err;
1395 }
1396
1397 return;
1398
1399 mem_err:
1400 trees= &trees_prealloced[0];
1401 trees_next= trees;
1402 trees_end= trees;
1403 }
1404
1405
1406 /*
1407 Perform AND operation on two index_merge lists and store result in *im1.
1408 */
1409
1410 inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
1411 {
1412 im1->concat(im2);
1413 }
1414
1415
1416 /*
1417 Perform OR operation on 2 index_merge lists, storing result in first list.
1418
1419 NOTES
1420 The following conversion is implemented:
1421 (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
1422 => (a_1||b_1).
1423
1424 i.e. all conjuncts except the first one are currently dropped.
1425 This is done to avoid producing N*K ways to do index_merge.
1426
1427 If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
1428 and index_merge is discarded (while it is actually possible to try
1429 harder).
1430
1431 As a consequence of this, choice of keys to do index_merge read may depend
1432 on the order of conditions in WHERE part of the query.
1433
1434 RETURN
1435 0 OK, result is stored in *im1
1436 other Error, both passed lists are unusable
1437 */
1438
1439 int imerge_list_or_list(RANGE_OPT_PARAM *param,
1440 List<SEL_IMERGE> *im1,
1441 List<SEL_IMERGE> *im2)
1442 {
1443 SEL_IMERGE *imerge= im1->head();
1444 im1->empty();
1445 im1->push_back(imerge);
1446
1447 return imerge->or_sel_imerge_with_checks(param, im2->head());
1448 }
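/*
  Worked example of the conversion above: OR-ing the lists (a_1 && a_2)
  and (b_1 && b_2) keeps only (a_1 || b_1); the conjuncts a_2 and b_2 are
  dropped, trading a possibly better plan for not enumerating all four
  (a_i || b_j) combinations.
*/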
1449
1450
1451 /*
1452 Perform OR operation on index_merge list and key tree.
1453
1454 RETURN
1455 false OK, result is stored in *im1.
1456 true Error
1457 */
1458
1459 static bool imerge_list_or_tree(RANGE_OPT_PARAM *param,
1460 List<SEL_IMERGE> *im1,
1461 SEL_TREE *tree)
1462 {
1463 DBUG_ENTER("imerge_list_or_tree");
1464 SEL_IMERGE *imerge;
1465 List_iterator<SEL_IMERGE> it(*im1);
1466
1467 uint remaining_trees= im1->elements;
1468 while ((imerge= it++))
1469 {
1470 SEL_TREE *or_tree;
1471 /*
1472 Need to make a copy of 'tree' for all but the last OR operation
1473 because or_sel_tree_with_checks() may change it.
1474 */
1475 if (--remaining_trees == 0)
1476 or_tree= tree;
1477 else
1478 {
1479 or_tree= new (param->mem_root) SEL_TREE (tree, param);
1480 if (!or_tree || param->has_errors())
1481 DBUG_RETURN(true);
1482 if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
1483 DBUG_RETURN(false);
1484 }
1485
1486 int result_or= imerge->or_sel_tree_with_checks(param, or_tree);
1487 if (result_or == 1)
1488 it.remove();
1489 else if (result_or == -1)
1490 DBUG_RETURN(true);
1491 }
1492 DBUG_ASSERT(remaining_trees == 0);
1493 DBUG_RETURN(im1->is_empty());
1494 }
1495
1496
1497 #undef index // Fix for Unixware 7
1498
1499 QUICK_SELECT_I::QUICK_SELECT_I()
1500 :max_used_key_length(0),
1501 used_key_parts(0)
1502 {}
1503
1504 void QUICK_SELECT_I::trace_quick_description(Opt_trace_context *trace)
1505 {
1506 Opt_trace_object range_trace(trace, "range_details");
1507
1508 String range_info;
1509 range_info.set_charset(system_charset_info);
1510 add_info_string(&range_info);
1511 range_trace.add_utf8("used_index", range_info.ptr(), range_info.length());
1512 }
1513
1514 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
1515 bool no_alloc, MEM_ROOT *parent_alloc,
1516 bool *create_error)
1517 :ranges(key_memory_Quick_ranges), free_file(0), cur_range(NULL), last_range(0),
1518 mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
1519 dont_free(0)
1520 {
1521 my_bitmap_map *bitmap;
1522 DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
1523
1524 in_ror_merged_scan= 0;
1525 index= key_nr;
1526 head= table;
1527 key_part_info= head->key_info[index].key_part;
1528
1529 /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
1530 mrr_buf_size= thd->variables.read_rnd_buff_size;
1531
1532 if (!no_alloc && !parent_alloc)
1533 {
1534 // Allocates everything through the internal memroot
1535 init_sql_alloc(key_memory_quick_range_select_root,
1536 &alloc, thd->variables.range_alloc_block_size, 0);
1537 thd->mem_root= &alloc;
1538 }
1539 else
1540 memset(&alloc, 0, sizeof(alloc));
1541 file= head->file;
1542 record= head->record[0];
1543
1544 /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
1545 if (!(bitmap= (my_bitmap_map*) my_malloc(key_memory_my_bitmap_map,
1546 head->s->column_bitmap_size,
1547 MYF(MY_WME))))
1548 {
1549 column_bitmap.bitmap= 0;
1550 *create_error= 1;
1551 }
1552 else
1553 bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
1554 DBUG_VOID_RETURN;
1555 }
1556
1557
1558 void QUICK_RANGE_SELECT::need_sorted_output()
1559 {
1560 mrr_flags |= HA_MRR_SORTED;
1561 }
1562
1563
1564 int QUICK_RANGE_SELECT::init()
1565 {
1566 DBUG_ENTER("QUICK_RANGE_SELECT::init");
1567
1568 if (file->inited)
1569 file->ha_index_or_rnd_end();
1570 DBUG_RETURN(FALSE);
1571 }
1572
1573
1574 void QUICK_RANGE_SELECT::range_end()
1575 {
1576 if (file->inited)
1577 file->ha_index_or_rnd_end();
1578 }
1579
1580
1581 QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
1582 {
1583 DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
1584 if (!dont_free)
1585 {
1586 /* file is NULL for CPK scan on covering ROR-intersection */
1587 if (file)
1588 {
1589 range_end();
1590 if (free_file)
1591 {
1592 DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
1593 free_file));
1594 file->ha_external_lock(current_thd, F_UNLCK);
1595 file->ha_close();
1596 delete file;
1597 }
1598 }
1599 free_root(&alloc,MYF(0));
1600 my_free(column_bitmap.bitmap);
1601 }
1602 my_free(mrr_buf_desc);
1603 DBUG_VOID_RETURN;
1604 }
1605
1606
1607 QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
1608 TABLE *table)
1609 :unique(NULL), pk_quick_select(NULL), thd(thd_param)
1610 {
1611 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
1612 index= MAX_KEY;
1613 head= table;
1614
1615 init_sql_alloc(key_memory_quick_index_merge_root,
1616 &alloc, thd->variables.range_alloc_block_size, 0);
1617 DBUG_VOID_RETURN;
1618 }
1619
1620 int QUICK_INDEX_MERGE_SELECT::init()
1621 {
1622 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
1623 DBUG_RETURN(0);
1624 }
1625
1626 int QUICK_INDEX_MERGE_SELECT::reset()
1627 {
1628 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
1629 const int retval= read_keys_and_merge();
1630 DBUG_RETURN(retval);
1631 }
1632
1633 bool
1634 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
1635 {
1636 /*
1637 Save quick_select that does scan on clustered primary key as it will be
1638 processed separately.
1639 */
1640 if (head->file->primary_key_is_clustered() &&
1641 quick_sel_range->index == head->s->primary_key)
1642 pk_quick_select= quick_sel_range;
1643 else
1644 return quick_selects.push_back(quick_sel_range);
1645 return 0;
1646 }
1647
1648 QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
1649 {
1650 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1651 QUICK_RANGE_SELECT* quick;
1652 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
1653 delete unique;
1654 quick_it.rewind();
1655 while ((quick= quick_it++))
1656 quick->file= NULL;
1657 quick_selects.delete_elements();
1658 delete pk_quick_select;
1659 /* It's ok to call the next two even if they are already deinitialized */
1660 end_read_record(&read_record);
1661 free_io_cache(head);
1662 free_root(&alloc,MYF(0));
1663 DBUG_VOID_RETURN;
1664 }
1665
1666
1667 QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
1668 TABLE *table,
1669 bool retrieve_full_rows,
1670 MEM_ROOT *parent_alloc)
1671 : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
1672 scans_inited(FALSE)
1673 {
1674 index= MAX_KEY;
1675 head= table;
1676 record= head->record[0];
1677 if (!parent_alloc)
1678 init_sql_alloc(key_memory_quick_ror_intersect_select_root,
1679 &alloc, thd->variables.range_alloc_block_size, 0);
1680 else
1681 memset(&alloc, 0, sizeof(MEM_ROOT));
1682 last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
1683 head->file->ref_length);
1684 }
1685
1686
1687 /*
1688 Do post-constructor initialization.
1689 SYNOPSIS
1690 QUICK_ROR_INTERSECT_SELECT::init()
1691
1692 RETURN
1693 0 OK
1694 other Error code
1695 */
1696
1697 int QUICK_ROR_INTERSECT_SELECT::init()
1698 {
1699 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
1700 /* Check if last_rowid was successfully allocated in ctor */
1701 DBUG_RETURN(!last_rowid);
1702 }
1703
1704
1705 /*
1706 Initialize this quick select to be a ROR-merged scan.
1707
1708 SYNOPSIS
1709 QUICK_RANGE_SELECT::init_ror_merged_scan()
1710 reuse_handler If TRUE, use head->file, otherwise create a separate
1711 handler object
1712
1713 NOTES
1714     This function creates and prepares for subsequent use a separate handler
1715     object if it can't reuse head->file. The reason for this is that during
1716     ROR-merge several key scans are performed simultaneously, and a single
1717     handler is only capable of preserving the context of a single key scan.
1718
1719     In a ROR-merge, the quick select doing the merge retrieves full records,
1720     while the merged quick selects read only keys.
1721
1722 RETURN
1723 0 ROR child scan initialized, ok to use.
1724 1 error
1725 */
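/*
  Illustration (a sketch of the point above, not code from this file):
  merging scans on two keys of the same table needs two handler objects,
  because each handler tracks at most one in-progress index scan:

    h1->ha_index_init(keynr1, true);   // scan no. 1
    h2->ha_index_init(keynr2, true);   // scan no. 2, concurrent
    h1->ha_index_next(buf);            // does not disturb h2's position
*/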
1726
1727 int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
1728 {
1729 handler *save_file= file, *org_file;
1730 THD *thd;
1731 MY_BITMAP * const save_read_set= head->read_set;
1732 MY_BITMAP * const save_write_set= head->write_set;
1733 DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
1734
1735 in_ror_merged_scan= 1;
1736 mrr_flags|= HA_MRR_SORTED;
1737 if (reuse_handler)
1738 {
1739 DBUG_PRINT("info", ("Reusing handler %p", file));
1740 if (init() || reset())
1741 {
1742 DBUG_RETURN(1);
1743 }
1744 head->column_bitmaps_set(&column_bitmap, &column_bitmap);
1745 file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
1746 goto end;
1747 }
1748
1749 /* Create a separate handler object for this quick select */
1750 if (free_file)
1751 {
1752 /* already have own 'handler' object. */
1753 DBUG_RETURN(0);
1754 }
1755
1756 thd= head->in_use;
1757 if (!(file= head->file->clone(head->s->normalized_path.str, thd->mem_root)))
1758 {
1759 /*
1760       Manually set the error flag. Note: there seem to be quite a few
1761 places where a failure could cause the server to "hang" the client by
1762 sending no response to a query. ATM those are not real errors because
1763 the storage engine calls in question happen to never fail with the
1764 existing storage engines.
1765 */
1766 my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
1767 /* Caller will free the memory */
1768 goto failure; /* purecov: inspected */
1769 }
1770
1771 head->column_bitmaps_set(&column_bitmap, &column_bitmap);
1772
1773 if (file->ha_external_lock(thd, F_RDLCK))
1774 goto failure;
1775
1776 if (init() || reset())
1777 {
1778 file->ha_external_lock(thd, F_UNLCK);
1779 file->ha_close();
1780 goto failure;
1781 }
1782 free_file= TRUE;
1783 last_rowid= file->ref;
1784 file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
1785
1786 end:
1787 /*
1788 We are only going to read key fields and call position() on 'file'
1789 The following sets head->tmp_set to only use this key and then updates
1790 head->read_set and head->write_set to use this bitmap.
1791     The new bitmap is stored in 'column_bitmap', which is used in ::get_next()
1792 */
1793 org_file= head->file;
1794 head->file= file;
1795 /* We don't have to set 'head->keyread' here as the 'file' is unique */
1796 if (!head->no_keyread)
1797 head->mark_columns_used_by_index(index);
1798 head->prepare_for_position();
1799 head->file= org_file;
1800 bitmap_copy(&column_bitmap, head->read_set);
1801
1802 /*
1803 We have prepared a column_bitmap which get_next() will use. To do this we
1804 used TABLE::read_set/write_set as playground; restore them to their
1805 original value to not pollute other scans.
1806 */
1807 head->column_bitmaps_set(save_read_set, save_write_set);
1808 bitmap_clear_all(&head->tmp_set);
1809
1810 DBUG_RETURN(0);
1811
1812 failure:
1813 head->column_bitmaps_set(save_read_set, save_write_set);
1814 delete file;
1815 file= save_file;
1816 DBUG_RETURN(1);
1817 }
1818
1819
1820 /*
1821 Initialize this quick select to be a part of a ROR-merged scan.
1822 SYNOPSIS
1823 QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1824 reuse_handler If TRUE, use head->file, otherwise create separate
1825 handler object.
1826 RETURN
1827 0 OK
1828 other error code
1829 */
1830 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1831 {
1832 int error;
1833 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1834 QUICK_RANGE_SELECT* quick;
1835 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1836
1837 /* Initialize all merged "children" quick selects */
1838 DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1839 if (!need_to_fetch_row && reuse_handler)
1840 {
1841 quick= quick_it++;
1842 /*
1843       There is no use of this->file yet; use it for the first of the merged
1844       range selects.
1845 */
1846 int error= quick->init_ror_merged_scan(TRUE);
1847 if (error)
1848 DBUG_RETURN(error);
1849 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1850 }
1851 while ((quick= quick_it++))
1852 {
1853 #ifndef DBUG_OFF
1854 const MY_BITMAP * const save_read_set= quick->head->read_set;
1855 const MY_BITMAP * const save_write_set= quick->head->write_set;
1856 #endif
1857 if ((error= quick->init_ror_merged_scan(FALSE)))
1858 DBUG_RETURN(error);
1859 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1860 // Sets are shared by all members of "quick_selects" so must not change
1861 DBUG_ASSERT(quick->head->read_set == save_read_set);
1862 DBUG_ASSERT(quick->head->write_set == save_write_set);
1863 /* All merged scans share the same record buffer in intersection. */
1864 quick->record= head->record[0];
1865 }
1866
1867 /* Prepare for ha_rnd_pos calls if needed. */
1868 if (need_to_fetch_row && (error= head->file->ha_rnd_init(false)))
1869 {
1870 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1871 DBUG_RETURN(error);
1872 }
1873 DBUG_RETURN(0);
1874 }
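/*
  For orientation, the intersection done later in get_next() can be
  pictured like this (a sketch, not the actual loop):

    rowid= first child's next rowid;
    for each other child: advance until its rowid >= rowid;
    if all children stopped on the same rowid -> the row is in the
    intersection; otherwise retry from the largest rowid seen.
*/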
1875
1876
1877 /*
1878 Initialize quick select for row retrieval.
1879 SYNOPSIS
1880 reset()
1881 RETURN
1882 0 OK
1883 other Error code
1884 */
1885
1886 int QUICK_ROR_INTERSECT_SELECT::reset()
1887 {
1888 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1889 if (!scans_inited && init_ror_merged_scan(TRUE))
1890 DBUG_RETURN(1);
1891 scans_inited= TRUE;
1892 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
1893 QUICK_RANGE_SELECT *quick;
1894 while ((quick= it++))
1895 quick->reset();
1896 DBUG_RETURN(0);
1897 }
1898
1899
1900 /*
1901 Add a merged quick select to this ROR-intersection quick select.
1902
1903 SYNOPSIS
1904 QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1905 quick Quick select to be added. The quick select must return
1906 rows in rowid order.
1907 NOTES
1908 This call can only be made before init() is called.
1909
1910 RETURN
1911 FALSE OK
1912 TRUE Out of memory.
1913 */
1914
1915 bool
1916 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
1917 {
1918 return quick_selects.push_back(quick);
1919 }
1920
1921 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
1922 {
1923 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
1924 quick_selects.delete_elements();
1925 delete cpk_quick;
1926 free_root(&alloc,MYF(0));
1927 if (need_to_fetch_row && head->file->inited)
1928 head->file->ha_rnd_end();
1929 DBUG_VOID_RETURN;
1930 }
1931
1932
1933 QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
1934 TABLE *table)
1935 : queue(Quick_ror_union_less(this),
1936 Malloc_allocator<PSI_memory_key>(PSI_INSTRUMENT_ME)),
1937 thd(thd_param), scans_inited(FALSE)
1938 {
1939 index= MAX_KEY;
1940 head= table;
1941 rowid_length= table->file->ref_length;
1942 record= head->record[0];
1943 init_sql_alloc(key_memory_quick_ror_union_select_root,
1944 &alloc, thd->variables.range_alloc_block_size, 0);
1945 thd_param->mem_root= &alloc;
1946 }
1947
1948
1949 /*
1950 Do post-constructor initialization.
1951 SYNOPSIS
1952 QUICK_ROR_UNION_SELECT::init()
1953
1954 RETURN
1955 0 OK
1956 other Error code
1957 */
1958
1959 int QUICK_ROR_UNION_SELECT::init()
1960 {
1961 DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1962 if (queue.reserve(quick_selects.elements))
1963 {
1964 DBUG_RETURN(1);
1965 }
1966
1967 if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1968 DBUG_RETURN(1);
1969 prev_rowid= cur_rowid + head->file->ref_length;
1970 DBUG_RETURN(0);
1971 }
1972
1973
1974 /*
1975 Initialize quick select for row retrieval.
1976 SYNOPSIS
1977 reset()
1978
1979 RETURN
1980 0 OK
1981 other Error code
1982 */
1983
1984 int QUICK_ROR_UNION_SELECT::reset()
1985 {
1986 QUICK_SELECT_I *quick;
1987 int error;
1988 DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
1989 have_prev_rowid= FALSE;
1990 if (!scans_inited)
1991 {
1992 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1993 while ((quick= it++))
1994 {
1995 /*
1996 Use mem_root of this "QUICK" as using the statement mem_root
1997 might result in too many allocations when combined with
1998 dynamic range access where range optimizer is invoked many times
1999 for a single statement.
2000 */
2001 THD *thd= quick->head->in_use;
2002 MEM_ROOT *saved_root= thd->mem_root;
2003 thd->mem_root= &alloc;
2004 error= quick->init_ror_merged_scan(false);
2005 thd->mem_root= saved_root;
2006 if (error)
2007 DBUG_RETURN(1);
2008 }
2009 scans_inited= TRUE;
2010 }
2011 queue.clear();
2012 /*
2013 Initialize scans for merged quick selects and put all merged quick
2014 selects into the queue.
2015 */
2016 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
2017 while ((quick= it++))
2018 {
2019 if ((error= quick->reset()))
2020 DBUG_RETURN(error);
2021 if ((error= quick->get_next()))
2022 {
2023 if (error == HA_ERR_END_OF_FILE)
2024 continue;
2025 DBUG_RETURN(error);
2026 }
2027 quick->save_last_pos();
2028 queue.push(quick);
2029 }
2030
2031 /* Prepare for ha_rnd_pos calls. */
2032 if (head->file->inited && (error= head->file->ha_rnd_end()))
2033 {
2034 DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
2035 DBUG_RETURN(error);
2036 }
2037 if ((error= head->file->ha_rnd_init(false)))
2038 {
2039 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
2040 DBUG_RETURN(error);
2041 }
2042
2043 DBUG_RETURN(0);
2044 }
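/*
  How the queue built above is consumed -- a sketch of the intended
  rowid-union (get_next() is the authoritative version):

    quick= queue.top();                // child with the smallest saved rowid
    emit its row unless rowid == prev_rowid (duplicate across children);
    advance that child, save_last_pos(), re-push it or drop it on EOF.
*/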
2045
2046
2047 bool
2048 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
2049 {
2050 return quick_selects.push_back(quick_sel_range);
2051 }
2052
2053 QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
2054 {
2055 DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
2056 quick_selects.delete_elements();
2057 if (head->file->inited)
2058 head->file->ha_rnd_end();
2059 free_root(&alloc,MYF(0));
2060 DBUG_VOID_RETURN;
2061 }
2062
2063
2064 QUICK_RANGE::QUICK_RANGE()
2065 :min_key(0),max_key(0),min_length(0),max_length(0),
2066 flag(NO_MIN_RANGE | NO_MAX_RANGE), rkey_func_flag(HA_READ_INVALID),
2067 min_keypart_map(0), max_keypart_map(0)
2068 {}
2069
2070 QUICK_RANGE::QUICK_RANGE(const uchar *min_key_arg, uint min_length_arg,
2071 key_part_map min_keypart_map_arg,
2072 const uchar *max_key_arg, uint max_length_arg,
2073 key_part_map max_keypart_map_arg, uint flag_arg,
2074 enum ha_rkey_function rkey_func_flag_arg)
2075 :min_key(NULL),
2076 max_key(NULL),
2077 min_length((uint16) min_length_arg),
2078 max_length((uint16) max_length_arg),
2079 flag((uint16) flag_arg),
2080 rkey_func_flag(rkey_func_flag_arg),
2081 min_keypart_map(min_keypart_map_arg),
2082 max_keypart_map(max_keypart_map_arg)
2083 {
2084 min_key= static_cast<uchar*>(sql_memdup(min_key_arg, min_length_arg + 1));
2085 max_key= static_cast<uchar*>(sql_memdup(max_key_arg, max_length_arg + 1));
2086 // If we get is_null_string as argument, the memdup is undefined behavior.
2087 DBUG_ASSERT(min_key_arg != is_null_string);
2088 DBUG_ASSERT(max_key_arg != is_null_string);
2089 }
2090
2091 SEL_ARG::SEL_ARG(SEL_ARG &arg)
2092 :Sql_alloc(),
2093 min_flag(arg.min_flag),
2094 max_flag(arg.max_flag),
2095 maybe_flag(arg.maybe_flag),
2096 part(arg.part),
2097 maybe_null(arg.maybe_null),
2098 rkey_func_flag(arg.rkey_func_flag),
2099 elements(1),
2100 use_count(1),
2101 field(arg.field),
2102 min_value(arg.min_value),
2103 max_value(arg.max_value),
2104 left(&null_element),
2105 right(&null_element),
2106 next(NULL),
2107 prev(NULL),
2108 next_key_part(arg.next_key_part),
2109 type(arg.type)
2110 {
2111 DBUG_ASSERT(arg.type != MAYBE_KEY); // Would need left=right=NULL
2112 }
2113
2114
2115 inline void SEL_ARG::make_root()
2116 {
2117 left=right= &null_element;
2118 color=BLACK;
2119 next=prev= NULL;
2120 use_count=0; elements=1;
2121 }
2122
2123 SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
2124 const uchar *max_value_arg)
2125 :min_flag(0), max_flag(0), maybe_flag(0), part(0),
2126 maybe_null(f->real_maybe_null()), rkey_func_flag(HA_READ_INVALID),
2127 elements(1), use_count(1), field(f),
2128 min_value(const_cast<uchar *>(min_value_arg)),
2129 max_value(const_cast<uchar *>(max_value_arg)),
2130 left(&null_element), right(&null_element),
2131 next(NULL), prev(NULL),
2132 next_key_part(0), color(BLACK), type(KEY_RANGE)
2133 {}
2134
2135 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
2136 uchar *min_value_, uchar *max_value_,
2137 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
2138 :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), part(part_),
2139 maybe_null(field_->real_maybe_null()),
2140 rkey_func_flag(HA_READ_INVALID), elements(1),use_count(1),
2141 field(field_), min_value(min_value_), max_value(max_value_),
2142 left(&null_element), right(&null_element),
2143 next(NULL), prev(NULL), next_key_part(0), color(BLACK), type(KEY_RANGE)
2144 {}
2145
2146 SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
2147 SEL_ARG **next_arg)
2148 {
2149 SEL_ARG *tmp;
2150
2151 if (param->has_errors())
2152 return 0;
2153
2154 if (type != KEY_RANGE)
2155 {
2156 if (!(tmp= new (param->mem_root) SEL_ARG(type)))
2157 return 0; // out of memory
2158 tmp->prev= *next_arg; // Link into next/prev chain
2159 (*next_arg)->next=tmp;
2160 (*next_arg)= tmp;
2161 tmp->part= this->part;
2162 }
2163 else
2164 {
2165 if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
2166 min_flag, max_flag, maybe_flag)))
2167 return 0; // OOM
2168 tmp->parent=new_parent;
2169 tmp->next_key_part=next_key_part;
2170 if (left != &null_element)
2171 if (!(tmp->left=left->clone(param, tmp, next_arg)))
2172 return 0; // OOM
2173
2174 tmp->prev= *next_arg; // Link into next/prev chain
2175 (*next_arg)->next=tmp;
2176 (*next_arg)= tmp;
2177
2178 if (right != &null_element)
2179 if (!(tmp->right= right->clone(param, tmp, next_arg)))
2180 return 0; // OOM
2181 }
2182 increment_use_count(1);
2183 tmp->color= color;
2184 tmp->elements= this->elements;
2185 return tmp;
2186 }
2187
2188 /**
2189 This gives the first SEL_ARG in the interval list, and the minimal element
2190   in the red-black tree.
2191
2192 @return
2193 SEL_ARG first SEL_ARG in the interval list
2194 */
2195 SEL_ARG *SEL_ARG::first()
2196 {
2197 SEL_ARG *next_arg=this;
2198 if (!next_arg->left)
2199 return 0; // MAYBE_KEY
2200 while (next_arg->left != &null_element)
2201 next_arg=next_arg->left;
2202 return next_arg;
2203 }
2204
2205 const SEL_ARG *SEL_ARG::first() const
2206 {
2207 return const_cast<SEL_ARG*>(this)->first();
2208 }
2209
2210 SEL_ARG *SEL_ARG::last()
2211 {
2212 SEL_ARG *next_arg=this;
2213 if (!next_arg->right)
2214 return 0; // MAYBE_KEY
2215 while (next_arg->right != &null_element)
2216 next_arg=next_arg->right;
2217 return next_arg;
2218 }
2219
2220
2221 /*
2222   Check if a compare is ok, when one takes ranges into account
2223   Returns -2 or 2 if the ranges were 'joined', like < 2 and >= 2
2224 */
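/*
  Example of the +-2 return values (hypothetical ranges): comparing the
  max bound of "a < 2" (NEAR_MAX) against the min bound of "a >= 2"
  (closed) finds equal key values, but the intervals are adjacent and can
  be joined into one; sel_cmp() reports this with -2/2 instead of the
  plain -1/1 used for genuinely different values.
*/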
2225
2226 static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
2227 uint8 b_flag)
2228 {
2229 int cmp;
2230 /* First check if there was a compare to a min or max element */
2231 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2232 {
2233 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
2234 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
2235 return 0;
2236 return (a_flag & NO_MIN_RANGE) ? -1 : 1;
2237 }
2238 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2239 return (b_flag & NO_MIN_RANGE) ? 1 : -1;
2240
2241 if (field->real_maybe_null()) // If null is part of key
2242 {
2243 if (*a != *b)
2244 {
2245 return *a ? -1 : 1;
2246 }
2247 if (*a)
2248       goto end;                               // NULLs were equal
2249 a++; b++; // Skip NULL marker
2250 }
2251 cmp=field->key_cmp(a , b);
2252 if (cmp) return cmp < 0 ? -1 : 1; // The values differed
2253
2254   // Check if the compared equal arguments were defined with an open/closed range
2255 end:
2256 if (a_flag & (NEAR_MIN | NEAR_MAX))
2257 {
2258 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
2259 return 0;
2260 if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
2261 return (a_flag & NEAR_MIN) ? 2 : -2;
2262 return (a_flag & NEAR_MIN) ? 1 : -1;
2263 }
2264 if (b_flag & (NEAR_MIN | NEAR_MAX))
2265 return (b_flag & NEAR_MIN) ? -2 : 2;
2266   return 0;                                   // The elements were equal
2267 }
2268
2269
2270 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2271 {
2272 SEL_ARG tmp_link,*next_arg,*root;
2273 next_arg= &tmp_link;
2274 if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)) ||
2275 (param && param->has_errors()))
2276 return 0;
2277 next_arg->next=0; // Fix last link
2278 tmp_link.next->prev=0; // Fix first link
2279 if (root) // If not OOM
2280 root->use_count= 0;
2281 return root;
2282 }
2283
2284
2285 /*
2286 Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2287 objects from table read plans.
2288 */
2289 class TABLE_READ_PLAN
2290 {
2291 public:
2292 /*
2293 Plan read cost, with or without cost of full row retrieval, depending
2294 on plan creation parameters.
2295 */
2296 Cost_estimate cost_est;
2297 ha_rows records; /* estimate of #rows to be examined */
2298
2299 /*
2300 If TRUE, the scan returns rows in rowid order. This is used only for
2301 scans that can be both ROR and non-ROR.
2302 */
2303 bool is_ror;
2304
2305 /*
2306 Create quick select for this plan.
2307 SYNOPSIS
2308 make_quick()
2309 param Parameter from test_quick_select
2310 retrieve_full_rows If TRUE, created quick select will do full record
2311 retrieval.
2312 parent_alloc Memory pool to use, if any.
2313
2314 NOTES
2315 retrieve_full_rows is ignored by some implementations.
2316
2317 RETURN
2318 created quick select
2319 NULL on any error.
2320 */
2321 virtual QUICK_SELECT_I *make_quick(PARAM *param,
2322 bool retrieve_full_rows,
2323 MEM_ROOT *parent_alloc=NULL) = 0;
2324
2325 /* Table read plans are allocated on MEM_ROOT and are never deleted */
2326   static void *operator new(size_t size, MEM_ROOT *mem_root)
2327 { return alloc_root(mem_root, size); }
2328   static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
2329   static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
2330   virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */
2331
2332 /**
2333 Add basic info for this TABLE_READ_PLAN to the optimizer trace.
2334
2335 @param param Parameters for range analysis of this table
2336 @param trace_object The optimizer trace object the info is appended to
2337 */
2338 virtual void trace_basic_info(const PARAM *param,
2339 Opt_trace_object *trace_object) const = 0;
2340 };
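/*
  Typical lifecycle, as a minimal sketch (assuming a PARAM *param already
  set up by test_quick_select): plans live on the MEM_ROOT and are never
  deleted explicitly; only the quick select is:

    TRP_RANGE *trp= new (param->mem_root) TRP_RANGE(key, idx, mrr_flags);
    QUICK_SELECT_I *quick= trp->make_quick(param, true, NULL);
    // ... use quick ...; delete quick; trp goes away with the MEM_ROOT
*/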
2341
2342 /*
2343 Plan for a QUICK_RANGE_SELECT scan.
2344 TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2345 QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2346 record retrieval scans.
2347 */
2348
2349 class TRP_RANGE : public TABLE_READ_PLAN
2350 {
2351 public:
2352 /**
2353 Root of red-black tree for intervals over key fields to be used in
2354 "range" method retrieval. See SEL_ARG graph description.
2355 */
2356 SEL_ARG *key;
2357 uint key_idx; /* key number in PARAM::key and PARAM::real_keynr*/
2358 uint mrr_flags;
2359 uint mrr_buf_size;
2360
2361   TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2362 : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2363 {}
2364   virtual ~TRP_RANGE() {} /* Remove gcc warning */
2365
2366   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2367 MEM_ROOT *parent_alloc)
2368 {
2369 DBUG_ENTER("TRP_RANGE::make_quick");
2370 QUICK_RANGE_SELECT *quick;
2371 if ((quick= get_quick_select(param, key_idx, key, mrr_flags, mrr_buf_size,
2372 parent_alloc)))
2373 {
2374 quick->records= records;
2375 quick->cost_est= cost_est;
2376 }
2377 DBUG_RETURN(quick);
2378 }
2379
2380 void trace_basic_info(const PARAM *param,
2381 Opt_trace_object *trace_object) const;
2382 };
2383
2384 void TRP_RANGE::trace_basic_info(const PARAM *param,
2385 Opt_trace_object *trace_object) const
2386 {
2387 #ifdef OPTIMIZER_TRACE
2388 DBUG_ASSERT(param->using_real_indexes);
2389 const uint keynr_in_table= param->real_keynr[key_idx];
2390
2391 const KEY &cur_key= param->table->key_info[keynr_in_table];
2392 const KEY_PART_INFO *key_part= cur_key.key_part;
2393
2394 trace_object->add_alnum("type", "range_scan").
2395 add_utf8("index", cur_key.name).add("rows", records);
2396
2397   Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");
2398
2399 // TRP_RANGE should not be created if there are no range intervals
2400 DBUG_ASSERT(key);
2401
2402 String range_info;
2403 range_info.set_charset(system_charset_info);
2404 append_range_all_keyparts(&trace_range, NULL, &range_info,
2405 key, key_part, false);
2406 #endif
2407 }
2408
2409
2410 typedef struct st_ror_scan_info
2411 {
2412 uint idx; ///< # of used key in param->keys
2413 uint keynr; ///< # of used key in table
2414 ha_rows records; ///< estimate of # records this scan will return
2415
2416 /** Set of intervals over key fields that will be used for row retrieval. */
2417 SEL_ARG *sel_arg;
2418
2419 /** Fields used in the query and covered by this ROR scan. */
2420 MY_BITMAP covered_fields;
2421 /**
2422 Fields used in the query that are a) covered by this ROR scan and
2423 b) not already covered by ROR scans ordered earlier in the merge
2424 sequence.
2425 */
2426 MY_BITMAP covered_fields_remaining;
2427 /** #fields in covered_fields_remaining (caching of bitmap_bits_set()) */
2428 uint num_covered_fields_remaining;
2429
2430 /**
2431 Cost of reading all index records with values in sel_arg intervals set
2432 (assuming there is no need to access full table records)
2433 */
2434 Cost_estimate index_read_cost;
2435 } ROR_SCAN_INFO;
2436
2437 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2438
2439 class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
2440 {
2441 public:
2442   TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2443   virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2444 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2445 MEM_ROOT *parent_alloc);
2446
2447 /* Array of pointers to ROR range scans used in this intersection */
2448 struct st_ror_scan_info **first_scan;
2449 struct st_ror_scan_info **last_scan; /* End of the above array */
2450 struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
2451 bool is_covering; /* TRUE if no row retrieval phase is necessary */
2452 Cost_estimate index_scan_cost; /* SUM(cost(index_scan)) */
2453
2454 void trace_basic_info(const PARAM *param,
2455 Opt_trace_object *trace_object) const;
2456 };
2457
2458 void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
2459 Opt_trace_object *trace_object) const
2460 {
2461 #ifdef OPTIMIZER_TRACE
2462 trace_object->add_alnum("type", "index_roworder_intersect").
2463 add("rows", records).
2464 add("cost", cost_est).
2465 add("covering", is_covering).
2466 add("clustered_pk_scan", cpk_scan != NULL);
2467
2468   Opt_trace_context * const trace= &param->thd->opt_trace;
2469 Opt_trace_array ota(trace, "intersect_of");
2470 for (st_ror_scan_info **cur_scan= first_scan;
2471 cur_scan != last_scan;
2472 cur_scan++)
2473 {
2474 const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
2475 const KEY_PART_INFO *key_part= cur_key.key_part;
2476
2477 Opt_trace_object trace_isect_idx(trace);
2478 trace_isect_idx.add_alnum("type", "range_scan").
2479 add_utf8("index", cur_key.name).add("rows", (*cur_scan)->records);
2480
2481 Opt_trace_array trace_range(trace, "ranges");
2482 for (const SEL_ARG *current= (*cur_scan)->sel_arg;
2483 current;
2484 current= current->next)
2485 {
2486 String range_info;
2487 range_info.set_charset(system_charset_info);
2488 for (const SEL_ARG *part= current;
2489 part;
2490 part= part->next_key_part)
2491 {
2492 const KEY_PART_INFO *cur_key_part= key_part + part->part;
2493 append_range(&range_info, cur_key_part,
2494 part->min_value, part->max_value,
2495 part->min_flag | part->max_flag);
2496 }
2497 trace_range.add_utf8(range_info.ptr(), range_info.length());
2498 }
2499 }
2500 #endif
2501 }
2502
2503 /*
2504 Plan for QUICK_ROR_UNION_SELECT scan.
2505 QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2506 is ignored by make_quick.
2507 */
2508
2509 class TRP_ROR_UNION : public TABLE_READ_PLAN
2510 {
2511 public:
2512   TRP_ROR_UNION() {} /* Remove gcc warning */
2513   virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */
2514 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2515 MEM_ROOT *parent_alloc);
2516 TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
2517 TABLE_READ_PLAN **last_ror; /* end of the above array */
2518
2519 void trace_basic_info(const PARAM *param,
2520 Opt_trace_object *trace_object) const;
2521 };
2522
2523 void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
2524 Opt_trace_object *trace_object) const
2525 {
2526 #ifdef OPTIMIZER_TRACE
2527   Opt_trace_context * const trace= &param->thd->opt_trace;
2528 trace_object->add_alnum("type", "index_roworder_union");
2529 Opt_trace_array ota(trace, "union_of");
2530 for (TABLE_READ_PLAN **current= first_ror;
2531 current != last_ror;
2532 current++)
2533 {
2534 Opt_trace_object trp_info(trace);
2535 (*current)->trace_basic_info(param, &trp_info);
2536 }
2537 #endif
2538 }
2539
2540 /*
2541 Plan for QUICK_INDEX_MERGE_SELECT scan.
2542   QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
2543 is ignored by make_quick.
2544 */
2545
2546 class TRP_INDEX_MERGE : public TABLE_READ_PLAN
2547 {
2548 public:
2549   TRP_INDEX_MERGE() {} /* Remove gcc warning */
2550   virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */
2551 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2552 MEM_ROOT *parent_alloc);
2553 TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
2554 TRP_RANGE **range_scans_end; /* end of the array */
2555
2556 void trace_basic_info(const PARAM *param,
2557 Opt_trace_object *trace_object) const;
2558 };
2559
2560 void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
2561 Opt_trace_object *trace_object) const
2562 {
2563 #ifdef OPTIMIZER_TRACE
2564   Opt_trace_context * const trace= &param->thd->opt_trace;
2565 trace_object->add_alnum("type", "index_merge");
2566 Opt_trace_array ota(trace, "index_merge_of");
2567 for (TRP_RANGE **current= range_scans;
2568 current != range_scans_end;
2569 current++)
2570 {
2571 Opt_trace_object trp_info(trace);
2572 (*current)->trace_basic_info(param, &trp_info);
2573 }
2574 #endif
2575 }
2576
2577 /*
2578 Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2579 */
2580
2581 class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
2582 {
2583 private:
2584 bool have_min; ///< TRUE if there is a MIN function
2585 bool have_max; ///< TRUE if there is a MAX function
2586 /**
2587 TRUE if there is an aggregate distinct function, e.g.
2588 "COUNT(DISTINCT x)"
2589 */
2590 bool have_agg_distinct;
2591 /**
2592 The key_part of the only field used by all MIN/MAX functions.
2593 Note that TRP_GROUP_MIN_MAX is not used if there are MIN/MAX
2594 functions on more than one field.
2595 */
2596 KEY_PART_INFO *min_max_arg_part;
2597 uint group_prefix_len; ///< Length of all key parts in the group prefix
2598 uint used_key_parts; ///< Number of index key parts used for access
2599 uint group_key_parts; ///< Number of index key parts in the group prefix
2600 KEY *index_info; ///< The index chosen for data access
2601 uint index; ///< The id of the chosen index
2602 uchar key_infix[MAX_KEY_LENGTH]; ///< Constants from equality predicates
2603 uint key_infix_len; ///< Length of key_infix
2604 SEL_TREE *range_tree; ///< Represents all range predicates in the query
2605 SEL_ARG *index_tree; ///< The sub-tree corresponding to index_info
2606 uint param_idx; ///< Index of used key in param->key
2607 bool is_index_scan; ///< Use index_next() instead of random read
2608 public:
2609 /** Number of records selected by the ranges in index_tree. */
2610 ha_rows quick_prefix_records;
2611 public:
2612
2613 void trace_basic_info(const PARAM *param,
2614 Opt_trace_object *trace_object) const;
2615
2616   TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
2617 bool have_agg_distinct_arg,
2618 KEY_PART_INFO *min_max_arg_part_arg,
2619 uint group_prefix_len_arg, uint used_key_parts_arg,
2620 uint group_key_parts_arg, KEY *index_info_arg,
2621 uint index_arg, uint key_infix_len_arg,
2622 uchar *key_infix_arg,
2623 SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
2624 uint param_idx_arg, ha_rows quick_prefix_records_arg)
2625 : have_min(have_min_arg), have_max(have_max_arg),
2626 have_agg_distinct(have_agg_distinct_arg),
2627 min_max_arg_part(min_max_arg_part_arg),
2628 group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
2629 group_key_parts(group_key_parts_arg), index_info(index_info_arg),
2630 index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
2631 index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
2632 quick_prefix_records(quick_prefix_records_arg)
2633 {
2634 if (key_infix_len)
2635 memcpy(this->key_infix, key_infix_arg, key_infix_len);
2636 }
2637   virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */
2638
2639 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2640 MEM_ROOT *parent_alloc);
2641   void use_index_scan() { is_index_scan= TRUE; }
2642 };
2643
2644 void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
2645 Opt_trace_object *trace_object) const
2646 {
2647 #ifdef OPTIMIZER_TRACE
2648 trace_object->add_alnum("type", "index_group").
2649 add_utf8("index", index_info->name);
2650 if (min_max_arg_part)
2651 trace_object->add_utf8("group_attribute",
2652 min_max_arg_part->field->field_name);
2653 else
2654 trace_object->add_null("group_attribute");
2655 trace_object->add("min_aggregate", have_min).
2656 add("max_aggregate", have_max).
2657 add("distinct_aggregate", have_agg_distinct).
2658 add("rows", records).
2659 add("cost", cost_est);
2660
2661 const KEY_PART_INFO *key_part= index_info->key_part;
2662   Opt_trace_context * const trace= &param->thd->opt_trace;
2663 {
2664 Opt_trace_array trace_keyparts(trace, "key_parts_used_for_access");
2665 for (uint partno= 0; partno < used_key_parts; partno++)
2666 {
2667 const KEY_PART_INFO *cur_key_part= key_part + partno;
2668 trace_keyparts.add_utf8(cur_key_part->field->field_name);
2669 }
2670 }
2671 Opt_trace_array trace_range(trace, "ranges");
2672
2673 // can have group quick without ranges
2674 if (index_tree)
2675 {
2676 String range_info;
2677 range_info.set_charset(system_charset_info);
2678 append_range_all_keyparts(&trace_range, NULL,
2679 &range_info, index_tree, key_part, false);
2680 }
2681 #endif
2682 }
2683
2684 /*
2685 Fill param->needed_fields with bitmap of fields used in the query.
2686 SYNOPSIS
2687 fill_used_fields_bitmap()
2688 param Parameter from test_quick_select function.
2689
2690 NOTES
2691 Clustered PK members are not put into the bitmap as they are implicitly
2692 present in all keys (and it is impossible to avoid reading them).
2693 RETURN
2694 0 Ok
2695 1 Out of memory.
2696 */
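/*
  Illustration (hypothetical table): with a clustered PRIMARY KEY(id) and
  a query reading columns (id, name), needed_fields ends up containing
  only 'name' -- 'id' comes for free with every secondary index record.
*/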
2697
2698 static int fill_used_fields_bitmap(PARAM *param)
2699 {
2700 TABLE *table= param->table;
2701 my_bitmap_map *tmp;
2702 uint pk;
2703 param->tmp_covered_fields.bitmap= 0;
2704 param->fields_bitmap_size= table->s->column_bitmap_size;
2705 if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2706 param->fields_bitmap_size)) ||
2707       bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2708 return 1;
2709
2710   bitmap_copy(&param->needed_fields, table->read_set);
2711   bitmap_union(&param->needed_fields, table->write_set);
2712
2713 pk= param->table->s->primary_key;
2714 if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2715 {
2716 /* The table uses clustered PK and it is not internally generated */
2717 KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2718 KEY_PART_INFO *key_part_end=
2719 key_part + param->table->key_info[pk].user_defined_key_parts;
2720 for (;key_part != key_part_end; ++key_part)
2721 bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1);
2722 }
2723 return 0;
2724 }
2725
2726
2727 /*
2728 Test if a key can be used in different ranges, and create the QUICK
2729 access method (range, index merge etc) that is estimated to be
2730 cheapest unless table/index scan is even cheaper (exception: @see
2731 parameter force_quick_range).
2732
2733 SYNOPSIS
2734 test_quick_select()
2735 thd Current thread
2736 keys_to_use Keys to use for range retrieval
2737 prev_tables Tables assumed to be already read when the scan is
2738 performed (but not read at the moment of this call)
2739 limit Query limit
2740 force_quick_range Prefer to use range (instead of full table scan) even
2741 if it is more expensive.
2742 interesting_order The sort order the range access method must be able
2743 to provide. Three-value logic: asc/desc/don't care
2744 needed_reg this info is used in make_join_select() even if there is no quick!
2745 quick[out] Calculated QUICK, or NULL
2746 ignore_table_scan Disregard table scan while looking for range.
2747
2748 NOTES
2749 Updates the following:
2750     needed_reg - Bits for keys that may be used if all prev regs are read
2751
2752 In the table struct the following information is updated:
2753 quick_keys - Which keys can be used
2754 quick_rows - How many rows the key matches
2755 quick_condition_rows - E(# rows that will satisfy the table condition)
2756
2757 IMPLEMENTATION
2758 quick_condition_rows value is obtained as follows:
2759
2760 It is a minimum of E(#output rows) for all considered table access
2761 methods (range and index_merge accesses over various indexes).
2762
2763 The obtained value is not a true E(#rows that satisfy table condition)
2764 but rather a pessimistic estimate. To obtain a true E(#...) one would
2765 need to combine estimates of various access methods, taking into account
2766 correlations between sets of rows they will return.
2767
2768     For example, if values of tbl.key1 and tbl.key2 are independent (a
2769     reasonable assumption if we have no information about their correlation),
2770     then the correct estimate will be:
2771
2772       E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2773        = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2))
2774
2775     which is smaller than
2776
2777       MIN(E(#rows(tbl.key1 < c1)), E(#rows(tbl.key2 < c2)))
2778
2779 which is currently produced.
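
    Worked example with made-up numbers: if total_rows(tbl) = 1000,
    E(#rows(tbl.key1 < c1)) = 100 and E(#rows(tbl.key2 < c2)) = 200, the
    independence-based estimate is 100/1000 * 200 = 20 rows, while the
    MIN() above yields 100 rows.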
2780
2781 TODO
2782 * Change the value returned in quick_condition_rows from a pessimistic
2783 estimate to true E(#rows that satisfy table condition).
2784     (we can re-use some of E(#rows) calculation code from index_merge/intersection
2785 for this)
2786
2787 * Check if this function really needs to modify keys_to_use, and change the
2788 code to pass it by reference if it doesn't.
2789
2790     * In addition to force_quick_range other means can be (and usually are) used
2791 to make this function prefer range over full table scan. Figure out if
2792 force_quick_range is really needed.
2793
2794 RETURN
2795 -1 if impossible select (i.e. certainly no rows will be selected)
2796 0 if can't use quick_select
2797 1 if found usable ranges and quick select has been successfully created.
2798
2799 @note After this call, caller may decide to really use the returned QUICK,
2800 by calling QEP_TAB::set_quick() and updating tab->type() if appropriate.
2801
2802 */
2803 int test_quick_select(THD *thd, key_map keys_to_use,
2804 table_map prev_tables,
2805 ha_rows limit, bool force_quick_range,
2806 const ORDER::enum_order interesting_order,
2807 const QEP_shared_owner *tab,
2808 Item *cond, key_map *needed_reg, QUICK_SELECT_I **quick,
2809 bool ignore_table_scan)
2810 {
2811 DBUG_ENTER("test_quick_select");
2812
2813 *quick= NULL;
2814 needed_reg->clear_all();
2815
2816 if (keys_to_use.is_clear_all())
2817 DBUG_RETURN(0);
2818
2819 table_map const_tables, read_tables;
2820 if (tab->join())
2821 {
2822 const_tables= tab->join()->found_const_table_map;
2823 read_tables= tab->join()->is_executed() ?
2824       // in execution, range estimation is done for each row, so it can access previous tables
2825 (tab->prefix_tables() & ~tab->added_tables()) :
2826 const_tables;
2827 }
2828 else
2829 const_tables= read_tables= 0;
2830
2831 DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
2832 (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2833 (ulong) const_tables));
2834
2835 const Cost_model_server *const cost_model= thd->cost_model();
2836 TABLE *const head= tab->table();
2837 ha_rows records= head->file->stats.records;
2838 if (!records)
2839 records++; /* purecov: inspected */
2840 double scan_time=
2841 cost_model->row_evaluate_cost(static_cast<double>(records)) + 1;
2842 Cost_estimate cost_est= head->file->table_scan_cost();
2843 cost_est.add_io(1.1);
2844 cost_est.add_cpu(scan_time);
2845 if (ignore_table_scan)
2846 {
2847 scan_time= DBL_MAX;
2848 cost_est.set_max_cost();
2849 }
2850 if (limit < records)
2851 {
2852 cost_est.reset();
2853 // Force to use index
2854 cost_est.add_io(head->cost_model()->page_read_cost(
2855 static_cast<double>(records)) + 1);
2856 cost_est.add_cpu(scan_time);
2857 }
2858 else if (cost_est.total_cost() <= 2.0 && !force_quick_range)
2859 DBUG_RETURN(0); /* No need for quick select */
2860
2861 Opt_trace_context * const trace= &thd->opt_trace;
2862 Opt_trace_object trace_range(trace, "range_analysis");
2863 Opt_trace_object(trace, "table_scan").
2864 add("rows", head->file->stats.records).
2865 add("cost", cost_est);
2866
2867 keys_to_use.intersect(head->keys_in_use_for_query);
2868 if (!keys_to_use.is_clear_all())
2869 {
2870 MEM_ROOT alloc;
2871 SEL_TREE *tree= NULL;
2872 KEY_PART *key_parts;
2873 KEY *key_info;
2874 PARAM param;
2875
2876 /*
2877       Use the 3 multiplier as the range optimizer allocates a big PARAM structure
2878       and may evaluate a subquery expression
2879 TODO During the optimization phase we should evaluate only inexpensive
2880 single-lookup subqueries.
2881 */
2882 if (check_stack_overrun(thd, 3*STACK_MIN_SIZE + sizeof(PARAM), NULL))
2883 DBUG_RETURN(0); // Fatal error flag is set
2884
2885 /* set up parameter that is passed to all functions */
2886 param.thd= thd;
2887 param.baseflag= head->file->ha_table_flags();
2888 param.prev_tables=prev_tables | const_tables;
2889 param.read_tables=read_tables;
2890 param.current_table= head->pos_in_table_list->map();
2891 param.table=head;
2892 param.keys=0;
2893 param.mem_root= &alloc;
2894 param.old_root= thd->mem_root;
2895 param.needed_reg= needed_reg;
2896 param.imerge_cost_buff.reset();
2897 param.using_real_indexes= TRUE;
2898 param.remove_jump_scans= TRUE;
2899 param.force_default_mrr= (interesting_order == ORDER::ORDER_DESC);
2900 param.order_direction= interesting_order;
2901 param.use_index_statistics= false;
2902 /*
2903 Set index_merge_allowed from OPTIMIZER_SWITCH_INDEX_MERGE.
2904       Notice also that turning off OPTIMIZER_SWITCH_INDEX_MERGE disables all
2905       index merge sub-strategies.
2906 */
2907 param.index_merge_allowed=
2908 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE);
2909 param.index_merge_union_allowed=
2910 param.index_merge_allowed &&
2911 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION);
2912 param.index_merge_sort_union_allowed=
2913 param.index_merge_allowed &&
2914 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION);
2915 param.index_merge_intersect_allowed=
2916 param.index_merge_allowed &&
2917 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT);
2918
2919 thd->no_errors=1; // Don't warn about NULL
2920 init_sql_alloc(key_memory_test_quick_select_exec,
2921 &alloc, thd->variables.range_alloc_block_size, 0);
2922 set_memroot_max_capacity(&alloc,
2923 thd->variables.range_optimizer_max_mem_size);
2924 set_memroot_error_reporting(&alloc, true);
2925     thd->push_internal_handler(&param.error_handler);
2926 if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
2927 sizeof(KEY_PART)*
2928 head->s->key_parts)) ||
2929         fill_used_fields_bitmap(&param))
2930 {
2931 thd->no_errors=0;
2932 thd->pop_internal_handler();
2933 free_root(&alloc,MYF(0)); // Return memory & allocator
2934 DBUG_RETURN(0); // Can't use range
2935 }
2936 key_parts= param.key_parts;
2937 thd->mem_root= &alloc;
2938
2939 {
2940 Opt_trace_array trace_idx(trace,
2941 "potential_range_indexes",
2942 Opt_trace_context::RANGE_OPTIMIZER);
2943 /*
2944 Make an array with description of all key parts of all table keys.
2945 This is used in get_mm_parts function.
2946 */
2947 key_info= head->key_info;
2948 for (uint idx= 0 ; idx < head->s->keys ; idx++, key_info++)
2949 {
2950 Opt_trace_object trace_idx_details(trace);
2951 trace_idx_details.add_utf8("index", key_info->name);
2952 KEY_PART_INFO *key_part_info;
2953
2954 if (!keys_to_use.is_set(idx))
2955 {
2956 trace_idx_details.add("usable", false).
2957 add_alnum("cause", "not_applicable");
2958 continue;
2959 }
2960
2961 if (hint_key_state(thd, head, idx, NO_RANGE_HINT_ENUM, 0))
2962 {
2963 trace_idx_details.add("usable", false).
2964 add_alnum("cause", "no_range_optimization hint");
2965 continue;
2966 }
2967
2968 if (key_info->flags & HA_FULLTEXT)
2969 {
2970 trace_idx_details.add("usable", false).
2971 add_alnum("cause", "fulltext");
2972 continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
2973 }
2974
2975 trace_idx_details.add("usable", true);
2976
2977 param.key[param.keys]=key_parts;
2978 key_part_info= key_info->key_part;
2979 Opt_trace_array trace_keypart(trace, "key_parts");
2980 for (uint part=0 ; part < actual_key_parts(key_info) ;
2981 part++, key_parts++, key_part_info++)
2982 {
2983 key_parts->key= param.keys;
2984 key_parts->part= part;
2985 key_parts->length= key_part_info->length;
2986 key_parts->store_length= key_part_info->store_length;
2987 key_parts->field= key_part_info->field;
2988 key_parts->null_bit= key_part_info->null_bit;
2989 key_parts->image_type =
2990 (part < key_info->user_defined_key_parts &&
2991 key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2992 /* Only HA_PART_KEY_SEG is used */
2993 key_parts->flag= (uint8) key_part_info->key_part_flag;
2994 trace_keypart.add_utf8(key_parts->field->field_name);
2995 }
2996 param.real_keynr[param.keys++]=idx;
2997 }
2998 }
2999 param.key_parts_end=key_parts;
3000
3001 /* Calculate cost of full index read for the shortest covering index */
3002 if (!head->covering_keys.is_clear_all())
3003 {
3004 int key_for_use= find_shortest_key(head, &head->covering_keys);
3005 Cost_estimate key_read_time=
3006 param.table->file->index_scan_cost(key_for_use, 1,
3007 static_cast<double>(records));
3008 key_read_time.add_cpu(cost_model->row_evaluate_cost(
3009 static_cast<double>(records)));
3010
3011 bool chosen= false;
3012 if (key_read_time < cost_est)
3013 {
3014 cost_est= key_read_time;
3015 chosen= true;
3016 }
3017
3018 Opt_trace_object trace_cov(trace,
3019 "best_covering_index_scan",
3020 Opt_trace_context::RANGE_OPTIMIZER);
3021 trace_cov.add_utf8("index", head->key_info[key_for_use].name).
3022 add("cost", key_read_time).add("chosen", chosen);
3023 if (!chosen)
3024 trace_cov.add_alnum("cause", "cost");
3025 }
3026
3027 TABLE_READ_PLAN *best_trp= NULL;
3028 TRP_GROUP_MIN_MAX *group_trp;
3029 Cost_estimate best_cost= cost_est;
3030
3031 if (cond)
3032 {
3033 {
3034 Opt_trace_array trace_setup_cond(trace, "setup_range_conditions");
3035         tree= get_mm_tree(&param, cond);
3036 }
3037 if (tree)
3038 {
3039 if (tree->type == SEL_TREE::IMPOSSIBLE)
3040 {
3041 trace_range.add("impossible_range", true);
3042 records=0L; /* Return -1 from this function. */
3043 cost_est.reset();
3044 cost_est.add_io(static_cast<double>(HA_POS_ERROR));
3045 goto free_mem;
3046 }
3047 /*
3048 If the tree can't be used for range scans, proceed anyway, as we
3049 can construct a group-min-max quick select
3050 */
3051 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3052 {
3053 trace_range.add("range_scan_possible", false);
3054 if (tree->type == SEL_TREE::ALWAYS)
3055 trace_range.add_alnum("cause", "condition_always_true");
3056
3057 tree= NULL;
3058 }
3059 }
3060 }
3061
3062 /*
3063 Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
3064 Notice that it can be constructed no matter if there is a range tree.
3065 */
3066     group_trp= get_best_group_min_max(&param, tree, &best_cost);
3067 if (group_trp)
3068 {
3069 param.table->quick_condition_rows= min(group_trp->records,
3070 head->file->stats.records);
3071 Opt_trace_object grp_summary(trace,
3072 "best_group_range_summary",
3073 Opt_trace_context::RANGE_OPTIMIZER);
3074 if (unlikely(trace->is_started()))
3075         group_trp->trace_basic_info(&param, &grp_summary);
3076 if (group_trp->cost_est < best_cost)
3077 {
3078 grp_summary.add("chosen", true);
3079 best_trp= group_trp;
3080 best_cost= best_trp->cost_est;
3081 }
3082 else
3083 grp_summary.add("chosen", false).add_alnum("cause", "cost");
3084 }
3085
3086 if (tree)
3087 {
3088 /*
3089 It is possible to use a range-based quick select (but it might be
3090 slower than 'all' table scan).
3091 */
3092 dbug_print_tree("final_tree", tree, ¶m);
3093
3094 {
3095 /*
3096 Calculate cost of single index range scan and possible
3097 intersections of these
3098 */
3099 Opt_trace_object trace_range(trace,
3100 "analyzing_range_alternatives",
3101 Opt_trace_context::RANGE_OPTIMIZER);
3102 TRP_RANGE *range_trp;
3103 TRP_ROR_INTERSECT *rori_trp;
3104
3105 /* Get best 'range' plan and prepare data for making other plans */
3106         if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
3107 &best_cost)))
3108 {
3109 best_trp= range_trp;
3110 best_cost= best_trp->cost_est;
3111 }
3112
3113 /*
3114 Simultaneous key scans and row deletes on several handler
3115 objects are not allowed so don't use ROR-intersection for
3116 table deletes. Also, ROR-intersection cannot return rows in
3117 descending order
3118 */
3119 if ((thd->lex->sql_command != SQLCOM_DELETE) &&
3120 param.index_merge_allowed &&
3121 interesting_order != ORDER::ORDER_DESC)
3122 {
3123 /*
3124 Get best non-covering ROR-intersection plan and prepare data for
3125 building covering ROR-intersection.
3126 */
3127           if ((rori_trp= get_best_ror_intersect(&param, tree, &best_cost)))
3128 {
3129 best_trp= rori_trp;
3130 best_cost= best_trp->cost_est;
3131 }
3132 }
3133 }
3134
3135 // Here we calculate cost of union index merge
3136 if (!tree->merges.is_empty())
3137 {
3138 // Cannot return rows in descending order.
3139 if (param.index_merge_allowed &&
3140 interesting_order != ORDER::ORDER_DESC &&
3141 param.table->file->stats.records)
3142 {
3143 /* Try creating index_merge/ROR-union scan. */
3144 SEL_IMERGE *imerge;
3145 TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp= NULL;
3146 List_iterator_fast<SEL_IMERGE> it(tree->merges);
3147 Opt_trace_array trace_idx_merge(trace,
3148 "analyzing_index_merge_union",
3149 Opt_trace_context::RANGE_OPTIMIZER);
3150 while ((imerge= it++))
3151 {
3152             new_conj_trp= get_best_disjunct_quick(&param, imerge,
3153 &best_cost);
3154 if (new_conj_trp)
3155 set_if_smaller(param.table->quick_condition_rows,
3156 new_conj_trp->records);
3157 if (!best_conj_trp ||
3158 (new_conj_trp &&
3159 new_conj_trp->cost_est < best_conj_trp->cost_est))
3160 {
3161 best_conj_trp= new_conj_trp;
3162 }
3163 }
3164 if (best_conj_trp)
3165 best_trp= best_conj_trp;
3166 }
3167 }
3168 }
3169
3170 thd->mem_root= param.old_root;
3171
3172 /* If we got a read plan, create a quick select from it. */
3173 if (best_trp)
3174 {
3175 QUICK_SELECT_I *qck;
3176 records= best_trp->records;
3177       if (!(qck= best_trp->make_quick(&param, TRUE)) || qck->init())
3178 qck= NULL;
3179 *quick= qck;
3180 }
3181
3182 free_mem:
3183 thd->pop_internal_handler();
3184 if (unlikely(*quick && trace->is_started() && best_trp))
3185 {
3186 // best_trp cannot be NULL if quick is set, done to keep fortify happy
3187 Opt_trace_object trace_range_summary(trace,
3188 "chosen_range_access_summary");
3189 {
3190 Opt_trace_object trace_range_plan(trace,
3191 "range_access_plan");
3192         best_trp->trace_basic_info(&param, &trace_range_plan);
3193 }
3194 trace_range_summary.add("rows_for_plan", (*quick)->records).
3195 add("cost_for_plan", (*quick)->cost_est).
3196 add("chosen", true);
3197 }
3198
3199 free_root(&alloc,MYF(0)); // Return memory & allocator
3200 thd->mem_root= param.old_root;
3201 thd->no_errors=0;
3202
3203 DBUG_EXECUTE("info", print_quick(*quick, needed_reg););
3204 }
3205
3206
3207 /*
3208 Assume that if the user is using 'limit' we will only need to scan
3209 limit rows if we are using a key
3210 */
3211 DBUG_RETURN(records ? MY_TEST(*quick) : -1);
3212 }
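/*
  Minimal calling sketch (hedged; the argument values depend entirely on
  the caller's context):

    QUICK_SELECT_I *quick= NULL;
    key_map needed_reg;
    int res= test_quick_select(thd, keys_to_use, 0, HA_POS_ERROR, false,
                               ORDER::ORDER_NOT_RELEVANT, tab, cond,
                               &needed_reg, &quick, false);
    // res == -1: impossible WHERE; res == 1: *quick was created and
    // initialized; the caller may then attach it via QEP_TAB::set_quick()
*/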
3213
3214 /****************************************************************************
3215 * Partition pruning module
3216 ****************************************************************************/
3217
3218 /*
3219 PartitionPruningModule
3220
3221 This part of the code does partition pruning. Partition pruning solves the
3222 following problem: given a query over partitioned tables, find partitions
3223 that we will not need to access (i.e. partitions that we can assume to be
3224 empty) when executing the query.
3225 The set of partitions to prune doesn't depend on which query execution
3226 plan will be used to execute the query.
3227
3228 HOW IT WORKS
3229
3230 Partition pruning module makes use of RangeAnalysisModule. The following
3231 examples show how the problem of partition pruning can be reduced to the
3232 range analysis problem:
3233
3234 EXAMPLE 1
3235 Consider a query:
3236
3237 SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3238
3239 where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
3240 way to find the used (i.e. not pruned away) partitions is as follows:
3241
3242 1. analyze the WHERE clause and extract the list of intervals over t1.a
3243 for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3244
3245 2. for each interval I
3246 {
3247 find partitions that have non-empty intersection with I;
3248 mark them as used;
3249 }
3250
3251 EXAMPLE 2
3252 Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3253 we need to:
3254
3255 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3256 The list of intervals we'll obtain will look like this:
3257 ((t1.a, t1.b) = (1,'foo')),
3258 ((t1.a, t1.b) = (2,'bar')),
3259        ((t1.a, t1.b) > (10,'zz'))
3260
3261 2. for each interval I
3262 {
3263 if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3264 {
3265 calculate HASH(part_func(t1.a, t1.b));
3266 find which partition has records with this hash value and mark
3267 it as used;
3268 }
3269 else
3270 {
3271 mark all partitions as used;
3272 break;
3273 }
3274 }
3275
3276 For both examples the step #1 is exactly what RangeAnalysisModule could
3277 be used to do, if it was provided with appropriate index description
3278 (array of KEY_PART structures).
3279 In example #1, we need to provide it with description of index(t1.a),
3280 in example #2, we need to provide it with description of index(t1.a, t1.b).
3281
3282 These index descriptions are further called "partitioning index
3283 descriptions". Note that it doesn't matter if such indexes really exist,
3284 as range analysis module only uses the description.
3285
3286 Putting it all together, partitioning module works as follows:
3287
3288 prune_partitions() {
3289 call create_partition_index_description();
3290
3291 call get_mm_tree(); // invoke the RangeAnalysisModule
3292
3293 // analyze the obtained interval list and get used partitions
3294 call find_used_partitions();
3295 }
3296
3297 */
3298
3299 struct st_part_prune_param;
3300 struct st_part_opt_info;
3301
3302 typedef void (*mark_full_part_func)(partition_info*, uint32);
3303
3304 /*
3305 Partition pruning operation context
3306 */
3307 typedef struct st_part_prune_param
3308 {
3309 RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3310
3311 /***************************************************************
3312 Following fields are filled in based solely on partitioning
3313 definition and not modified after that:
3314 **************************************************************/
3315 partition_info *part_info; /* Copy of table->part_info */
3316 /* Function to get partition id from partitioning fields only */
3317 get_part_id_func get_top_partition_id_func;
3318 /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3319 mark_full_part_func mark_full_partition_used;
3320
3321 /* Partitioning 'index' description, array of key parts */
3322 KEY_PART *key;
3323
3324 /*
3325 Number of fields in partitioning 'index' definition created for
3326 partitioning (0 if partitioning 'index' doesn't include partitioning
3327 fields)
3328 */
3329 uint part_fields;
3330 uint subpart_fields; /* Same as above for subpartitioning */
3331
3332 /*
3333 Number of the last partitioning field keypart in the index, or -1 if
3334 partitioning index definition doesn't include partitioning fields.
3335 */
3336 int last_part_partno;
3337   int last_subpart_partno; /* Same as above for subpartitioning */
3338
3339 /*
3340 is_part_keypart[i] == test(keypart #i in partitioning index is a member
3341 used in partitioning)
3342 Used to maintain current values of cur_part_fields and cur_subpart_fields
3343 */
3344 my_bool *is_part_keypart;
3345 /* Same as above for subpartitioning */
3346 my_bool *is_subpart_keypart;
3347
3348   my_bool ignore_part_fields; /* Ignore rest of partitioning fields */
3349
3350 /***************************************************************
3351 Following fields form find_used_partitions() recursion context:
3352 **************************************************************/
3353 SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */
3354 SEL_ARG **arg_stack_end; /* Top of the stack */
3355 /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3356 uint cur_part_fields;
3357 /* Same as cur_part_fields, but for subpartitioning */
3358 uint cur_subpart_fields;
3359
3360 /* Iterator to be used to obtain the "current" set of used partitions */
3361 PARTITION_ITERATOR part_iter;
3362
3363 /* Initialized bitmap of num_subparts size */
3364 MY_BITMAP subparts_bitmap;
3365
3366 uchar *cur_min_key;
3367 uchar *cur_max_key;
3368
3369 uint cur_min_flag, cur_max_flag;
3370 } PART_PRUNE_PARAM;
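
/*
  Illustration (hypothetical table, for exposition only): for a table
  defined with

    PARTITION BY RANGE (a) SUBPARTITION BY HASH (b) ...

  create_partition_index_description() builds the two-keypart 'index'
  (a, b), giving part_fields= 1, subpart_fields= 1, last_part_partno= 0,
  last_subpart_partno= 1, is_part_keypart= {1,0}, is_subpart_keypart= {0,1}.
*/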
3371
3372 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3373 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3374 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3375 SEL_IMERGE *imerge);
3376 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3377 List<SEL_IMERGE> &merges);
3378 static void mark_all_partitions_as_used(partition_info *part_info);
3379
3380 #ifndef DBUG_OFF
3381 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3382 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3383 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3384 #endif
3385
3386
3387 /**
3388 Perform partition pruning for a given table and condition.
3389
3390 @param thd Thread handle
3391 @param table Table to perform partition pruning for
3392 @param pprune_cond Condition to use for partition pruning
3393
3394   @note This function assumes that lock_partitions is set up when it
3395   is invoked. The function analyzes the condition, finds partitions that
3396   need to be used to retrieve the records that match the condition, and
3397   marks them as used by setting the appropriate bits in
3398   part_info->read_partitions. In the worst case all partitions are marked
3399   as used. If the table is not yet locked, it will also unset bits in
3400   part_info->lock_partitions that are not set in read_partitions.
3401
3402   This function returns promptly if called for a non-partitioned table.
3403
3404 @return Operation status
3405 @retval true Failure
3406 @retval false Success
3407 */
3408
3409 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3410 {
3411 partition_info *part_info = table->part_info;
3412 DBUG_ENTER("prune_partitions");
3413
3414 /*
3415     If the prepare stage has already completed pruning successfully, there
3416     is no use in running prune_partitions() again on the same condition,
3417     since it will not be able to prune anything more than the previous call
3418     from the prepare step did.
3419 */
3420 if (part_info && part_info->is_pruning_completed)
3421 DBUG_RETURN(false);
3422
3423 table->all_partitions_pruned_away= false;
3424
3425 if (!part_info)
3426 DBUG_RETURN(FALSE); /* not a partitioned table */
3427
3428 if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION &&
3429 part_info->is_auto_partitioned)
3430 DBUG_RETURN(false); /* Should not prune auto partitioned table */
3431
3432 if (!pprune_cond)
3433 {
3434 mark_all_partitions_as_used(part_info);
3435 DBUG_RETURN(FALSE);
3436 }
3437
3438   /* No need to continue pruning if there are no more partitions to prune! */
3439 if (bitmap_is_clear_all(&part_info->lock_partitions))
3440 bitmap_clear_all(&part_info->read_partitions);
3441 if (bitmap_is_clear_all(&part_info->read_partitions))
3442 {
3443 table->all_partitions_pruned_away= true;
3444 DBUG_RETURN(false);
3445 }
3446
3447 PART_PRUNE_PARAM prune_param;
3448 MEM_ROOT alloc;
3449 RANGE_OPT_PARAM *range_par= &prune_param.range_param;
3450 my_bitmap_map *old_sets[2];
3451
3452 prune_param.part_info= part_info;
3453 init_sql_alloc(key_memory_prune_partitions_exec,
3454 &alloc, thd->variables.range_alloc_block_size, 0);
3455 set_memroot_max_capacity(&alloc, thd->variables.range_optimizer_max_mem_size);
3456 set_memroot_error_reporting(&alloc, true);
3457 thd->push_internal_handler(&range_par->error_handler);
3458 range_par->mem_root= &alloc;
3459 range_par->old_root= thd->mem_root;
3460
3461 if (create_partition_index_description(&prune_param))
3462 {
3463 mark_all_partitions_as_used(part_info);
3464 thd->pop_internal_handler();
3465 free_root(&alloc,MYF(0)); // Return memory & allocator
3466 DBUG_RETURN(FALSE);
3467 }
3468
3469 dbug_tmp_use_all_columns(table, old_sets,
3470 table->read_set, table->write_set);
3471 range_par->thd= thd;
3472 range_par->table= table;
3473 /* range_par->cond doesn't need initialization */
3474 range_par->prev_tables= range_par->read_tables= 0;
3475 range_par->current_table= table->pos_in_table_list->map();
3476
3477 range_par->keys= 1; // one index
3478 range_par->using_real_indexes= FALSE;
3479 range_par->remove_jump_scans= FALSE;
3480 range_par->real_keynr[0]= 0;
3481
3482 thd->no_errors=1; // Don't warn about NULL
3483 thd->mem_root=&alloc;
3484
3485 bitmap_clear_all(&part_info->read_partitions);
3486
3487 prune_param.key= prune_param.range_param.key_parts;
3488 SEL_TREE *tree;
3489 int res;
3490
3491 tree= get_mm_tree(range_par, pprune_cond);
3492 if (!tree)
3493 goto all_used;
3494
3495 if (tree->type == SEL_TREE::IMPOSSIBLE)
3496 {
3497 /* Cannot improve the pruning any further. */
3498 part_info->is_pruning_completed= true;
3499 goto end;
3500 }
3501
3502 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3503 goto all_used;
3504
3505 if (tree->merges.is_empty())
3506 {
3507 /* Range analysis has produced a single list of intervals. */
3508 prune_param.arg_stack_end= prune_param.arg_stack;
3509 prune_param.cur_part_fields= 0;
3510 prune_param.cur_subpart_fields= 0;
3511
3512 prune_param.cur_min_key= prune_param.range_param.min_key;
3513 prune_param.cur_max_key= prune_param.range_param.max_key;
3514 prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3515
3516 init_all_partitions_iterator(part_info, &prune_param.part_iter);
3517 if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3518 tree->keys[0]))))
3519 goto all_used;
3520 }
3521 else
3522 {
3523 if (tree->merges.elements == 1)
3524 {
3525 /*
3526 Range analysis has produced a "merge" of several intervals lists, a
3527 SEL_TREE that represents an expression in form
3528 sel_imerge = (tree1 OR tree2 OR ... OR treeN)
3529 that cannot be reduced to one tree. This can only happen when
3530 partitioning index has several keyparts and the condition is OR of
3531 conditions that refer to different key parts. For example, we'll get
3532 here for "partitioning_field=const1 OR subpartitioning_field=const2"
3533 */
3534 if (-1 == (res= find_used_partitions_imerge(&prune_param,
3535 tree->merges.head())))
3536 goto all_used;
3537 }
3538 else
3539 {
3540 /*
3541 Range analysis has produced a list of several imerges, i.e. a
3542 structure that represents a condition in form
3543 imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
3544         This is produced for complicated WHERE clauses that the range
3545         analyzer can't really analyze properly.
3546 */
3547 if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
3548 tree->merges)))
3549 goto all_used;
3550 }
3551 }
3552
3553 /*
3554 If the condition can be evaluated now, we are done with pruning.
3555
3556 During the prepare phase, before locking, subqueries and stored programs
3557 are not evaluated. So we need to run prune_partitions() a second time in
3558 the optimize phase to prune partitions for reading, when subqueries and
3559 stored programs may be evaluated.
3560 */
3561 if (pprune_cond->can_be_evaluated_now())
3562 part_info->is_pruning_completed= true;
3563 goto end;
3564
3565 all_used:
3566 mark_all_partitions_as_used(prune_param.part_info);
3567 end:
3568 thd->pop_internal_handler();
3569 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
3570 thd->no_errors=0;
3571 thd->mem_root= range_par->old_root;
3572 free_root(&alloc,MYF(0)); // Return memory & allocator
3573 /* If an error occurred we can return failure after freeing the memroot. */
3574 if (thd->is_error())
3575 {
3576 DBUG_RETURN(true);
3577 }
3578 /*
3579 Must be a subset of the locked partitions.
3580 lock_partitions contains the partitions marked by explicit partition
3581 selection (... t PARTITION (pX) ...) and we must only use partitions
3582 within that set.
3583 */
3584 bitmap_intersect(&prune_param.part_info->read_partitions,
3585 &prune_param.part_info->lock_partitions);
3586 /*
3587 If not yet locked, also prune partitions to lock if not UPDATEing
3588 partition key fields. This will also prune lock_partitions if we are under
3589 LOCK TABLES, so prune away calls to start_stmt().
3590 TODO: enhance this prune locking to also allow pruning of
3591 'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
3592 a lock for part_key partition.
3593 */
3594 if (!thd->lex->is_query_tables_locked() &&
3595 !partition_key_modified(table, table->write_set))
3596 {
3597 bitmap_copy(&prune_param.part_info->lock_partitions,
3598 &prune_param.part_info->read_partitions);
3599 }
3600 if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
3601 table->all_partitions_pruned_away= true;
3602 DBUG_RETURN(false);
3603 }
3604
3605
3606 /*
3607 Store field key image to table record
3608
3609 SYNOPSIS
3610 store_key_image_to_rec()
3611 field Field which key image should be stored
3612 ptr Field value in key format
3613 len Length of the value, in bytes
3614
3615 DESCRIPTION
3616 Copy the field value from its key image to the table record. The source
3617 is the value in key image format, occupying len bytes in buffer pointed
3618 by ptr. The destination is table record, in "field value in table record"
3619 format.
3620 */
3621
3622 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3623 {
3624 /* Do the same as print_key_value() does */
3625 my_bitmap_map *old_map;
3626
3627 if (field->real_maybe_null())
3628 {
3629 if (*ptr)
3630 {
3631 field->set_null();
3632 return;
3633 }
3634 field->set_notnull();
3635 ptr++;
3636 }
3637 old_map= dbug_tmp_use_all_columns(field->table,
3638 field->table->write_set);
3639 field->set_key_image(ptr, len);
3640 dbug_tmp_restore_column_map(field->table->write_set, old_map);
3641 }
3642
3643
3644 /*
3645 For SEL_ARG* array, store sel_arg->min values into table record buffer
3646
3647 SYNOPSIS
3648 store_selargs_to_rec()
3649 ppar Partition pruning context
3650 start Array of SEL_ARG* for which the minimum values should be stored
3651 num Number of elements in the array
3652
3653 DESCRIPTION
3654 For each SEL_ARG* interval in the specified array, store the left edge
3655 field value (sel_arg->min, key image format) into the table record.
3656 */
3657
3658 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3659 int num)
3660 {
3661 KEY_PART *parts= ppar->range_param.key_parts;
3662 for (SEL_ARG **end= start + num; start != end; start++)
3663 {
3664 SEL_ARG *sel_arg= (*start);
3665 store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3666 parts[sel_arg->part].length);
3667 }
3668 }
3669
3670
3671 /* Mark a partition as used in the case when there are no subpartitions */
3672 static void mark_full_partition_used_no_parts(partition_info* part_info,
3673 uint32 part_id)
3674 {
3675 DBUG_ENTER("mark_full_partition_used_no_parts");
3676 DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
3677 bitmap_set_bit(&part_info->read_partitions, part_id);
3678 DBUG_VOID_RETURN;
3679 }
3680
3681
3682 /* Mark a partition as used in the case when there are subpartitions */
3683 static void mark_full_partition_used_with_parts(partition_info *part_info,
3684 uint32 part_id)
3685 {
3686 uint32 start= part_id * part_info->num_subparts;
3687 uint32 end= start + part_info->num_subparts;
3688 DBUG_ENTER("mark_full_partition_used_with_parts");
3689
3690 for (; start != end; start++)
3691 {
3692 DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3693 bitmap_set_bit(&part_info->read_partitions, start);
3694 }
3695 DBUG_VOID_RETURN;
3696 }
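
/*
  Note on the bitmap layout used above: with subpartitions,
  read_partitions is indexed linearly, with
    bit(part_id, subpart_id) = part_id * num_subparts + subpart_id,
  so e.g. with num_subparts= 4, partition #2 occupies bits 8..11.
*/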
3697
3698 /*
3699 Find the set of used partitions for List<SEL_IMERGE>
3700 SYNOPSIS
3701 find_used_partitions_imerge_list
3702 ppar Partition pruning context.
3703 key_tree Intervals tree to perform pruning for.
3704
3705 DESCRIPTION
3706 List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
3707 The set of used partitions is an intersection of used partitions sets
3708 for imerge_{i}.
3709 We accumulate this intersection in a separate bitmap.
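
    For example (hypothetical condition), with partitioning field p and
    subpartitioning field sp, a WHERE clause such as
      (p = 1 OR sp = 2) AND (p = 3 OR sp = 4)
    arrives here as two ANDed SEL_IMERGEs; the partition sets found for
    each imerge are intersected to produce the final set.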
3710
3711 RETURN
3712 See find_used_partitions()
3713 */
3714
3715 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3716 List<SEL_IMERGE> &merges)
3717 {
3718 MY_BITMAP all_merges;
3719 uint bitmap_bytes;
3720 my_bitmap_map *bitmap_buf;
3721 uint n_bits= ppar->part_info->read_partitions.n_bits;
3722 bitmap_bytes= bitmap_buffer_size(n_bits);
3723 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
3724 bitmap_bytes)))
3725 {
3726 /*
3727       Fallback, process just the first SEL_IMERGE. This can leave us with more
3728       partitions marked as used than actually needed.
3729 */
3730 return find_used_partitions_imerge(ppar, merges.head());
3731 }
3732 bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
3733 bitmap_set_prefix(&all_merges, n_bits);
3734
3735 List_iterator<SEL_IMERGE> it(merges);
3736 SEL_IMERGE *imerge;
3737 while ((imerge=it++))
3738 {
3739 int res= find_used_partitions_imerge(ppar, imerge);
3740 if (!res)
3741 {
3742 /* no used partitions on one ANDed imerge => no used partitions at all */
3743 return 0;
3744 }
3745
3746 if (res != -1)
3747 bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
3748
3749 if (bitmap_is_clear_all(&all_merges))
3750 return 0;
3751
3752 bitmap_clear_all(&ppar->part_info->read_partitions);
3753 }
3754 memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
3755 bitmap_bytes);
3756 return 1;
3757 }
3758
3759
3760 /*
3761 Find the set of used partitions for SEL_IMERGE structure
3762 SYNOPSIS
3763 find_used_partitions_imerge()
3764 ppar Partition pruning context.
3765 key_tree Intervals tree to perform pruning for.
3766
3767 DESCRIPTION
3768 SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
3769     trivial - just mark used partitions for each tree and bail out early
3770 if for some tree_{i} all partitions are used.
3771
3772 RETURN
3773 See find_used_partitions().
3774 */
3775
3776 static
3777 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3778 {
3779 int res= 0;
3780 for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3781 {
3782 ppar->arg_stack_end= ppar->arg_stack;
3783 ppar->cur_part_fields= 0;
3784 ppar->cur_subpart_fields= 0;
3785
3786 ppar->cur_min_key= ppar->range_param.min_key;
3787 ppar->cur_max_key= ppar->range_param.max_key;
3788 ppar->cur_min_flag= ppar->cur_max_flag= 0;
3789
3790 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3791 SEL_ARG *key_tree= (*ptree)->keys[0];
3792 if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3793 return -1;
3794 }
3795 return res;
3796 }
3797
3798
3799 /*
3800 Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3801
3802 SYNOPSIS
3803 find_used_partitions()
3804 ppar Partition pruning context.
3805 key_tree SEL_ARG range tree to perform pruning for
3806
3807 DESCRIPTION
3808 This function
3809 * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3810 * finds the partitions one needs to use to get rows in these intervals
3811 * marks these partitions as used.
3812     The next section describes the process in greater detail.
3813
3814 IMPLEMENTATION
3815 TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3816 We can find out which [sub]partitions to use if we obtain restrictions on
3817 [sub]partitioning fields in the following form:
3818 1. "partition_field1=const1 AND ... AND partition_fieldN=constN"
3819 1.1 Same as (1) but for subpartition fields
3820
3821 If partitioning supports interval analysis (i.e. partitioning is a
3822 function of a single table field, and partition_info::
3823 get_part_iter_for_interval != NULL), then we can also use condition in
3824 this form:
3825 2. "const1 <=? partition_field <=? const2"
3826 2.1 Same as (2) but for subpartition_field
3827
3828 INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3829
3830 The below is an example of what SEL_ARG tree may represent:
3831
3832 (start)
3833 | $
3834 | Partitioning keyparts $ subpartitioning keyparts
3835 | $
3836 | ... ... $
3837 | | | $
3838 | +---------+ +---------+ $ +-----------+ +-----------+
3839 \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3840 +---------+ +---------+ $ +-----------+ +-----------+
3841 | $ | |
3842 | $ | +-----------+
3843 | $ | | subpar2=c6|
3844 | $ | +-----------+
3845 | $ |
3846 | $ +-----------+ +-----------+
3847 | $ | subpar1=c4|--| subpar2=c8|
3848 | $ +-----------+ +-----------+
3849 | $
3850 | $
3851 +---------+ $ +------------+ +------------+
3852 | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3853 +---------+ $ +------------+ +------------+
3854 | $
3855 ... $
3856
3857 The up-down connections are connections via SEL_ARG::left and
3858 SEL_ARG::right. A horizontal connection to the right is the
3859 SEL_ARG::next_key_part connection.
3860
3861 find_used_partitions() traverses the entire tree via recursion on
3862 * SEL_ARG::next_key_part (from left to right on the picture)
3863 * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3864 performed for each depth level.
3865
3866 Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3867 ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3868 For the example in the above picture, one of stack states is:
3869 in find_used_partitions(key_tree = "subpar2=c5") (***)
3870 in find_used_partitions(key_tree = "subpar1=c3")
3871 in find_used_partitions(key_tree = "par2=c2") (**)
3872 in find_used_partitions(key_tree = "par1=c1")
3873 in prune_partitions(...)
3874 We apply partitioning limits as soon as possible, e.g. when we reach the
3875 depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3876 and save them in ppar->part_iter.
3877     When we reach the depth (***), we find which subpartition(s) correspond to
3878     "subpar1=c3 AND subpar2=c5", and then mark those subpartitions as used in
3879     the appropriate partitions.
3880
3881 It is possible that constraints on some partitioning fields are missing.
3882 For the above example, consider this stack state:
3883 in find_used_partitions(key_tree = "subpar2=c12") (***)
3884 in find_used_partitions(key_tree = "subpar1=c10")
3885 in find_used_partitions(key_tree = "par1=c2")
3886 in prune_partitions(...)
3887     Here we don't have constraints for all partitioning fields. Since we've
3888     never set ppar->part_iter to contain a used set of partitions, we use
3889     its default "all partitions" value. We get the subpartition id for
3890     "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in
3891     every partition.
3892
3893 The inverse is also possible: we may get constraints on partitioning
3894 fields, but not constraints on subpartitioning fields. In that case,
3895 calls to find_used_partitions() with depth below (**) will return -1,
3896     and we will mark the entire partition as used.
3897
3898 TODO
3899 Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3900
3901 RETURN
3902 1 OK, one or more [sub]partitions are marked as used.
3903 0 The passed condition doesn't match any partitions
3904 -1 Couldn't infer any partition pruning "intervals" from the passed
3905 SEL_ARG* tree (which means that all partitions should be marked as
3906 used) Marking partitions as used is the responsibility of the caller.
3907 */
3908
3909 static
3910 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
3911 {
3912 int res, left_res=0, right_res=0;
3913 int key_tree_part= (int)key_tree->part;
3914 bool set_full_part_if_bad_ret= FALSE;
3915 bool ignore_part_fields= ppar->ignore_part_fields;
3916 bool did_set_ignore_part_fields= FALSE;
3917 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
3918
3919 if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
3920 return -1;
3921
3922 if (key_tree->left != &null_element)
3923 {
3924 if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
3925 return -1;
3926 }
3927
3928 /* Push SEL_ARG's to stack to enable looking backwards as well */
3929 ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
3930 ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
3931 *(ppar->arg_stack_end++)= key_tree;
3932
3933 if (ignore_part_fields)
3934 {
3935 /*
3936 We come here when a condition on the first partitioning
3937 fields led to evaluating the partitioning condition
3938 (due to finding a condition of the type a < const or
3939 b > const). Thus we must ignore the rest of the
3940 partitioning fields but we still want to analyse the
3941 subpartitioning fields.
3942 */
3943 if (key_tree->next_key_part)
3944 res= find_used_partitions(ppar, key_tree->next_key_part);
3945 else
3946 res= -1;
3947 goto pop_and_go_right;
3948 }
3949
3950 if (key_tree->type == SEL_ARG::KEY_RANGE)
3951 {
3952 if (ppar->part_info->get_part_iter_for_interval &&
3953 key_tree->part <= ppar->last_part_partno)
3954 {
3955 /* Collect left and right bound, their lengths and flags */
3956 uchar *min_key= ppar->cur_min_key;
3957 uchar *max_key= ppar->cur_max_key;
3958 uchar *tmp_min_key= min_key;
3959 uchar *tmp_max_key= max_key;
3960 key_tree->store_min(ppar->key[key_tree->part].store_length,
3961 &tmp_min_key, ppar->cur_min_flag);
3962 key_tree->store_max(ppar->key[key_tree->part].store_length,
3963 &tmp_max_key, ppar->cur_max_flag);
3964 uint flag;
3965 if (key_tree->next_key_part &&
3966 key_tree->next_key_part->part == key_tree->part+1 &&
3967 key_tree->next_key_part->part <= ppar->last_part_partno &&
3968 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
3969 {
3970 /*
3971             There are more key parts for partition pruning to handle.
3972             This mainly happens when the condition is an equality
3973 condition.
3974 */
3975 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
3976 (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
3977 !key_tree->min_flag && !key_tree->max_flag)
3978 {
3979 /* Set 'parameters' */
3980 ppar->cur_min_key= tmp_min_key;
3981 ppar->cur_max_key= tmp_max_key;
3982 uint save_min_flag= ppar->cur_min_flag;
3983 uint save_max_flag= ppar->cur_max_flag;
3984
3985 ppar->cur_min_flag|= key_tree->min_flag;
3986 ppar->cur_max_flag|= key_tree->max_flag;
3987
3988 res= find_used_partitions(ppar, key_tree->next_key_part);
3989
3990 /* Restore 'parameters' back */
3991 ppar->cur_min_key= min_key;
3992 ppar->cur_max_key= max_key;
3993
3994 ppar->cur_min_flag= save_min_flag;
3995 ppar->cur_max_flag= save_max_flag;
3996 goto pop_and_go_right;
3997 }
3998 /* We have arrived at the last field in the partition pruning */
3999 uint tmp_min_flag= key_tree->min_flag,
4000 tmp_max_flag= key_tree->max_flag;
4001 if (!tmp_min_flag)
4002 key_tree->next_key_part->store_min_key(ppar->key,
4003 &tmp_min_key,
4004 &tmp_min_flag,
4005 ppar->last_part_partno);
4006 if (!tmp_max_flag)
4007 key_tree->next_key_part->store_max_key(ppar->key,
4008 &tmp_max_key,
4009 &tmp_max_flag,
4010 ppar->last_part_partno);
4011 flag= tmp_min_flag | tmp_max_flag;
4012 }
4013 else
4014 flag= key_tree->min_flag | key_tree->max_flag;
4015
4016 if (tmp_min_key != range_par->min_key)
4017 flag&= ~NO_MIN_RANGE;
4018 else
4019 flag|= NO_MIN_RANGE;
4020 if (tmp_max_key != range_par->max_key)
4021 flag&= ~NO_MAX_RANGE;
4022 else
4023 flag|= NO_MAX_RANGE;
4024
4025 /*
4026 We need to call the interval mapper if we have a condition which
4027 makes sense to prune on. In the example of COLUMNS on a and
4028 b it makes sense if we have a condition on a, or conditions on
4029 both a and b. If we only have conditions on b it might make sense
4030 but this is a harder case we will solve later. For the harder case
4031 this clause then turns into use of all partitions and thus we
4032 simply set res= -1 as if the mapper had returned that.
4033 TODO: What to do here is defined in WL#4065.
4034 */
4035 if (ppar->arg_stack[0]->part == 0)
4036 {
4037 uint32 i;
4038 uint32 store_length_array[MAX_KEY];
4039 uint32 num_keys= ppar->part_fields;
4040
4041 for (i= 0; i < num_keys; i++)
4042 store_length_array[i]= ppar->key[i].store_length;
4043 res= ppar->part_info->
4044 get_part_iter_for_interval(ppar->part_info,
4045 FALSE,
4046 store_length_array,
4047 range_par->min_key,
4048 range_par->max_key,
4049 tmp_min_key - range_par->min_key,
4050 tmp_max_key - range_par->max_key,
4051 flag,
4052 &ppar->part_iter);
4053 if (!res)
4054 goto pop_and_go_right; /* res==0 --> no satisfying partitions */
4055 }
4056 else
4057 res= -1;
4058
4059 if (res == -1)
4060 {
4061 /* get a full range iterator */
4062 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4063 }
4064 /*
4065         Save our intent to mark the full partition as used if we will not be
4066         able
4066 to obtain further limits on subpartitions
4067 */
4068 if (key_tree_part < ppar->last_part_partno)
4069 {
4070 /*
4071 We need to ignore the rest of the partitioning fields in all
4072 evaluations after this
4073 */
4074 did_set_ignore_part_fields= TRUE;
4075 ppar->ignore_part_fields= TRUE;
4076 }
4077 set_full_part_if_bad_ret= TRUE;
4078 goto process_next_key_part;
4079 }
4080
4081 if (key_tree_part == ppar->last_subpart_partno &&
4082 (NULL != ppar->part_info->get_subpart_iter_for_interval))
4083 {
4084 PARTITION_ITERATOR subpart_iter;
4085 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
4086 range_par->key_parts););
4087 res= ppar->part_info->
4088 get_subpart_iter_for_interval(ppar->part_info,
4089 TRUE,
4090 NULL, /* Currently not used here */
4091 key_tree->min_value,
4092 key_tree->max_value,
4093 0, 0, /* Those are ignored here */
4094 key_tree->min_flag |
4095 key_tree->max_flag,
4096 &subpart_iter);
4097 if (res == 0)
4098 {
4099 /*
4100 The only case where we can get "no satisfying subpartitions"
4101 returned from the above call is when an error has occurred.
4102 */
4103 DBUG_ASSERT(range_par->thd->is_error());
4104 return 0;
4105 }
4106
4107 if (res == -1)
4108 goto pop_and_go_right; /* all subpartitions satisfy */
4109
4110 uint32 subpart_id;
4111 bitmap_clear_all(&ppar->subparts_bitmap);
4112 while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
4113 NOT_A_PARTITION_ID)
4114 bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
4115
4116       /* For each used partition, mark its matching subpartitions as used. */
4117 uint32 part_id;
4118 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4119 NOT_A_PARTITION_ID)
4120 {
4121 for (uint i= 0; i < ppar->part_info->num_subparts; i++)
4122 if (bitmap_is_set(&ppar->subparts_bitmap, i))
4123 bitmap_set_bit(&ppar->part_info->read_partitions,
4124 part_id * ppar->part_info->num_subparts + i);
4125 }
4126 goto pop_and_go_right;
4127 }
4128
4129 if (key_tree->is_singlepoint())
4130 {
4131 if (key_tree_part == ppar->last_part_partno &&
4132 ppar->cur_part_fields == ppar->part_fields &&
4133 ppar->part_info->get_part_iter_for_interval == NULL)
4134 {
4135 /*
4136 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
4137 fields. Save all constN constants into table record buffer.
4138 */
4139 store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
4140 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
4141 ppar->part_fields););
4142 uint32 part_id;
4143 longlong func_value;
4144 /* Find in which partition the {const1, ...,constN} tuple goes */
4145 if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
4146 &func_value))
4147 {
4148 res= 0; /* No satisfying partitions */
4149 goto pop_and_go_right;
4150 }
4151           /* Remember the limit we got - single partition #part_id */
4152 init_single_partition_iterator(part_id, &ppar->part_iter);
4153
4154 /*
4155           If there are no subpartitions, or we fail to get any limit for them,
4156           we'll mark the full partition as used.
4157 */
4158 set_full_part_if_bad_ret= TRUE;
4159 goto process_next_key_part;
4160 }
4161
4162 if (key_tree_part == ppar->last_subpart_partno &&
4163 ppar->cur_subpart_fields == ppar->subpart_fields)
4164 {
4165 /*
4166 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
4167 fields. Save all constN constants into table record buffer.
4168 */
4169 store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
4170 ppar->subpart_fields);
4171 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
4172 ppar->subpart_fields,
4173 ppar->subpart_fields););
4174 /* Find the subpartition (it's HASH/KEY so we always have one) */
4175 partition_info *part_info= ppar->part_info;
4176 uint32 part_id, subpart_id;
4177
4178 if (part_info->get_subpartition_id(part_info, &subpart_id))
4179 return 0;
4180
4181         /* Mark subpartition #subpart_id as used in each partition from the iterator. */
4182 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4183 NOT_A_PARTITION_ID)
4184 {
4185 bitmap_set_bit(&part_info->read_partitions,
4186 part_id * part_info->num_subparts + subpart_id);
4187 }
4188 res= 1; /* Some partitions were marked as used */
4189 goto pop_and_go_right;
4190 }
4191 }
4192 else
4193 {
4194 /*
4195         Can't handle a condition on the current key part. If we're so deep
4196         that we're processing the subpartitioning key parts, this means we
4197         will not be able to infer any suitable condition, so bail out.
4198 */
4199 if (key_tree_part >= ppar->last_part_partno)
4200 {
4201 res= -1;
4202 goto pop_and_go_right;
4203 }
4204 /*
4205         There is no point in continuing with the rest of the partitioning
4206         key parts. Will try to continue with the subpartitioning key parts.
4207 */
4208 ppar->ignore_part_fields= true;
4209 did_set_ignore_part_fields= true;
4210 goto process_next_key_part;
4211 }
4212 }
4213
4214 process_next_key_part:
4215 if (key_tree->next_key_part)
4216 res= find_used_partitions(ppar, key_tree->next_key_part);
4217 else
4218 res= -1;
4219
4220 if (did_set_ignore_part_fields)
4221 {
4222 /*
4223 We have returned from processing all key trees linked to our next
4224       key part. We are ready to move down (using right pointers) and
4225 this tree is a new evaluation requiring its own decision on whether
4226 to ignore partitioning fields.
4227 */
4228 ppar->ignore_part_fields= FALSE;
4229 }
4230 if (set_full_part_if_bad_ret)
4231 {
4232 if (res == -1)
4233 {
4234 /* Got "full range" for subpartitioning fields */
4235 uint32 part_id;
4236 bool found= FALSE;
4237 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4238 NOT_A_PARTITION_ID)
4239 {
4240 ppar->mark_full_partition_used(ppar->part_info, part_id);
4241 found= TRUE;
4242 }
4243 res= MY_TEST(found);
4244 }
4245 /*
4246 Restore the "used partitions iterator" to the default setting that
4247 specifies iteration over all partitions.
4248 */
4249 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4250 }
4251
4252 pop_and_go_right:
4253 /* Pop this key part info off the "stack" */
4254 ppar->arg_stack_end--;
4255 ppar->cur_part_fields-= ppar->is_part_keypart[key_tree_part];
4256 ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4257
4258 if (res == -1)
4259 return -1;
4260 if (key_tree->right != &null_element)
4261 {
4262 if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4263 return -1;
4264 }
4265 return (left_res || right_res || res);
4266 }
4267
4268
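/*
  Mark all partitions as used, i.e. copy lock_partitions (the set of
  partitions allowed by explicit PARTITION selection, if any) into
  read_partitions.
*/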
4269 static void mark_all_partitions_as_used(partition_info *part_info)
4270 {
4271 bitmap_copy(&(part_info->read_partitions),
4272 &(part_info->lock_partitions));
4273 }
4274
4275
4276 /*
4277   Check if field types allow constructing a partitioning index description
4278
4279 SYNOPSIS
4280 fields_ok_for_partition_index()
4281 pfield NULL-terminated array of pointers to fields.
4282
4283 DESCRIPTION
4284 For an array of fields, check if we can use all of the fields to create
4285 partitioning index description.
4286
4287     We can't process GEOMETRY fields - for these fields singlepoint intervals
4288     can't be generated, and non-singlepoint intervals are "special" kinds of
4289     intervals to which our processing logic can't be applied.
4290
4291 It is not known if we could process ENUM fields, so they are disabled to be
4292 on the safe side.
4293
4294 RETURN
4295 TRUE Yes, fields can be used in partitioning index
4296 FALSE Otherwise
4297 */
4298
4299 static bool fields_ok_for_partition_index(Field **pfield)
4300 {
4301 if (!pfield)
4302 return FALSE;
4303 for (; (*pfield); pfield++)
4304 {
4305 enum_field_types ftype= (*pfield)->real_type();
4306 if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4307 return FALSE;
4308 }
4309 return TRUE;
4310 }
4311
4312
4313 /*
4314 Create partition index description and fill related info in the context
4315 struct
4316
4317 SYNOPSIS
4318 create_partition_index_description()
4319 prune_par INOUT Partition pruning context
4320
4321 DESCRIPTION
4322 Create partition index description. Partition index description is:
4323
4324 part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4325
4326     If partitioning/sub-partitioning uses ENUM or GEOMETRY fields, the
4327     corresponding fields_list(...) is not included in the index description
4328     and we don't perform partition pruning for those partitions/subpartitions.
4329
4330 RETURN
4331 TRUE Out of memory or can't do partition pruning at all
4332 FALSE OK
4333 */
4334
4335 static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
4336 {
4337 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4338 partition_info *part_info= ppar->part_info;
4339 uint used_part_fields, used_subpart_fields;
4340
4341 used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
4342 part_info->num_part_fields : 0;
4343 used_subpart_fields=
4344 fields_ok_for_partition_index(part_info->subpart_field_array)?
4345 part_info->num_subpart_fields : 0;
4346
4347 uint total_parts= used_part_fields + used_subpart_fields;
4348
4349 ppar->ignore_part_fields= FALSE;
4350 ppar->part_fields= used_part_fields;
4351 ppar->last_part_partno= (int)used_part_fields - 1;
4352
4353 ppar->subpart_fields= used_subpart_fields;
4354 ppar->last_subpart_partno=
4355 used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;
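  /*
    For example (hypothetical counts): 2 usable partitioning fields and
    1 usable subpartitioning field give last_part_partno= 1 and
    last_subpart_partno= 2.
  */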
4356
4357 if (part_info->is_sub_partitioned())
4358 {
4359 ppar->mark_full_partition_used= mark_full_partition_used_with_parts;
4360 ppar->get_top_partition_id_func= part_info->get_part_partition_id;
4361 }
4362 else
4363 {
4364 ppar->mark_full_partition_used= mark_full_partition_used_no_parts;
4365 ppar->get_top_partition_id_func= part_info->get_partition_id;
4366 }
4367
4368 KEY_PART *key_part;
4369 MEM_ROOT *alloc= range_par->mem_root;
4370 if (!total_parts ||
4371 !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
4372 total_parts)) ||
4373 !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
4374 total_parts)) ||
4375 !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4376 total_parts)) ||
4377 !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4378 total_parts)))
4379 return TRUE;
4380
4381 if (ppar->subpart_fields)
4382 {
4383 my_bitmap_map *buf;
4384 uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
4385 if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
4386 return TRUE;
4387 bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
4388 FALSE);
4389 }
4390 range_par->key_parts= key_part;
4391 Field **field= (ppar->part_fields)? part_info->part_field_array :
4392 part_info->subpart_field_array;
4393 bool in_subpart_fields= FALSE;
4394 for (uint part= 0; part < total_parts; part++, key_part++)
4395 {
4396 key_part->key= 0;
4397 key_part->part= part;
4398 key_part->length= (uint16)(*field)->key_length();
4399 key_part->store_length= (uint16)get_partition_field_store_length(*field);
4400
4401 DBUG_PRINT("info", ("part %u length %u store_length %u", part,
4402 key_part->length, key_part->store_length));
4403
4404 key_part->field= (*field);
4405 key_part->image_type = Field::itRAW;
4406 /*
4407       We set the keypart flag to 0 here, as HA_PART_KEY_SEG is the only
4408       flag checked in the RangeAnalysisModule.
4409 */
4410 key_part->flag= 0;
4411 /* We don't set key_parts->null_bit as it will not be used */
4412
4413 ppar->is_part_keypart[part]= !in_subpart_fields;
4414 ppar->is_subpart_keypart[part]= in_subpart_fields;
4415
4416 /*
4417       Check if this was the last field in this array; in that case we
4418       switch to the subpartitioning fields. (This only happens if
4419       there are subpartitioning fields to cater for.)
4420 */
4421 if (!*(++field))
4422 {
4423 field= part_info->subpart_field_array;
4424 in_subpart_fields= TRUE;
4425 }
4426 }
4427 range_par->key_parts_end= key_part;
4428
4429 DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
4430 range_par->key_parts_end););
4431 return FALSE;
4432 }
4433
4434
4435 #ifndef DBUG_OFF
4436
4437 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4438 {
4439 DBUG_ENTER("print_partitioning_index");
4440 DBUG_LOCK_FILE;
4441 fprintf(DBUG_FILE, "partitioning INDEX(");
4442 for (KEY_PART *p=parts; p != parts_end; p++)
4443 {
4444 fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
4445 }
4446 fputs(");\n", DBUG_FILE);
4447 DBUG_UNLOCK_FILE;
4448 DBUG_VOID_RETURN;
4449 }
4450
4451
4452 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
4453 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4454 {
4455 DBUG_ENTER("dbug_print_segment_range");
4456 DBUG_LOCK_FILE;
4457 if (!(arg->min_flag & NO_MIN_RANGE))
4458 {
4459 store_key_image_to_rec(part->field, arg->min_value, part->length);
4460 part->field->dbug_print();
4461 if (arg->min_flag & NEAR_MIN)
4462 fputs(" < ", DBUG_FILE);
4463 else
4464 fputs(" <= ", DBUG_FILE);
4465 }
4466
4467 fprintf(DBUG_FILE, "%s", part->field->field_name);
4468
4469 if (!(arg->max_flag & NO_MAX_RANGE))
4470 {
4471 if (arg->max_flag & NEAR_MAX)
4472 fputs(" < ", DBUG_FILE);
4473 else
4474 fputs(" <= ", DBUG_FILE);
4475 store_key_image_to_rec(part->field, arg->max_value, part->length);
4476 part->field->dbug_print();
4477 }
4478 fputs("\n", DBUG_FILE);
4479 DBUG_UNLOCK_FILE;
4480 DBUG_VOID_RETURN;
4481 }
4482
4483
4484 /*
4485 Print a singlepoint multi-keypart range interval to debug trace
4486
4487 SYNOPSIS
4488 dbug_print_singlepoint_range()
4489 start Array of SEL_ARG* ptrs representing conditions on key parts
4490 num Number of elements in the array.
4491
4492 DESCRIPTION
4493 This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4494 interval to debug trace.
4495 */
4496
4497 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4498 {
4499 DBUG_ENTER("dbug_print_singlepoint_range");
4500 DBUG_LOCK_FILE;
4501 SEL_ARG **end= start + num;
4502
4503 for (SEL_ARG **arg= start; arg != end; arg++)
4504 {
4505 Field *field= (*arg)->field;
4506 fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
4507 field->dbug_print();
4508 }
4509 fputs("\n", DBUG_FILE);
4510 DBUG_UNLOCK_FILE;
4511 DBUG_VOID_RETURN;
4512 }
4513 #endif
4514
4515 /****************************************************************************
4516 * Partition pruning code ends
4517 ****************************************************************************/
4518
4519
4520 /*
4521 Get best plan for a SEL_IMERGE disjunctive expression.
4522 SYNOPSIS
4523 get_best_disjunct_quick()
4524 param Parameter from check_quick_select function
4525 imerge Expression to use
4526 cost_est Don't create scans with cost > cost_est
4527
4528 NOTES
4529 index_merge cost is calculated as follows:
4530 index_merge_cost =
4531 cost(index_reads) + (see #1)
4532 cost(rowid_to_row_scan) + (see #2)
4533 cost(unique_use) (see #3)
4534
4535     1. cost(index_reads) = SUM_i(cost(index_read_i))
4536 For non-CPK scans,
4537 cost(index_read_i) = {cost of ordinary 'index only' scan}
4538 For CPK scan,
4539 cost(index_read_i) = {cost of non-'index only' scan}
4540
4541 2. cost(rowid_to_row_scan)
4542 If table PK is clustered then
4543 cost(rowid_to_row_scan) =
4544 {cost of ordinary clustered PK scan with n_ranges=n_rows}
4545
4546 Otherwise, we use the following model to calculate costs:
4547 We need to retrieve n_rows rows from file that occupies n_blocks blocks.
4548 We assume that offsets of rows we need are independent variates with
4549 uniform distribution in [0..max_file_offset] range.
4550
4551       We'll denote a block as "busy" if it contains row(s) we need to retrieve
4552       and "empty" if it doesn't contain rows we need.
4553
4554 Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
4555 applies to any block in file). Let x_i be a variate taking value 1 if
4556 block #i is empty and 0 otherwise.
4557
4558 Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4559
4560 E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4561 = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4562 ~= n_blocks * exp(-n_rows/n_blocks).
4563
4564 E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4565 ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
4566
4567       Average size of "hole" between neighboring non-empty blocks is
4568 E(hole_size) = n_blocks/E(n_busy_blocks).
4569
4570 The total cost of reading all needed blocks in one "sweep" is:
4571
4572 E(n_busy_blocks) * disk_seek_cost(n_blocks/E(n_busy_blocks))
4573
4574 This cost estimate is calculated in get_sweep_read_cost().
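
      As a worked example (illustrative numbers only): for n_rows= 100 and
      n_blocks= 1000, E(n_busy_blocks) ~= 1000 * (1 - exp(-0.1)) ~= 95
      blocks, and the average hole size is E(hole_size) ~= 1000/95 ~= 10.5
      blocks.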
4575
4576 3. Cost of Unique use is calculated in Unique::get_use_cost function.
4577
4578     ROR-union cost is calculated in the same way as index_merge cost, but
4579     instead of Unique a priority queue is used.
4580
4581 RETURN
4582 Created read plan
4583 NULL - Out of memory or no read scan could be built.
4584 */
4585
4586 static
4587 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
4588 const Cost_estimate *cost_est)
4589 {
4590 SEL_TREE **ptree;
4591 TRP_INDEX_MERGE *imerge_trp= NULL;
4592 uint n_child_scans= imerge->trees_next - imerge->trees;
4593 TRP_RANGE **range_scans;
4594 TRP_RANGE **cur_child;
4595 TRP_RANGE **cpk_scan= NULL;
4596 bool imerge_too_expensive= FALSE;
4597 Cost_estimate imerge_cost;
4598 ha_rows cpk_scan_records= 0;
4599 ha_rows non_cpk_scan_records= 0;
4600 bool pk_is_clustered= param->table->file->primary_key_is_clustered();
4601 bool all_scans_ror_able= TRUE;
4602 bool all_scans_rors= TRUE;
4603 size_t unique_calc_buff_size;
4604 TABLE_READ_PLAN **roru_read_plans;
4605 TABLE_READ_PLAN **cur_roru_plan;
4606 ha_rows roru_total_records;
4607 double roru_intersect_part= 1.0;
4608 const Cost_model_table *const cost_model= param->table->cost_model();
4609 Cost_estimate read_cost= *cost_est;
4610
4611 DBUG_ENTER("get_best_disjunct_quick");
4612 DBUG_PRINT("info", ("Full table scan cost: %g", cost_est->total_cost()));
4613
4614 DBUG_ASSERT(param->table->file->stats.records);
4615
4616 Opt_trace_context * const trace= ¶m->thd->opt_trace;
4617 Opt_trace_object trace_best_disjunct(trace);
4618 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
4619 sizeof(TRP_RANGE*)*
4620 n_child_scans)))
4621 DBUG_RETURN(NULL);
4622 // Note: to_merge.end() is called to close this object after this for-loop.
4623 Opt_trace_array to_merge(trace, "indexes_to_merge");
4624 /*
4625 Collect best 'range' scan for each of disjuncts, and, while doing so,
4626 analyze possibility of ROR scans. Also calculate some values needed by
4627 other parts of the code.
4628 */
4629 for (ptree= imerge->trees, cur_child= range_scans;
4630 ptree != imerge->trees_next;
4631 ptree++, cur_child++)
4632 {
4633 DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
4634 "tree in SEL_IMERGE"););
4635 Opt_trace_object trace_idx(trace);
4636 if (!(*cur_child=
4637 get_key_scans_params(param, *ptree, true, false, &read_cost)))
4638 {
4639 /*
4640 One of index scans in this index_merge is more expensive than entire
4641 table read for another available option. The entire index_merge (and
4642 any possible ROR-union) will be more expensive then, too. We continue
4643 here only to update SQL_SELECT members.
4644 */
4645 imerge_too_expensive= true;
4646 }
4647 if (imerge_too_expensive)
4648 {
4649 trace_idx.add("chosen", false).add_alnum("cause", "cost");
4650 continue;
4651 }
4652
4653 const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
4654 imerge_cost+= (*cur_child)->cost_est;
4655 all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
4656 all_scans_rors &= (*cur_child)->is_ror;
4657 if (pk_is_clustered &&
4658 keynr_in_table == param->table->s->primary_key)
4659 {
4660 cpk_scan= cur_child;
4661 cpk_scan_records= (*cur_child)->records;
4662 }
4663 else
4664 non_cpk_scan_records += (*cur_child)->records;
4665
4666 trace_idx.
4667 add_utf8("index_to_merge", param->table->key_info[keynr_in_table].name).
4668 add("cumulated_cost", imerge_cost);
4669 }
4670
4671 // Note: to_merge trace object is closed here
4672 to_merge.end();
4673
4674
4675 trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
4676 if (imerge_too_expensive || (imerge_cost > read_cost) ||
4677 ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
4678 !read_cost.is_max_cost()))
4679 {
4680 /*
4681 Bail out if it is obvious that both index_merge and ROR-union will be
4682 more expensive
4683 */
4684 DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
4685 "full table scan, bailing out"));
4686 trace_best_disjunct.add("chosen", false).add_alnum("cause", "cost");
4687 DBUG_RETURN(NULL);
4688 }
4689
4690 /*
4691 If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
4692 guaranteed to be cheaper than non-ROR union), unless ROR-unions are
4693 disabled in @@optimizer_switch
4694 */
4695 if (all_scans_rors &&
4696 param->index_merge_union_allowed)
4697 {
4698 roru_read_plans= (TABLE_READ_PLAN**)range_scans;
4699 trace_best_disjunct.add("use_roworder_union", true).
4700 add_alnum("cause", "always_cheaper_than_not_roworder_retrieval");
4701 goto skip_to_ror_scan;
4702 }
4703
4704 if (cpk_scan)
4705 {
4706 /*
4707 Add one rowid/key comparison for each row retrieved on non-CPK
4708 scan. (it is done in QUICK_RANGE_SELECT::row_in_ranges)
4709 */
4710 const double rid_comp_cost=
4711 cost_model->key_compare_cost(static_cast<double>(non_cpk_scan_records));
4712 imerge_cost.add_cpu(rid_comp_cost);
4713 trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
4714 rid_comp_cost);
4715 }
4716
4717 /* Calculate cost(rowid_to_row_scan) */
4718 {
4719 Cost_estimate sweep_cost;
4720 JOIN *join= param->thd->lex->select_lex->join;
4721 const bool is_interrupted= join && join->tables != 1;
4722 get_sweep_read_cost(param->table, non_cpk_scan_records, is_interrupted,
4723 &sweep_cost);
4724 imerge_cost+= sweep_cost;
4725 trace_best_disjunct.add("cost_sort_rowid_and_read_disk",
4726 sweep_cost);
4727 }
4728 DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
4729 imerge_cost.total_cost()));
4730 if (imerge_cost > read_cost ||
4731 !param->index_merge_sort_union_allowed)
4732 {
4733 trace_best_disjunct.add("use_roworder_index_merge", true).
4734 add_alnum("cause", "cost");
4735 goto build_ror_index_merge;
4736 }
4737
4738 /* Add Unique operations cost */
4739 unique_calc_buff_size=
4740 Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
4741 param->table->file->ref_length,
4742 param->thd->variables.sortbuff_size);
4743 if (param->imerge_cost_buff.size() < unique_calc_buff_size)
4744 {
4745 typedef Unique::Imerge_cost_buf_type::value_type element_type;
4746 void *rawmem=
4747 alloc_root(param->mem_root, unique_calc_buff_size * sizeof(element_type));
4748 if (!rawmem)
4749 DBUG_RETURN(NULL);
4750 param->imerge_cost_buff=
4751 Unique::Imerge_cost_buf_type(static_cast<element_type*>(rawmem),
4752 unique_calc_buff_size);
4753 }
4754
4755 {
4756 const double dup_removal_cost=
4757 Unique::get_use_cost(param->imerge_cost_buff,
4758 (uint)non_cpk_scan_records,
4759 param->table->file->ref_length,
4760 param->thd->variables.sortbuff_size,
4761 cost_model);
4762
4763 trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost);
4764 imerge_cost.add_cpu(dup_removal_cost);
4765
4766 trace_best_disjunct.add("total_cost", imerge_cost);
4767     DBUG_PRINT("info",("index_merge total cost: %g (wanted: less than %g)",
4768 imerge_cost.total_cost(), read_cost.total_cost()));
4769 }
4770 if (imerge_cost < read_cost)
4771 {
4772 if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
4773 {
4774 imerge_trp->cost_est= imerge_cost;
4775 imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
4776 imerge_trp->records= min(imerge_trp->records,
4777 param->table->file->stats.records);
4778 imerge_trp->range_scans= range_scans;
4779 imerge_trp->range_scans_end= range_scans + n_child_scans;
4780 read_cost= imerge_cost;
4781 }
4782 }
4783
4784 build_ror_index_merge:
4785 if (!all_scans_ror_able ||
4786 param->thd->lex->sql_command == SQLCOM_DELETE ||
4787 !param->index_merge_union_allowed)
4788 DBUG_RETURN(imerge_trp);
4789
4790 /* Ok, it is possible to build a ROR-union, try it. */
4791 if (!(roru_read_plans=
4792 (TABLE_READ_PLAN**)alloc_root(param->mem_root,
4793 sizeof(TABLE_READ_PLAN*)*
4794 n_child_scans)))
4795 DBUG_RETURN(imerge_trp);
4796 skip_to_ror_scan:
4797 Cost_estimate roru_index_cost;
4798 roru_total_records= 0;
4799 cur_roru_plan= roru_read_plans;
4800
4801 /*
4802 Note: trace_analyze_ror.end() is called to close this object after
4803 this for-loop.
4804 */
4805 Opt_trace_array trace_analyze_ror(trace, "analyzing_roworder_scans");
4806 /* Find 'best' ROR scan for each of trees in disjunction */
4807 for (ptree= imerge->trees, cur_child= range_scans;
4808 ptree != imerge->trees_next;
4809 ptree++, cur_child++, cur_roru_plan++)
4810 {
4811 Opt_trace_object trp_info(trace);
4812 if (unlikely(trace->is_started()))
4813 (*cur_child)->trace_basic_info(param, &trp_info);
4814
4815 /*
4816 Assume the best ROR scan is the one that has cheapest
4817 full-row-retrieval scan cost.
4818 Also accumulate index_only scan costs as we'll need them to
4819 calculate overall index_intersection cost.
4820 */
4821 Cost_estimate scan_cost;
4822 if ((*cur_child)->is_ror)
4823 {
4824 /* Ok, we have index_only cost, now get full rows scan cost */
4825 scan_cost=
4826 param->table->file->read_cost(param->real_keynr[(*cur_child)->key_idx],
4827 1, static_cast<double>((*cur_child)->records));
4828 scan_cost.add_cpu(
4829 cost_model->row_evaluate_cost(rows2double((*cur_child)->records)));
4830 }
4831 else
4832 scan_cost= read_cost;
4833
4834 TABLE_READ_PLAN *prev_plan= *cur_child;
4835 if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, &scan_cost)))
4836 {
4837 if (prev_plan->is_ror)
4838 *cur_roru_plan= prev_plan;
4839 else
4840 DBUG_RETURN(imerge_trp);
4841 roru_index_cost += (*cur_roru_plan)->cost_est;
4842 }
4843 else
4844 {
4845 roru_index_cost +=
4846 ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_cost;
4847 }
4848 roru_total_records += (*cur_roru_plan)->records;
4849 roru_intersect_part *= (*cur_roru_plan)->records /
4850 param->table->file->stats.records;
4851 }
4852 // Note: trace_analyze_ror trace object is closed here
4853 trace_analyze_ror.end();
4854
4855 /*
4856 rows to retrieve=
4857 SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
4858 This is valid because index_merge construction guarantees that conditions
4859 in disjunction do not share key parts.
4860 */
4861 roru_total_records -= (ha_rows)(roru_intersect_part*
4862 param->table->file->stats.records);
4863 /* ok, got a ROR read plan for each of the disjuncts
4864 Calculate cost:
4865 cost(index_union_scan(scan_1, ... scan_n)) =
4866 SUM_i(cost_of_index_only_scan(scan_i)) +
4867 queue_use_cost(rowid_len, n) +
4868 cost_of_row_retrieval
4869 See get_merge_buffers_cost function for queue_use_cost formula derivation.
4870 */
4871 Cost_estimate roru_total_cost;
4872 {
4873 JOIN *join= param->thd->lex->select_lex->join;
4874 const bool is_interrupted= join && join->tables != 1;
4875 get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
4876 &roru_total_cost);
4877 roru_total_cost += roru_index_cost;
4878 roru_total_cost.add_cpu(
4879 cost_model->key_compare_cost(rows2double(roru_total_records) *
4880 log((double)n_child_scans) / M_LN2));
4881 }
4882
4883 trace_best_disjunct.add("index_roworder_union_cost",
4884 roru_total_cost).
4885 add("members", n_child_scans);
4886 TRP_ROR_UNION* roru;
4887 if (roru_total_cost < read_cost)
4888 {
4889 if ((roru= new (param->mem_root) TRP_ROR_UNION))
4890 {
4891 trace_best_disjunct.add("chosen", true);
4892 roru->first_ror= roru_read_plans;
4893 roru->last_ror= roru_read_plans + n_child_scans;
4894 roru->cost_est= roru_total_cost;
4895 roru->records= roru_total_records;
4896 DBUG_RETURN(roru);
4897 }
4898 }
4899 trace_best_disjunct.add("chosen", false);
4900
4901 DBUG_RETURN(imerge_trp);
4902 }
4903
4904
4905 /*
4906 Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
4907 sel_arg set of intervals.
4908
4909 SYNOPSIS
4910 make_ror_scan()
4911 param Parameter from test_quick_select function
4912 idx Index of key in param->keys
4913 sel_arg Set of intervals for a given key
4914
4915 RETURN
4916 NULL - out of memory
4917 ROR scan structure containing a scan for {idx, sel_arg}
4918 */
4919
4920 static
4921 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
4922 {
4923 ROR_SCAN_INFO *ror_scan;
4924 my_bitmap_map *bitmap_buf1;
4925 my_bitmap_map *bitmap_buf2;
4926 uint keynr;
4927 DBUG_ENTER("make_ror_scan");
4928
4929 if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
4930 sizeof(ROR_SCAN_INFO))))
4931 DBUG_RETURN(NULL);
4932
4933 ror_scan->idx= idx;
4934 ror_scan->keynr= keynr= param->real_keynr[idx];
4935 ror_scan->sel_arg= sel_arg;
4936 ror_scan->records= param->table->quick_rows[keynr];
4937
4938 if (!(bitmap_buf1= (my_bitmap_map*) alloc_root(param->mem_root,
4939 param->fields_bitmap_size)))
4940 DBUG_RETURN(NULL);
4941 if (!(bitmap_buf2= (my_bitmap_map*) alloc_root(param->mem_root,
4942 param->fields_bitmap_size)))
4943 DBUG_RETURN(NULL);
4944
4945 if (bitmap_init(&ror_scan->covered_fields, bitmap_buf1,
4946 param->table->s->fields, FALSE))
4947 DBUG_RETURN(NULL);
4948 if (bitmap_init(&ror_scan->covered_fields_remaining, bitmap_buf2,
4949 param->table->s->fields, FALSE))
4950 DBUG_RETURN(NULL);
4951
4952 bitmap_clear_all(&ror_scan->covered_fields);
4953
4954 KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
4955 KEY_PART_INFO *key_part_end=
4956 key_part + param->table->key_info[keynr].user_defined_key_parts;
4957 for (;key_part != key_part_end; ++key_part)
4958 {
4959 if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr-1))
4960 bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
4961 }
4962 bitmap_copy(&ror_scan->covered_fields_remaining, &ror_scan->covered_fields);
4963
4964 double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
4965 ror_scan->index_read_cost=
4966 param->table->file->index_scan_cost(ror_scan->keynr, 1, rows);
4967 DBUG_RETURN(ror_scan);
4968 }
4969
4970
4971 /**
4972 Compare two ROR_SCAN_INFO* by
4973 1. #fields in this index that are not already covered
4974 by other indexes earlier in the intersect ordering: descending
4975 2. E(#records): ascending
4976
4977 @param scan1 first ror scan to compare
4978 @param scan2 second ror scan to compare
4979
4980 @return true if scan1 > scan2, false otherwise
4981 */
4982 static bool is_better_intersect_match(const ROR_SCAN_INFO *scan1,
4983 const ROR_SCAN_INFO *scan2)
4984 {
4985 if (scan1 == scan2)
4986 return false;
4987
4988 if (scan1->num_covered_fields_remaining >
4989 scan2->num_covered_fields_remaining)
4990 return false;
4991
4992 if (scan1->num_covered_fields_remaining <
4993 scan2->num_covered_fields_remaining)
4994 return true;
4995
4996 return (scan1->records > scan2->records);
4997 }

/**
  Sort indexes in an order that is likely to be a good index merge
  intersection order. After running this function, [start, ..., end-1]
  is ordered according to this strategy:

    1) Minimize the number of indexes that must be used in the
       intersection. I.e., the index covering the most fields not already
       covered by other indexes earlier in the sort order is picked first.
    2) When multiple indexes cover equally many uncovered fields, the
       index with the lowest E(#rows) is chosen.

  Note that not all permutations of index ordering are tested, so this
  function may not find the optimal order.

  @param[in,out] start Pointer to the start of indexes that may
                       be used in index merge intersection
  @param end           Pointer past the last index that may be used.
  @param param         Parameter from test_quick_select function.
*/
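/*
  Illustrative example of the strategy above (hypothetical indexes and row
  estimates): suppose the query needs fields {a,b,c,d} and the candidate
  ROR scans are
    idx1(a,b)    E(#rows)=1000
    idx2(c)      E(#rows)=100
    idx3(a,c,d)  E(#rows)=5000
  idx3 is placed first since it covers 3 uncovered fields. With {a,c,d}
  covered, idx1 still covers one uncovered field (b) while idx2 covers
  none, so the resulting order is idx3, idx1, idx2.
*/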
static void find_intersect_order(ROR_SCAN_INFO **start,
                                 ROR_SCAN_INFO **end,
                                 const PARAM *param)
{
  // nothing to sort if there are only zero or one ROR scans
  if ((start == end) || (start + 1 == end))
    return;

  /*
    Bitmap of fields we would like the ROR scans to cover. Will be
    modified by the loop below so that when we're looking for a ROR
    scan in position 'x' in the ordering, all fields covered by ROR
    scans 0,...,x-1 have been removed.
  */
  MY_BITMAP fields_to_cover;
  my_bitmap_map *map;
  if (!(map= (my_bitmap_map*) alloc_root(param->mem_root,
                                         param->fields_bitmap_size)))
    return;
  bitmap_init(&fields_to_cover, map, param->needed_fields.n_bits, FALSE);
  bitmap_copy(&fields_to_cover, &param->needed_fields);

  // Sort ROR scans in [start,...,end-1]
  for (ROR_SCAN_INFO **place= start; place < (end - 1); place++)
  {
    /* Placeholder for the best ROR scan found for position 'place' so far */
    ROR_SCAN_INFO **best= place;
    ROR_SCAN_INFO **current= place + 1;

    {
      /*
        Calculate how many fields in 'fields_to_cover' not already
        covered by [start,...,place-1] the 'best' index covers. The
        result is used in is_better_intersect_match() and is valid
        when finding the best ROR scan for position 'place' only.
      */
      bitmap_intersect(&(*best)->covered_fields_remaining, &fields_to_cover);
      (*best)->num_covered_fields_remaining=
        bitmap_bits_set(&(*best)->covered_fields_remaining);
    }
    for (; current < end; current++)
    {
      {
        /*
          Calculate how many fields in 'fields_to_cover' not already
          covered by [start,...,place-1] the 'current' index covers.
          The result is used in is_better_intersect_match() and is
          valid when finding the best ROR scan for position 'place' only.
        */
        bitmap_intersect(&(*current)->covered_fields_remaining,
                         &fields_to_cover);
        (*current)->num_covered_fields_remaining=
          bitmap_bits_set(&(*current)->covered_fields_remaining);

        /*
          No need to compare with 'best' if 'current' does not
          contribute with uncovered fields.
        */
        if ((*current)->num_covered_fields_remaining == 0)
          continue;
      }

      if (is_better_intersect_match(*best, *current))
        best= current;
    }

    /*
      'best' is now the ROR scan that will be sorted in position
      'place'. When searching for the best ROR scans later in the sort
      sequence we do not need coverage of the fields covered by 'best'
    */
    bitmap_subtract(&fields_to_cover, &(*best)->covered_fields);
    if (best != place)
      swap_variables(ROR_SCAN_INFO*, *best, *place);

    if (bitmap_is_clear_all(&fields_to_cover))
      return;                                 // No more fields to cover
  }
}

/* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Fraction of table records that satisfies conditions of all scans.
    This is the number of full records that will be retrieved if a
    non-index_only index intersection is employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records; /* sum(#records to look in indexes) */
  Cost_estimate index_scan_cost; /* SUM(cost of 'index-only' scans) */
  Cost_estimate total_cost;
} ROR_INTERSECT_INFO;


/*
  Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.

  SYNOPSIS
    ror_intersect_init()
      param  Parameter from test_quick_select

  RETURN
    allocated structure
    NULL on error
*/

static
ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
{
  ROR_INTERSECT_INFO *info;
  my_bitmap_map* buf;
  if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
                                              sizeof(ROR_INTERSECT_INFO))))
    return NULL;
  info->param= param;
  if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
                                         param->fields_bitmap_size)))
    return NULL;
  if (bitmap_init(&info->covered_fields, buf, param->table->s->fields,
                  FALSE))
    return NULL;
  info->is_covering= FALSE;
  info->index_scan_cost.reset();
  info->total_cost.reset();
  info->index_records= 0;
  info->out_rows= (double) param->table->file->stats.records;
  bitmap_clear_all(&info->covered_fields);
  return info;
}

void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
{
  dst->param= src->param;
  memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
         no_bytes_in_map(&src->covered_fields));
  dst->out_rows= src->out_rows;
  dst->is_covering= src->is_covering;
  dst->index_records= src->index_records;
  dst->index_scan_cost= src->index_scan_cost;
  dst->total_cost= src->total_cost;
}


/*
  Get selectivity of adding a ROR scan to the ROR-intersection.

  SYNOPSIS
    ror_scan_selectivity()
      info  ROR-intersection, an intersection of ROR index scans
      scan  ROR scan that may or may not improve the selectivity
            of 'info'

  NOTES
    Suppose we have conditions on several keys
    cond=k_11=c_11 AND k_12=c_12 AND ...  // key_parts of first key in 'info'
         k_21=c_21 AND k_22=c_22 AND ...  // key_parts of second key in 'info'
          ...
         k_n1=c_n1 AND k_n3=c_n3 AND ... (1) // key_parts of 'scan'

    where k_ij may be the same as any k_pq (i.e. keys may have common parts).

    Note that for ROR retrieval, only equality conditions are usable so there
    are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'.

    A full row is retrieved if the entire condition holds.

    The recursive procedure for finding P(cond) is as follows:

    First step:
    Pick the 1st part of the 1st key and break conjunction (1) into two
    parts:
      cond= (k_11=c_11 AND R)

    Here R may still contain condition(s) equivalent to k_11=c_11.
    Nevertheless, the following holds:

      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).

    Mark k_11 as a fixed field (and satisfied condition) F, save P(F),
    save R to be cond and proceed to the recursion step.

    Recursion step:
    We have a set of fixed fields (and satisfied conditions) F, probability
    P(F), and remaining conjunction R.
    Pick the next key part of the current key and its condition "k_ij=c_ij".
    We will add "k_ij=c_ij" into F and update P(F).
    Let's denote k_ij as t, R = t AND R1, where R1 may still contain t. Then

      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F))  (2)

    (where '|' means conditional probability, not "or")

    Consider the first multiplier in (2). One of the following holds:
    a) F contains a condition on the field used in t (i.e. t AND F = F).
       Then P(t|F) = 1

    b) F doesn't contain a condition on the field used in t. Then F and t
       are considered independent.

       P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
              = P(t|fields_before_t_in_key).

       P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
                                     #records(fields_before_t_in_key, t)

    The second multiplier is calculated by applying this step recursively.

  IMPLEMENTATION
    This function calculates the result of applying the "recursion step"
    described above for all fixed key members of a single key, accumulating
    the set of covered fields, the selectivity, etc.

    The calculation is conducted as follows:
    Let's denote #records(keypart1, ... keypartK) as n_k. We need to
    calculate

      n_{k1}      n_{k2}
     --------- * ---------  * ....  (3)
      n_{k1-1}    n_{k2-1}

    where k1,k2,... are key parts whose fields were not yet marked as fixed
    (this is the result of applying option b) of the recursion step for
    parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
    as fixed, we calculate (3) as

                                      n_{i1}      n_{i2}
      (3) = n_{max_key_part} / (   --------- * ---------  * ....  )
                                      n_{i1-1}    n_{i2-1}

    where i1,i2, .. are key parts that were already marked as fixed.

    In order to minimize the number of expensive records_in_range calls we
    group and reduce adjacent fractions. Note that on the optimizer's
    request, index statistics may be used instead of records_in_range
    @see RANGE_OPT_PARAM::use_index_statistics.

  RETURN
    Selectivity of the given ROR scan, a number between 0 and 1. 1 means
    that adding 'scan' to the intersection does not improve the
    selectivity.
*/
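/*
  Worked example of the NOTES above (all numbers illustrative, not taken
  from any real table): let 'scan' be on INDEX(a,b) with condition
  a=c1 AND b=c2, in a table of 1000 rows, with 'info' covering no fields
  yet. If #records(a=c1) = 100 and #records(a=c1, b=c2) = 10, option b)
  applies to both key parts and the multiplier is

    (100/1000) * (10/100) = 10/1000 = 0.01

  i.e. the intersection is estimated to retain 1% of the rows it would
  otherwise retrieve. If 'info' had already covered field 'a', option a)
  would make the first factor 1 and only 10/100 would remain.
*/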

static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  const TABLE * const table= info->param->table;
  const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;
  /**
    key values tuple, used to store both min_range.key and
    max_range.key. This function is only called for equality ranges;
    open ranges (e.g. "min_value < X < max_value") cannot be used for
    rowid ordered retrieval, so in this function we know that
    min_range.key == max_range.key
  */
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr-1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  ha_rows prev_records= table->file->stats.records;
  DBUG_ENTER("ror_scan_selectivity");

  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr-1));
    if (cur_covered != prev_covered)
    {
      /* create (part1val, ..., part{n-1}val) tuple. */
      bool is_null_range= false;
      ha_rows records;
      if (!tuple_arg)
      {
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= 1;
      }
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= (keypart_map << 1) | 1;
      }
      min_range.length= max_range.length= (size_t) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;

      /*
        Get the number of rows in this range. This is done by calling
        records_in_range() unless all these are true:
          1) The user has requested that index statistics should be used
             for equality ranges to avoid the incurred overhead of
             index dives in records_in_range()
          2) The range is not of the form "x IS NULL". The reason is
             that the number of rows with this value is likely to be
             very different from the values in the index statistics
          3) Index statistics are available.
        @see key_val
      */
      if (!info->param->use_index_statistics ||        // (1)
          is_null_range ||                             // (2)
          !table->key_info[scan->keynr].
           has_records_per_key(tuple_arg->part))       // (3)
      {
        DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
        DBUG_ASSERT(min_range.length > 0);
        records=
          table->file->records_in_range(scan->keynr, &min_range, &max_range);
      }
      else
      {
        // Use index statistics
        records= static_cast<ha_rows>(
          table->key_info[scan->keynr].records_per_key(tuple_arg->part));
      }

      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
    double tmp= rows2double(table->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  // Todo: This assert fires in PB sysqa RQG tests.
  // DBUG_ASSERT(selectivity_mult <= 1.0);
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}


/*
  Check if adding a ROR scan to a ROR-intersection reduces the cost of the
  ROR-intersection and if yes, update the parameters of the
  ROR-intersection, including its cost.

  SYNOPSIS
    ror_intersect_add()
      param        Parameter from test_quick_select
      info         ROR-intersection structure to add the scan to.
      ror_scan     ROR scan info to add.
      is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
                   from other parameters and is passed separately only to
                   avoid duplicating the inference code)
      trace_costs  Optimizer trace object cost details are added to

  NOTES
    Adding a ROR scan to a ROR-intersect "makes sense" iff the cost of the
    ROR-intersection decreases. The cost of a ROR-intersection is
    calculated as follows:

      cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval

    When we add a scan the first term increases and the second decreases.

    cost_of_full_rows_retrieval=
      (union of indexes used covers all needed fields) ?
        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
        0

    E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
                           ror_scan_selectivity({scan1}, scan2) * ... *
                           ror_scan_selectivity({scan1,...}, scanN).
  RETURN
    TRUE   ROR scan added to ROR-intersection, cost updated.
    FALSE  It doesn't make sense to add this ROR scan to this
           ROR-intersection.
*/
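/*
  Illustrative cost trade-off (all numbers hypothetical): in a table of
  1,000,000 rows, suppose the intersection currently retrieves
  E(rows)=50,000 and a candidate scan has ror_scan_selectivity()=0.1.
  Adding the scan raises SUM_i(key_scan_cost_i) by its index-only scan
  cost but drops E(rows_to_retrieve) to 5,000, shrinking
  cost_of_full_rows_retrieval. The scan pays off only if the sweep saving
  exceeds the added index scan cost; a scan with selectivity 1.0 is
  rejected outright below, since it can only add cost.
*/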

static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
                              ROR_SCAN_INFO* ror_scan, bool is_cpk_scan,
                              Opt_trace_object *trace_costs)
{
  double selectivity_mult= 1.0;

  DBUG_ENTER("ror_intersect_add");
  DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
  DBUG_PRINT("info", ("Adding scan on %s",
                      info->param->table->key_info[ror_scan->keynr].name));
  DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));

  selectivity_mult = ror_scan_selectivity(info, ror_scan);
  if (selectivity_mult == 1.0)
  {
    /* Don't add this scan if it doesn't improve selectivity. */
    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
    DBUG_RETURN(FALSE);
  }

  info->out_rows *= selectivity_mult;

  if (is_cpk_scan)
  {
    /*
      CPK scan is used to filter out rows. We apply filtering for each
      record of every scan. For each record we assume that one key
      compare is done:
    */
    const Cost_model_table *const cost_model= info->param->table->cost_model();
    const double idx_cost=
      cost_model->key_compare_cost(rows2double(info->index_records));
    info->index_scan_cost.add_cpu(idx_cost);
    trace_costs->add("index_scan_cost", idx_cost);
  }
  else
  {
    info->index_records += info->param->table->quick_rows[ror_scan->keynr];
    info->index_scan_cost+= ror_scan->index_read_cost;
    trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
    if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                               &info->covered_fields))
    {
      DBUG_PRINT("info", ("ROR-intersect is covering now"));
      info->is_covering= TRUE;
    }
  }

  info->total_cost= info->index_scan_cost;
  trace_costs->add("cumulated_index_scan_cost",
                   info->index_scan_cost);

  if (!info->is_covering)
  {
    Cost_estimate sweep_cost;
    JOIN *join= info->param->thd->lex->select_lex->join;
    const bool is_interrupted= join && join->tables != 1;

    get_sweep_read_cost(info->param->table, double2rows(info->out_rows),
                        is_interrupted, &sweep_cost);
    info->total_cost+= sweep_cost;
    trace_costs->add("disk_sweep_cost", sweep_cost);
  }
  else
    trace_costs->add("disk_sweep_cost", 0);

  DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
  DBUG_PRINT("info", ("New cost: %g, %scovering",
                      info->total_cost.total_cost(),
                      info->is_covering?"" : "non-"));
  DBUG_RETURN(TRUE);
}


/*
  Get the best ROR-intersection plan using the non-covering
  ROR-intersection search algorithm. The returned plan may be covering.

  SYNOPSIS
    get_best_ror_intersect()
      param            Parameter from test_quick_select function.
      tree             Transformed restriction condition to be used to look
                       for ROR scans.
      cost_est         Do not return read plans with cost > cost_est.
      are_all_covering [out] set to TRUE if union of all scans covers all
                       fields needed by the query (and it is possible to
                       build a covering ROR-intersection)

  NOTES
    get_key_scans_params must be called before this function can be called.

    When this function is called by the ROR-union construction algorithm it
    assumes it is building an uncovered ROR-intersection (and thus the # of
    full records to be retrieved is wrong here). This is a hack.

  IMPLEMENTATION
    The approximate best non-covering plan search algorithm is as follows:

    find_min_ror_intersection_scan()
    {
      R= select all ROR scans;
      order R by (E(#records_matched) * key_record_length).

      S= first(R); -- set of scans that will be used for ROR-intersection
      R= R - first(R);
      min_cost= cost(S);
      min_scan= make_scan(S);
      while (R is not empty)
      {
        firstR= first(R);
        R= R - firstR;
        if (!(selectivity(S + firstR) < selectivity(S)))
          continue;

        S= S + firstR;
        if (cost(S) < min_cost)
        {
          min_cost= cost(S);
          min_scan= make_scan(S);
        }
      }
      return min_scan;
    }

    See the ror_intersect_add function for ROR intersection costs.

    Special handling for Clustered PK scans
    A clustered PK contains all table fields, so using it as a regular scan
    in an index intersection doesn't make sense: a range scan on the CPK
    will be less expensive in this case.
    A clustered PK scan has special handling in ROR-intersection: it is not
    used to retrieve rows, instead its condition is used to filter row
    references we get from scans on other keys.

  RETURN
    ROR-intersection table read plan
    NULL if out of memory or no suitable plan found.
*/
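/*
  Sketch of one pass of the loop above (hypothetical costs): with scans
  ordered s1, s2, s3, suppose cost({s1})=900. Adding s2 improves
  selectivity and gives cost({s1,s2})=600: a new minimum, remember it.
  Adding s3 improves selectivity only marginally, so the summed index
  scan cost pushes cost({s1,s2,s3}) up to 700: s3 stays in S for the
  selectivity bookkeeping, but the remembered best plan is still {s1,s2}.
*/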

static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          const Cost_estimate *cost_est)
{
  uint idx;
  Cost_estimate min_cost;
  Opt_trace_context * const trace= &param->thd->opt_trace;
  DBUG_ENTER("get_best_ror_intersect");

  Opt_trace_object trace_ror(trace, "analyzing_roworder_intersect");

  min_cost.set_max_cost();

  if ((tree->n_ror_scans < 2) || !param->table->file->stats.records ||
      !param->index_merge_intersect_allowed)
  {
    trace_ror.add("usable", false);
    if (tree->n_ror_scans < 2)
      trace_ror.add_alnum("cause", "too_few_roworder_scans");
    else
      trace_ror.add("need_tracing", true);
    DBUG_RETURN(NULL);
  }

  if (param->order_direction == ORDER::ORDER_DESC)
    DBUG_RETURN(NULL);

  /*
    Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
    them. Also find and save the clustered PK scan if there is one.
  */
  ROR_SCAN_INFO **cur_ror_scan;
  ROR_SCAN_INFO *cpk_scan= NULL;
  uint cpk_no;
  bool cpk_scan_used= FALSE;

  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     param->keys)))
    DBUG_RETURN(NULL);
  cpk_no= ((param->table->file->primary_key_is_clustered()) ?
           param->table->s->primary_key : MAX_KEY);

  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
  {
    ROR_SCAN_INFO *scan;
    if (!tree->ror_scans_map.is_set(idx))
      continue;
    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
      DBUG_RETURN(NULL);
    if (param->real_keynr[idx] == cpk_no)
    {
      cpk_scan= scan;
      tree->n_ror_scans--;
    }
    else
      *(cur_ror_scan++)= scan;
  }

  tree->ror_scans_end= cur_ror_scan;
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
                                          tree->ror_scans,
                                          tree->ror_scans_end););
  /*
    Ok, [ror_scans, ror_scans_end) is an array of ptrs to initialized
    ROR_SCAN_INFO's.
    Step 2: Get the best ROR-intersection using an approximate algorithm.
  */
  find_intersect_order(tree->ror_scans, tree->ror_scans_end, param);

  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
                                          tree->ror_scans,
                                          tree->ror_scans_end););

  ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
  ROR_SCAN_INFO **intersect_scans_end;
  if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     tree->n_ror_scans)))
    DBUG_RETURN(NULL);
  intersect_scans_end= intersect_scans;

  /* Create and incrementally update ROR intersection. */
  ROR_INTERSECT_INFO *intersect, *intersect_best;
  if (!(intersect= ror_intersect_init(param)) ||
      !(intersect_best= ror_intersect_init(param)))
    DBUG_RETURN(NULL);

  /* [intersect_scans,intersect_scans_best) will hold the best intersection */
  ROR_SCAN_INFO **intersect_scans_best;
  cur_ror_scan= tree->ror_scans;
  intersect_scans_best= intersect_scans;
  /*
    Note: trace_isect_idx.end() is called to close this object after
    this while-loop.
  */
  Opt_trace_array trace_isect_idx(trace, "intersecting_indexes");
  while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
  {
    Opt_trace_object trace_idx(trace);
    trace_idx.add_utf8("index",
                       param->table->key_info[(*cur_ror_scan)->keynr].name);
    /* S= S + first(R);  R= R - first(R); */
    if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE, &trace_idx))
    {
      trace_idx.add("cumulated_total_cost",
                    intersect->total_cost).
        add("usable", false).
        add_alnum("cause", "does_not_reduce_cost_of_intersect");
      cur_ror_scan++;
      continue;
    }

    trace_idx.add("cumulated_total_cost",
                  intersect->total_cost).
      add("usable", true).
      add("matching_rows_now", intersect->out_rows).
      add("isect_covering_with_this_index", intersect->is_covering);

    *(intersect_scans_end++)= *(cur_ror_scan++);

    if (intersect->total_cost < min_cost)
    {
      /* Local minimum found, save it */
      ror_intersect_cpy(intersect_best, intersect);
      intersect_scans_best= intersect_scans_end;
      min_cost= intersect->total_cost;
      trace_idx.add("chosen", true);
    }
    else
    {
      trace_idx.add("chosen", false).
        add_alnum("cause", "does_not_reduce_cost");
    }
  }
  // Note: trace_isect_idx trace object is closed here
  trace_isect_idx.end();

  if (intersect_scans_best == intersect_scans)
  {
    trace_ror.add("chosen", false).
      add_alnum("cause", "does_not_increase_selectivity");
    DBUG_PRINT("info", ("None of scans increase selectivity"));
    DBUG_RETURN(NULL);
  }

  DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
                                          "best ROR-intersection",
                                          intersect_scans,
                                          intersect_scans_best););

  uint best_num= intersect_scans_best - intersect_scans;
  ror_intersect_cpy(intersect, intersect_best);

  /*
    Ok, found the best ROR-intersection of non-CPK key scans.
    Check if we should add a CPK scan. If the obtained ROR-intersection is
    covering, it doesn't make sense to add a CPK scan.
  */
  { // Scope for trace object
    Opt_trace_object trace_cpk(trace, "clustered_pk");
    if (cpk_scan && !intersect->is_covering)
    {
      if (ror_intersect_add(intersect, cpk_scan, TRUE, &trace_cpk) &&
          (intersect->total_cost < min_cost))
      {
        trace_cpk.add("clustered_pk_scan_added_to_intersect", true).
          add("cumulated_cost", intersect->total_cost);
        cpk_scan_used= TRUE;
        intersect_best= intersect; //just set pointer here
      }
      else
        trace_cpk.add("clustered_pk_added_to_intersect", false).
          add_alnum("cause", "cost");
    }
    else
    {
      trace_cpk.add("clustered_pk_added_to_intersect", false).
        add_alnum("cause", cpk_scan ?
                  "roworder_is_covering" : "no_clustered_pk_index");
    }
  }
  /* Ok, return ROR-intersect plan if we have found one */
  TRP_ROR_INTERSECT *trp= NULL;
  if (min_cost < *cost_est && (cpk_scan_used || best_num > 1))
  {
    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
      DBUG_RETURN(trp);
    if (!(trp->first_scan=
           (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                       sizeof(ROR_SCAN_INFO*)*best_num)))
      DBUG_RETURN(NULL);
    memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
    trp->last_scan= trp->first_scan + best_num;
    trp->is_covering= intersect_best->is_covering;
    trp->cost_est= intersect_best->total_cost;
    /* Prevent divisions by zero */
    ha_rows best_rows = double2rows(intersect_best->out_rows);
    if (!best_rows)
      best_rows= 1;
    set_if_smaller(param->table->quick_condition_rows, best_rows);
    trp->records= best_rows;
    trp->index_scan_cost= intersect_best->index_scan_cost;
    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;

    trace_ror.add("rows", trp->records).
      add("cost", trp->cost_est).
      add("covering", trp->is_covering).
      add("chosen", true);

    DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                        "cost %g, records %lu",
                        trp->cost_est.total_cost(), (ulong) trp->records));
  }
  else
  {
    trace_ror.add("chosen", false).
      add_alnum("cause", (*cost_est > min_cost) ? "too_few_indexes_to_merge" :
                "cost");
  }
  DBUG_RETURN(trp);
}

/*
  Get the best "range" table read plan for given SEL_TREE, also update
  some info

  SYNOPSIS
    get_key_scans_params()
      param                    Parameters from test_quick_select
      tree                     Make range select for this SEL_TREE
      index_read_must_be_used  TRUE <=> assume 'index only' option will be
                               set (except for clustered PK indexes)
      update_tbl_stats         TRUE <=> update table->quick_* with
                               information about the range scans we've
                               evaluated.
      cost_est                 Maximum cost. i.e. don't create read plans
                               with cost > cost_est.

  DESCRIPTION
    Find the best "range" table read plan for given SEL_TREE.
    The side effects are
     - tree->ror_scans is updated to indicate which scans are ROR scans.
     - if update_tbl_stats=TRUE then table->quick_* is updated with info
       about every possible range scan.

  RETURN
    Best range read plan
    NULL if no plan found or error occurred
*/

static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool update_tbl_stats,
                                       const Cost_estimate *cost_est)
{
  uint idx, best_idx= 0;
  SEL_ARG *key, *key_to_read= NULL;
  ha_rows best_records= 0; /* protected by key_to_read */
  uint best_mrr_flags= 0, best_buf_size= 0;
  TRP_RANGE* read_plan= NULL;
  Cost_estimate read_cost= *cost_est;
  DBUG_ENTER("get_key_scans_params");
  Opt_trace_context * const trace= &param->thd->opt_trace;
  /*
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
    is defined as "not null".
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
                                      "tree scans"););
  Opt_trace_array ota(trace, "range_scan_alternatives");

  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  for (idx= 0; idx < param->keys; idx++)
  {
    key= tree->keys[idx];
    if (key)
    {
      ha_rows found_records;
      Cost_estimate cost;
      uint mrr_flags, buf_size;
      uint keynr= param->real_keynr[idx];
      if (key->type == SEL_ARG::MAYBE_KEY ||
          key->maybe_flag)
        param->needed_reg->set_bit(keynr);

      bool read_index_only= index_read_must_be_used ? TRUE :
        (bool) param->table->covering_keys.is_set(keynr);

      Opt_trace_object trace_idx(trace);
      trace_idx.add_utf8("index", param->table->key_info[keynr].name);

      found_records= check_quick_select(param, idx, read_index_only, key,
                                        update_tbl_stats, &mrr_flags,
                                        &buf_size, &cost);

#ifdef OPTIMIZER_TRACE
      // check_quick_select() says don't use range if it returns HA_POS_ERROR
      if (found_records != HA_POS_ERROR &&
          param->thd->opt_trace.is_started())
      {
        Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");

        const KEY &cur_key= param->table->key_info[keynr];
        const KEY_PART_INFO *key_part= cur_key.key_part;

        String range_info;
        range_info.set_charset(system_charset_info);
        append_range_all_keyparts(&trace_range, NULL, &range_info,
                                  key, key_part, false);
        trace_range.end(); // NOTE: ends the tracing scope

        trace_idx.add("index_dives_for_eq_ranges",
                      !param->use_index_statistics).
          add("rowid_ordered", param->is_ror_scan).
          add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)).
          add("index_only", read_index_only).
          add("rows", found_records).
          add("cost", cost);
      }
#endif

      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }

      if (found_records != HA_POS_ERROR &&
          read_cost > cost)
      {
        trace_idx.add("chosen", true);
        read_cost= cost;
        best_records= found_records;
        key_to_read= key;
        best_idx= idx;
        best_mrr_flags= mrr_flags;
        best_buf_size= buf_size;
      }
      else
      {
        trace_idx.add("chosen", false);
        if (found_records == HA_POS_ERROR)
        {
          if (key->type == SEL_ARG::MAYBE_KEY)
            trace_idx.add_alnum("cause", "depends_on_unread_values");
          else
            trace_idx.add_alnum("cause", "unknown");
        }
        else
          trace_idx.add_alnum("cause", "cost");
      }

    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  if (key_to_read)
  {
    if ((read_plan= new (param->mem_root) TRP_RANGE(key_to_read, best_idx,
                                                    best_mrr_flags)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(best_idx);
      read_plan->cost_est= read_cost;
      read_plan->mrr_buf_size= best_buf_size;
      DBUG_PRINT("info",
                 ("Returning range plan for key %s, cost %g, records %lu",
                  param->table->key_info[param->real_keynr[best_idx]].name,
                  read_plan->cost_est.total_cost(),
                  (ulong) read_plan->records));
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}


QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
                                            bool retrieve_full_rows,
                                            MEM_ROOT *parent_alloc)
{
  QUICK_INDEX_MERGE_SELECT *quick_imerge;
  QUICK_RANGE_SELECT *quick;
  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
  if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
    return NULL;

  quick_imerge->records= records;
  quick_imerge->cost_est= cost_est;

  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
       range_scan++)
  {
    if (!(quick= (QUICK_RANGE_SELECT*)
          ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
        quick_imerge->push_quick_back(quick))
    {
      delete quick;
      delete quick_imerge;
      return NULL;
    }
  }
  return quick_imerge;
}

QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (st_ror_scan_info **current= first_scan;
         current != last_scan;
         current++)
    {
      if (!(quick= get_quick_select(param, (*current)->idx,
                                    (*current)->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->cost_est= cost_est;
  }
  DBUG_RETURN(quick_intrsect);
}


QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
                                          bool retrieve_full_rows,
                                          MEM_ROOT *parent_alloc)
{
  QUICK_ROR_UNION_SELECT *quick_roru;
  TABLE_READ_PLAN **scan;
  QUICK_SELECT_I *quick;
  DBUG_ENTER("TRP_ROR_UNION::make_quick");
  /*
    It is impossible to construct a ROR-union that will not retrieve full
    rows, ignore retrieve_full_rows parameter.
  */
  if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
  {
    for (scan= first_ror; scan != last_ror; scan++)
    {
      if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
          quick_roru->push_quick_back(quick))
        DBUG_RETURN(NULL);
    }
    quick_roru->records= records;
    quick_roru->cost_est= cost_est;
  }
  DBUG_RETURN(quick_roru);
}


/**
  If EXPLAIN or if the --safe-updates option is enabled, add a warning that
  the index cannot be used for range access due to either type conversion
  or different collations on the field used for comparison

  @param param    PARAM from test_quick_select
  @param key_num  Key number
  @param field    Field in the predicate
*/
static void warn_index_not_applicable(const RANGE_OPT_PARAM *param,
                                      const uint key_num, const Field *field)
{
  THD *thd= param->thd;
  if (param->using_real_indexes &&
      (param->thd->lex->describe ||
       thd->variables.option_bits & OPTION_SAFE_UPDATES))
    push_warning_printf(
      param->thd,
      Sql_condition::SL_WARNING,
      ER_WARN_INDEX_NOT_APPLICABLE,
      ER(ER_WARN_INDEX_NOT_APPLICABLE),
      "range",
      field->table->key_info[param->real_keynr[key_num]].name,
      field->field_name);
}


/*
  Build a SEL_TREE for a <> or NOT BETWEEN predicate

  SYNOPSIS
    get_ne_mm_tree()
      param      PARAM from test_quick_select
      cond_func  item for the predicate
      field      field in the predicate
      lt_value   constant that the field should be smaller than
      gt_value   constant that the field should be greater than
      cmp_type   compare type for the field

  RETURN
    #  Pointer to the built tree
    0  on error
*/
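/*
  Illustrative example: for "a <> 5" the function is called with
  lt_value=5 and gt_value=5, producing the union of intervals
  (-inf < a < 5) OR (5 < a < +inf). For "a NOT BETWEEN 5 AND 10" it is
  called with lt_value=5 and gt_value=10, giving
  (-inf < a < 5) OR (10 < a < +inf).
*/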
static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
                                Field *field,
                                Item *lt_value, Item *gt_value,
                                Item_result cmp_type)
{
  SEL_TREE *tree= NULL;

  if (param->has_errors())
    return NULL;

  tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                     lt_value, cmp_type);
  if (tree)
  {
    tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
                                            Item_func::GT_FUNC,
                                            gt_value, cmp_type));
  }
  return tree;
}


/**
  Factory function to build a SEL_TREE from an <in predicate>

  @param param      Information on 'just about everything'.
  @param predicand  The <in predicate's> predicand, i.e. the left-hand
                    side of the <in predicate> expression.
  @param op         The 'in' operator itself.
  @param value      The right-hand side of the <in predicate> expression.
  @param cmp_type   What types we should pretend that the arguments are.
  @param is_negated If true, the operator is NOT IN, otherwise IN.
*/
static SEL_TREE *get_func_mm_tree_from_in_predicate(RANGE_OPT_PARAM *param,
                                                    Item *predicand,
                                                    Item_func_in *op,
                                                    Item *value,
                                                    Item_result cmp_type,
                                                    bool is_negated)
{
  if (param->has_errors())
    return NULL;

  if (is_negated)
  {
    // We don't support row constructors (multiple columns on lhs) here.
    if (predicand->type() != Item::FIELD_ITEM)
      return NULL;

    Field *field= static_cast<Item_field*>(predicand)->field;

    if (op->array && op->array->result_type() != ROW_RESULT)
    {
      /*
        We get here for conditions of the form "t.key NOT IN (c1, c2, ...)",
        where c{i} are constants. Our goal is to produce a SEL_TREE that
        represents intervals:

        ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)

        where $MIN is either "-inf" or NULL.

        The most straightforward way to produce it is to convert NOT
        IN into "(t.key != c1) AND (t.key != c2) AND ... " and let the
        range analyzer build a SEL_TREE from that. The problem is that
        the range analyzer will use O(N^2) memory (which is probably a
        bug), and people who do use big NOT IN lists (e.g. see
        BUG#15872, BUG#21282), will run out of memory.

        Another problem with big lists like (*) is that a big list is
        unlikely to produce a good "range" access, while considering
        that range access will require expensive CPU calculations (and
        for MyISAM even index accesses). In short, big NOT IN lists
        are rarely worth analyzing.

        Considering the above, we'll handle NOT IN as follows:

        - if the number of entries in the NOT IN list is less than
          NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*)
          manually (see the worked example below).

        - Otherwise, don't produce a SEL_TREE.
      */
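      /*
        Worked example of (*): "t.key NOT IN (10, 20, 30)", with the value
        list already sorted, becomes the interval list

          ($MIN < t.key < 10) OR (10 < t.key < 20) OR
          (20 < t.key < 30) OR (30 < t.key)

        built below by repeatedly creating a "t.key < c_i" tree, tightening
        its lower bound to the previous constant, and OR-ing a final
        "t.key > c_last" tree in at the end.
      */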

      const uint NOT_IN_IGNORE_THRESHOLD= 1000;
      // If we have t.key NOT IN (null, null, ...) or the list is too long
      if (op->array->used_count == 0 ||
          op->array->used_count > NOT_IN_IGNORE_THRESHOLD)
        return NULL;

      MEM_ROOT *tmp_root= param->mem_root;
      param->thd->mem_root= param->old_root;
      /*
        Create one Item_type constant object. We'll need it as
        get_mm_parts only accepts constant values wrapped in Item_Type
        objects.
        We create the Item on param->mem_root which points to
        per-statement mem_root (while thd->mem_root is currently pointing
        to mem_root local to range optimizer).
      */
      Item *value_item= op->array->create_item();
      param->thd->mem_root= tmp_root;

      if (!value_item)
        return NULL;

      /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
      uint i= 0;
      SEL_TREE *tree= NULL;
      do
      {
        op->array->value_to_item(i, value_item);
        tree= get_mm_parts(param, op, field, Item_func::LT_FUNC, value_item,
                           cmp_type);
        if (!tree)
          break;
        i++;
      }
      while (i < op->array->used_count && tree->type == SEL_TREE::IMPOSSIBLE);

      if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
        /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
        return NULL;
      SEL_TREE *tree2= NULL;
      for (; i < op->array->used_count; i++)
      {
        if (op->array->compare_elems(i, i - 1))
        {
          /* Get a SEL_TREE for "-inf < X < c_i" interval */
          op->array->value_to_item(i, value_item);
          tree2= get_mm_parts(param, op, field, Item_func::LT_FUNC,
                              value_item, cmp_type);
          if (!tree2)
          {
            tree= NULL;
            break;
          }

          /* Change all intervals to be "c_{i-1} < X < c_i" */
          for (uint idx= 0; idx < param->keys; idx++)
          {
            SEL_ARG *new_interval, *last_val;
            if (((new_interval= tree2->keys[idx])) &&
                (tree->keys[idx]) &&
                ((last_val= tree->keys[idx]->last())))
            {
              new_interval->min_value= last_val->max_value;
              new_interval->min_flag= NEAR_MIN;

              /*
                If the interval is over a partial keypart, the
                interval must be "c_{i-1} <= X < c_i" instead of
                "c_{i-1} < X < c_i". Reason:

                Consider a table with a column "my_col VARCHAR(3)",
                and an index with definition
                "INDEX my_idx my_col(1)". If the table contains rows
                with my_col values "f" and "foo", the index will not
                distinguish the two rows.

                Note that tree_or() below will effectively merge
                this range with the range created for c_{i-1} and
                we'll eventually end up with only one range:
                "NULL < X".

                Partitioning indexes are never partial.
              */
              if (param->using_real_indexes)
              {
                const KEY key=
                  param->table->key_info[param->real_keynr[idx]];
                const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                if (kpi->key_part_flag & HA_PART_KEY_SEG)
                  new_interval->min_flag= 0;
              }
            }
          }
          /*
            The following doesn't try to allocate memory so no need to
            check for NULL.
          */
          tree= tree_or(param, tree, tree2);
        }
      }

      if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
      {
        /*
          Get the SEL_TREE for the last "c_last < X < +inf" interval
          (value_item contains c_last already)
        */
        tree2= get_mm_parts(param, op, field, Item_func::GT_FUNC,
                            value_item, cmp_type);
        tree= tree_or(param, tree, tree2);
      }
      return tree;
    }
    else
    {
      SEL_TREE *tree= get_ne_mm_tree(param, op, field, op->arguments()[1],
                                     op->arguments()[1], cmp_type);
      if (tree)
      {
        Item **arg, **end;
        for (arg= op->arguments() + 2, end= arg + op->argument_count() - 2;
             arg < end ; arg++)
        {
          tree= tree_and(param, tree,
                         get_ne_mm_tree(param, op, field, *arg, *arg,
                                        cmp_type));
        }
      }
      return tree;
    }
    return NULL;
  }

  // The expression is IN, not negated.
  if (predicand->type() == Item::FIELD_ITEM)
  {
    // The expression is (<column>) IN (...)
    Field *field= static_cast<Item_field*>(predicand)->field;
    SEL_TREE *tree= get_mm_parts(param, op, field, Item_func::EQ_FUNC,
                                 op->arguments()[1], cmp_type);
    if (tree)
    {
      Item **arg, **end;
      for (arg= op->arguments() + 2, end= arg + op->argument_count() - 2;
           arg < end ; arg++)
      {
        tree= tree_or(param, tree, get_mm_parts(param, op, field,
                                                Item_func::EQ_FUNC,
                                                *arg, cmp_type));
      }
    }
    return tree;
  }
  if (predicand->type() == Item::ROW_ITEM)
  {
    /*
      The expression is (<column>,...) IN (...)

      We iterate over the rows on the rhs of the in predicate,
      building an OR tree of ANDs, a.k.a. a DNF expression out of this. E.g:

      (col1, col2) IN ((const1, const2), (const3, const4))
      becomes
      (col1 = const1 AND col2 = const2) OR (col1 = const3 AND col2 = const4)
    */
    SEL_TREE *or_tree= &null_sel_tree;
    Item_row *row_predicand= static_cast<Item_row*>(predicand);

    // Iterate over the rows on the rhs of the in predicate, building an OR.
    for (uint i= 1; i < op->argument_count(); ++i)
    {
      /*
        We only support row value expressions. Some optimizations rewrite
        the Item tree, and we don't handle that.
      */
      Item *in_list_item= op->arguments()[i];
      if (in_list_item->type() != Item::ROW_ITEM)
        return NULL;
      Item_row *row= static_cast<Item_row*>(in_list_item);

      // Iterate over the columns, building an AND tree.
      SEL_TREE *and_tree= NULL;
      for (uint j= 0; j < row_predicand->cols(); ++j)
      {
        Item *item= row_predicand->element_index(j);

        // We only support columns in the row on the lhs of the in predicate.
        if (item->type() != Item::FIELD_ITEM)
          return NULL;
        Field *field= static_cast<Item_field*>(item)->field;

        Item *value= row->element_index(j);

        SEL_TREE *and_expr=
          get_mm_parts(param, op, field, Item_func::EQ_FUNC, value, cmp_type);

        and_tree= tree_and(param, and_tree, and_expr);
        /*
          Short-circuit evaluation: If and_expr is NULL then no key part in
          this disjunct can be used as a search key. Or in other words the
          condition is always true. Hence the whole disjunction is always
          true.
        */
        if (and_tree == NULL)
          return NULL;
      }
      or_tree= tree_or(param, and_tree, or_tree);
    }
    return or_tree;
  }
  return NULL;
}


/**
  Build a SEL_TREE for a simple predicate.

  @param param     PARAM from test_quick_select
  @param predicand field in the predicate
  @param cond_func item for the predicate
  @param value     constant in the predicate
  @param cmp_type  compare type for the field
  @param inv       TRUE <=> NOT cond_func is considered
                   (makes sense only when cond_func is BETWEEN or IN)

  @return Pointer to the built tree.

  @todo Remove the appalling hack that 'value' can be a 1 cast to an Item*.
*/

static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
                                  Item *predicand,
                                  Item_func *cond_func,
                                  Item *value,
                                  Item_result cmp_type,
                                  bool inv)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

  if (param->has_errors())
    DBUG_RETURN(0);

  switch (cond_func->functype()) {

  case Item_func::XOR_FUNC:
    DBUG_RETURN(NULL); // Always true (don't use range access on XOR).
    break;             // See WL#5800

  case Item_func::NE_FUNC:
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;
      tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
    }
    break;

  case Item_func::BETWEEN:
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;

      if (!value)
      {
        if (inv)
        {
          tree= get_ne_mm_tree(param, cond_func, field,
                               cond_func->arguments()[1],
                               cond_func->arguments()[2], cmp_type);
        }
        else
        {
          tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
                             cond_func->arguments()[1],cmp_type);
          if (tree)
          {
            tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
                                                     Item_func::LE_FUNC,
                                                     cond_func->arguments()[2],
                                                     cmp_type));
          }
        }
      }
      else
        tree= get_mm_parts(param, cond_func, field,
                           (inv ?
                            (value == reinterpret_cast<Item*>(1) ?
                             Item_func::GT_FUNC :
                             Item_func::LT_FUNC):
                            (value == reinterpret_cast<Item*>(1) ?
                             Item_func::LE_FUNC :
                             Item_func::GE_FUNC)),
                           cond_func->arguments()[0], cmp_type);
    }
    break;
  case Item_func::IN_FUNC:
    {
      Item_func_in *in_pred= static_cast<Item_func_in*>(cond_func);
      tree= get_func_mm_tree_from_in_predicate(param, predicand, in_pred,
                                               value, cmp_type, inv);
    }
    break;
  default:
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;

      /*
        Here the functions for the following predicates are processed:
        <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL and GIS functions.
        If the predicate is of the form (value op field) it is handled
        as the equivalent predicate (field rev_op value), e.g.
        2 <= a is handled as a >= 2.
      */
      Item_func::Functype func_type=
        (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
      tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
    }
  }

  DBUG_RETURN(tree);
}


/*
  Build a conjunction of all SEL_TREEs for a simple predicate applying
  equalities

  SYNOPSIS
    get_full_func_mm_tree()
      param      PARAM from test_quick_select
      predicand  column or row constructor in the predicate's left-hand
                 side.
      op         Item for the predicate operator
      value      constant in the predicate (or a field already read from
                 a table in the case of dynamic range access)
                 For BETWEEN it contains the number of the field argument.
      inv        If true, the predicate is negated, e.g. NOT IN.
                 (makes sense only when cond_func is BETWEEN or IN)

  DESCRIPTION
    For a simple SARGable predicate of the form (f op c), where f is a
    field and c is a constant, the function builds a conjunction of all
    SEL_TREEs that can be obtained by the substitution of f for all
    different fields equal to f.

  NOTES
    If the WHERE condition contains a predicate (fi op c),
    then not only the SEL_TREE for this predicate is built, but
    the trees for the results of substitution of fi for
    each fj belonging to the same multiple equality as fi
    are built as well.
    E.g. for WHERE t1.a=t2.a AND t2.a > 10
    a SEL_TREE for t2.a > 10 will be built for quick select from t2
    and
    a SEL_TREE for t1.a > 10 will be built for quick select from t1.

    A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
    in a similar way: we build a conjunction of trees for the results
    of all substitutions of fi for equal fj.
    Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
    differently. It is considered as a conjunction of two SARGable
    predicates (f1i <= c) and (f2i >= c) and the function
    get_full_func_mm_tree is called for each of them separately producing
    trees for
       AND j (f1j <= c) and AND j (f2j >= c)
    After this these two trees are united in one conjunctive tree.
    It's easy to see that the same tree is obtained for
       AND j,k (f1j <= c AND f2k >= c)
    which is equivalent to
       AND j,k (c BETWEEN f1j AND f2k).
    The validity of the processing of the predicate (c NOT BETWEEN f1i AND
    f2i), which is equivalent to (f1i > c OR f2i < c), is not so obvious.
    Here the function get_full_func_mm_tree is called for (f1i > c) and
    (f2i < c) producing trees for AND j (f1j > c) and AND j (f2j < c). Then
    these two trees are united in one OR-tree. The expression
      (AND j (f1j > c)) OR (AND j (f2j < c))
    is equivalent to the expression
      AND j,k (f1j > c OR f2k < c)
    which is just a translation of
      AND j,k (c NOT BETWEEN f1j AND f2k)

    In the cases when one of the items f1, f2 is a constant c1 we do not
    create a tree for it at all. It works for BETWEEN predicates but does
    not work for NOT BETWEEN predicates as we have to evaluate the
    expression with it. If it is TRUE then the other tree can be completely
    ignored. We do not do it now and no trees are built in these cases for
    NOT BETWEEN predicates.

    As for IN predicates, only those of the form (f IN (c1,...,cn)),
    where f is a field and c1,...,cn are constants, are considered
    SARGable. We never try to narrow the index scan using predicates of
    the form (c IN (c1,...,f,...,cn)).

  RETURN
    Pointer to the tree representing the built conjunction of SEL_TREEs
*/

static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
                                       Item *predicand,
                                       Item_func *op,
                                       Item *value,
                                       bool inv)
{
  SEL_TREE *tree= 0;
  SEL_TREE *ftree= 0;
  const table_map param_comp=
    ~(param->prev_tables | param->read_tables | param->current_table);
  DBUG_ENTER("get_full_func_mm_tree");

  if (param->has_errors())
    DBUG_RETURN(NULL);

  /*
    Here we compute a set of tables that we consider as constant
    suppliers during execution of the SEL_TREE that we produce below.
  */
  table_map ref_tables= 0;
  for (uint i= 0; i < op->arg_count; i++)
  {
    Item *arg= op->arguments()[i]->real_item();
    if (arg != predicand)
      ref_tables|= arg->used_tables();
  }
  if (predicand->type() == Item::FIELD_ITEM)
  {
    Item_field *item_field= static_cast<Item_field*>(predicand);
    Field *field= item_field->field;
    Item_result cmp_type= field->cmp_type();

    if (!((ref_tables | item_field->table_ref->map()) & param_comp))
      ftree= get_func_mm_tree(param, predicand, op, value, cmp_type, inv);
    Item_equal *item_equal= item_field->item_equal;
    if (item_equal != NULL)
    {
      Item_equal_iterator it(*item_equal);
      Item_field *item;
      while ((item= it++))
      {
        Field *f= item->field;
        if (!field->eq(f) &&
            !((ref_tables | item->table_ref->map()) & param_comp))
        {
          tree= get_func_mm_tree(param, item, op, value, cmp_type, inv);
          ftree= !ftree ? tree : tree_and(param, ftree, tree);
        }
      }
    }
  }
  else if (predicand->type() == Item::ROW_ITEM)
  {
    ftree= get_func_mm_tree(param, predicand, op, value, ROW_RESULT, inv);
    DBUG_RETURN(ftree);
  }
  DBUG_RETURN(ftree);
}
6623
6624 /**
6625 The Range Analysis Module, which finds range access alternatives
6626 applicable to single or multi-index (UNION) access. The function
6627 does not calculate or care about the cost of the different
6628 alternatives.
6629
6630 get_mm_tree() employs a relaxed boolean algebra where the solution
6631 may be bigger than what the rules of boolean algebra accept. In
6632 other words, get_mm_tree() may return range access plans that will
6633 read more rows than the input conditions dictate. In its simplest
6634 form, consider a condition on two fields indexed by two different
6635 indexes:
6636
6637 "WHERE fld1 > 'x' AND fld2 > 'y'"
6638
6639 In this case, there are two single-index range access alternatives.
6640 No matter which access path is chosen, rows that are not in the
6641 result set may be read.
6642
6643 In the case above, get_mm_tree() will create range access
6644 alternatives for both indexes, so boolean algebra is still correct.
6645 In other cases, however, the conditions are too complex to be used
6646 without relaxing the rules. This typically happens when ORing a
6647 conjunction into a multi-index disjunction (@see e.g.
6648 imerge_list_or_tree()). When this happens, the range optimizer may
6649 choose to ignore conjunctions (any condition connected with AND). The
6650 effect of this is that the result includes a "bigger" solution than
6651 necessary. This is OK since all conditions will be used as filters
6652 after row retrieval.
6653
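 As a hypothetical illustration: for "WHERE (fld1 = 'a' AND fld2 = 'b')
 OR fld3 = 'c'", the optimizer may keep only the range alternatives on
 fld1 and fld3, dropping fld2 = 'b' from the first disjunct. The
 complete condition is still evaluated for every retrieved row, so the
 extra rows are filtered out afterwards.
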
6654 @see SEL_TREE::keys and SEL_TREE::merges for details of how single
6655 and multi-index range access alternatives are stored.
6656 */
6657 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item *cond)
6658 {
6659 SEL_TREE *tree=0;
6660 SEL_TREE *ftree= 0;
6661 Item_field *field_item= 0;
6662 bool inv= FALSE;
6663 Item *value= 0;
6664 DBUG_ENTER("get_mm_tree");
6665
6666 if (param->has_errors())
6667 DBUG_RETURN(NULL);
6668
6669 if (cond->type() == Item::COND_ITEM)
6670 {
6671 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
6672
6673 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
6674 {
6675 tree= NULL;
6676 Item *item;
6677 while ((item=li++))
6678 {
6679 SEL_TREE *new_tree= get_mm_tree(param,item);
6680 if (param->has_errors())
6681 DBUG_RETURN(NULL);
6682 tree= tree_and(param,tree,new_tree);
6683 dbug_print_tree("after_and", tree, param);
6684 if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
6685 break;
6686 }
6687 }
6688 else
6689 { // Item OR
6690 tree= get_mm_tree(param,li++);
6691 if (param->has_errors())
6692 DBUG_RETURN(NULL);
6693 if (tree)
6694 {
6695 Item *item;
6696 while ((item=li++))
6697 {
6698 SEL_TREE *new_tree=get_mm_tree(param,item);
6699 if (new_tree == NULL || param->has_errors())
6700 DBUG_RETURN(NULL);
6701 tree= tree_or(param,tree,new_tree);
6702 dbug_print_tree("after_or", tree, param);
6703 if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
6704 break;
6705 }
6706 }
6707 }
6708 dbug_print_tree("tree_returned", tree, param);
6709 DBUG_RETURN(tree);
6710 }
6711 /*
6712 We get here when 'cond' is a simple (non-AND/OR) condition.
6713 There are limits on what kinds of const items we can evaluate.
6714 At this stage a subquery in 'cond' might not be fully transformed yet
6715 (example: semijoin) thus cannot be evaluated.
6716 */
6717 if (cond->const_item() && !cond->is_expensive() && !cond->has_subquery())
6718 {
6719 /*
6720 During the cond->val_int() evaluation we can come across a subselect
6721 item which may allocate memory on the thd->mem_root and assumes
6722 all the memory allocated has the same life span as the subselect
6723 item itself. So we have to restore the thread's mem_root here.
6724 */
6725 MEM_ROOT *tmp_root= param->mem_root;
6726 param->thd->mem_root= param->old_root;
6727 const SEL_TREE::Type type=
6728 cond->val_int() ? SEL_TREE::ALWAYS : SEL_TREE::IMPOSSIBLE;
6729 tree= new (tmp_root) SEL_TREE(type, tmp_root, param->keys);
6730
6731 param->thd->mem_root= tmp_root;
6732 if (param->has_errors())
6733 DBUG_RETURN(NULL);
6734 dbug_print_tree("tree_returned", tree, param);
6735 DBUG_RETURN(tree);
6736 }
6737
6738 table_map ref_tables= 0;
6739 table_map param_comp= ~(param->prev_tables | param->read_tables |
6740 param->current_table);
6741 if (cond->type() != Item::FUNC_ITEM)
6742 { // Should be a field
6743 ref_tables= cond->used_tables();
6744 if ((ref_tables & param->current_table) ||
6745 (ref_tables & ~(param->prev_tables | param->read_tables)))
6746 DBUG_RETURN(0);
6747 DBUG_RETURN(new (param->mem_root)
6748 SEL_TREE(SEL_TREE::MAYBE, param->mem_root, param->keys));
6749 }
6750
6751 Item_func *cond_func= (Item_func*) cond;
6752 if (cond_func->functype() == Item_func::BETWEEN ||
6753 cond_func->functype() == Item_func::IN_FUNC)
6754 inv= ((Item_func_opt_neg *) cond_func)->negated;
6755 else
6756 {
6757 /*
6758 During the cond_func->select_optimize() evaluation we can come across a
6759 subselect item which may allocate memory on the thd->mem_root and assumes
6760 all the memory allocated has the same life span as the subselect item
6761 itself. So we have to restore the thread's mem_root here.
6762 */
6763 MEM_ROOT *tmp_root= param->mem_root;
6764 param->thd->mem_root= param->old_root;
6765 Item_func::optimize_type opt_type= cond_func->select_optimize();
6766 param->thd->mem_root= tmp_root;
6767 if (opt_type == Item_func::OPTIMIZE_NONE)
6768 DBUG_RETURN(NULL);
6769 }
6770
6771 param->cond= cond;
6772
6773 /*
6774 Notice that all fields that are outer references are const during
6775 the execution and should not be considered for range analysis like
6776 fields coming from the local query block are.
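    For instance, when optimizing an inner query block that contains
    "t2.b > t1.d" with t1.d being an outer reference, t1.d acts as a
    constant supplier for each outer row rather than as a candidate
    for range analysis.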
6777 */
6778 switch (cond_func->functype())
6779 {
6780 case Item_func::BETWEEN:
6781 {
6782 Item *const arg_left= cond_func->arguments()[0];
6783
6784 if (!(arg_left->used_tables() & OUTER_REF_TABLE_BIT) &&
6785 arg_left->real_item()->type() == Item::FIELD_ITEM)
6786 {
6787 field_item= (Item_field*) arg_left->real_item();
6788 ftree= get_full_func_mm_tree(param, field_item, cond_func, NULL, inv);
6789 }
6790
6791 /*
6792 Concerning the code below see the NOTES section in
6793 the comments for the function get_full_func_mm_tree()
6794 */
6795 for (uint i= 1 ; i < cond_func->arg_count ; i++)
6796 {
6797 Item *const arg= cond_func->arguments()[i];
6798
6799 if (!(arg->used_tables() & OUTER_REF_TABLE_BIT) &&
6800 arg->real_item()->type() == Item::FIELD_ITEM)
6801 {
6802 field_item= (Item_field*) arg->real_item();
6803 SEL_TREE *tmp=
6804 get_full_func_mm_tree(param, field_item, cond_func,
6805 reinterpret_cast<Item*>(i), inv);
6806 if (inv)
6807 {
6808 tree= !tree ? tmp : tree_or(param, tree, tmp);
6809 if (tree == NULL)
6810 break;
6811 }
6812 else
6813 tree= tree_and(param, tree, tmp);
6814 }
6815 else if (inv)
6816 {
6817 tree= 0;
6818 break;
6819 }
6820 }
6821
6822 ftree = tree_and(param, ftree, tree);
6823 break;
6824 } // end case Item_func::BETWEEN
6825
6826 case Item_func::IN_FUNC:
6827 {
6828 Item *const predicand= ((Item_func_in*) cond_func)->key_item()->real_item();
6829 if (predicand->type() != Item::FIELD_ITEM &&
6830 predicand->type() != Item::ROW_ITEM)
6831 DBUG_RETURN(NULL);
6832 ftree= get_full_func_mm_tree(param, predicand, cond_func, NULL, inv);
6833 break;
6834 } // end case Item_func::IN_FUNC
6835
6836 case Item_func::MULT_EQUAL_FUNC:
6837 {
6838 Item_equal *item_equal= (Item_equal *) cond;
6839 if (!(value= item_equal->get_const()))
6840 DBUG_RETURN(0);
6841 Item_equal_iterator it(*item_equal);
6842 ref_tables= value->used_tables();
6843 while ((field_item= it++))
6844 {
6845 Field *field= field_item->field;
6846 Item_result cmp_type= field->cmp_type();
6847 if (!((ref_tables | field_item->table_ref->map()) & param_comp))
6848 {
6849 tree= get_mm_parts(param, item_equal, field, Item_func::EQ_FUNC,
6850 value,cmp_type);
6851 ftree= !ftree ? tree : tree_and(param, ftree, tree);
6852 }
6853 }
6854
6855 dbug_print_tree("tree_returned", ftree, param);
6856 DBUG_RETURN(ftree);
6857 } // end case Item_func::MULT_EQUAL_FUNC
6858
6859 default:
6860 {
6861 Item *const arg_left= cond_func->arguments()[0];
6862
6863 DBUG_ASSERT (!ftree);
6864 if (!(arg_left->used_tables() & OUTER_REF_TABLE_BIT) &&
6865 arg_left->real_item()->type() == Item::FIELD_ITEM)
6866 {
6867 field_item= (Item_field*) arg_left->real_item();
6868 value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : NULL;
6869 ftree= get_full_func_mm_tree(param, field_item, cond_func, value, inv);
6870 }
6871 /*
6872 Even if get_full_func_mm_tree() was executed above and did not
6873 return a range predicate it may still be possible to create one
6874 by reversing the order of the operands. Note that this only
6875 applies to predicates where both operands are fields. Example: A
6876 query of the form
6877
6878 WHERE t1.a OP t2.b
6879
6880 In this case, arguments()[0] == t1.a and arguments()[1] == t2.b.
6881 When creating range predicates for t2, get_full_func_mm_tree()
6882 above will return NULL because 'field' belongs to t1 and only
6883 predicates that apply to t2 are of interest. In this case a
6884 call to get_full_func_mm_tree() with reversed operands (see
6885 below) may succeed.
6886 */
6887 Item *arg_right;
6888 if (!ftree && cond_func->have_rev_func() &&
6889 (arg_right= cond_func->arguments()[1]) &&
6890 !(arg_right->used_tables() & OUTER_REF_TABLE_BIT) &&
6891 arg_right->real_item()->type() == Item::FIELD_ITEM)
6892 {
6893 field_item= (Item_field*) arg_right->real_item();
6894 value= arg_left;
6895 ftree= get_full_func_mm_tree(param, field_item, cond_func, value, inv);
6896 }
6897 } // end case default
6898 } // end switch
6899
6900 dbug_print_tree("tree_returned", ftree, param);
6901 DBUG_RETURN(ftree);
6902 }
6903
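/*
  A minimal usage sketch of get_mm_tree() (hypothetical, condensed from
  real callers such as test_quick_select()): build the SEL_TREE for a
  condition and detect conditions that cannot match any row.

    SEL_TREE *tree= get_mm_tree(param, cond);
    if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
    {
      // No row can match 'cond'; a range scan can be skipped entirely.
    }
*/
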
6904 /**
6905 Test whether a comparison operator is a spatial comparison
6906 operator, i.e. Item_func::SP_*.
6907
6908 Used to check if range access using operator 'op_type' is applicable
6909 for a non-spatial index.
6910
6911 @param op_type The comparison operator.
6912 @return true if 'op_type' is a spatial comparison operator, false otherwise.
6913
6914 */
6915 bool is_spatial_operator(Item_func::Functype op_type)
6916 {
6917 switch (op_type)
6918 {
6919 case Item_func::SP_EQUALS_FUNC:
6920 case Item_func::SP_DISJOINT_FUNC:
6921 case Item_func::SP_INTERSECTS_FUNC:
6922 case Item_func::SP_TOUCHES_FUNC:
6923 case Item_func::SP_CROSSES_FUNC:
6924 case Item_func::SP_WITHIN_FUNC:
6925 case Item_func::SP_CONTAINS_FUNC:
6926 case Item_func::SP_COVEREDBY_FUNC:
6927 case Item_func::SP_COVERS_FUNC:
6928 case Item_func::SP_OVERLAPS_FUNC:
6929 case Item_func::SP_STARTPOINT:
6930 case Item_func::SP_ENDPOINT:
6931 case Item_func::SP_EXTERIORRING:
6932 case Item_func::SP_POINTN:
6933 case Item_func::SP_GEOMETRYN:
6934 case Item_func::SP_INTERIORRINGN:
6935 return true;
6936 default:
6937 return false;
6938 }
6939 }
6940
6941 /**
6942 Test if 'value' is comparable to 'field' when setting up range
6943 access for predicate "field OP value". 'field' is a field in the
6944 table being optimized for while 'value' is whatever 'field' is
6945 compared to.
6946
6947 @param cond_func the predicate item that compares 'field' with 'value'
6948 @param field field in the predicate
6949 @param itype itMBR if indexed field is spatial, itRAW otherwise
6950 @param comp_type comparator for the predicate
6951 @param value whatever 'field' is compared to
6952
6953 @return true if 'field' and 'value' are comparable, false otherwise
6954 */
6955
6956 static bool comparable_in_index(Item *cond_func,
6957 const Field *field,
6958 const Field::imagetype itype,
6959 Item_func::Functype comp_type,
6960 const Item *value)
6961 {
6962 /*
6963 Usually an index cannot be used if the column collation differs
6964 from the operation collation. However, a case insensitive index
6965 may be used for some binary searches:
6966
6967 WHERE latin1_swedish_ci_column = 'a' COLLATE latin1_bin;
6968 WHERE latin1_swedish_ci_column = BINARY 'a '
6969 */
6970 if ((field->result_type() == STRING_RESULT &&
6971 field->match_collation_to_optimize_range() &&
6972 value->result_type() == STRING_RESULT &&
6973 itype == Field::itRAW &&
6974 field->charset() != cond_func->compare_collation() &&
6975 !(cond_func->compare_collation()->state & MY_CS_BINSORT &&
6976 (comp_type == Item_func::EQUAL_FUNC ||
6977 comp_type == Item_func::EQ_FUNC))))
6978 return false;
6979
6980 /*
6981 Temporal values: Cannot use range access if:
6982 'indexed_varchar_column = temporal_value'
6983 because there are many ways to represent the same date as a
6984 string. A few examples: "01-01-2001", "1-1-2001", "2001-01-01",
6985 "2001#01#01". The same problem applies to time. Thus, we cannot
6986 create a useful range predicate for temporal values into VARCHAR
6987 column indexes. @see add_key_field()
6988 */
6989 if (!field->is_temporal() && value->is_temporal())
6990 return false;
6991
6992 /*
6993 Temporal values: Cannot use range access if
6994 'indexed_time = temporal_value_with_date_part'
6995 because:
6996 - without index, a TIME column with value '48:00:00' is
6997 equal to a DATETIME column with value
6998 'CURDATE() + 2 days'
6999 - with range access into the TIME column, CURDATE() + 2
7000 days becomes "00:00:00" (Field_timef::store_internal()
7001 simply extracts the time part from the datetime) which
7002 is a lookup key which does not match "48:00:00". On the other
7003 hand, we can do ref access for IndexedDatetimeComparedToTime
7004 because Field_temporal_with_date::store_time() will convert
7005 48:00:00 to CURDATE() + 2 days which is the correct lookup
7006 key.
7007 */
7008 if (field_time_cmp_date(field, value))
7009 return false;
7010
7011 /*
7012 We can't always use indexes when comparing a string index to a
7013 number. cmp_type() is checked to allow comparison of dates and
7014 numbers.
7015 */
7016 if (field->result_type() == STRING_RESULT &&
7017 value->result_type() != STRING_RESULT &&
7018 field->cmp_type() != value->result_type())
7019 return false;
7020
7021 /*
7022 We can't use indexes when comparing to a JSON value. For example,
7023 the string '{}' should compare equal to the JSON string "{}". If
7024 we use a string index to compare the two strings, we will be
7025 comparing '{}' and '"{}"', which don't compare equal.
7026 */
7027 if (value->result_type() == STRING_RESULT &&
7028 value->field_type() == MYSQL_TYPE_JSON)
7029 return false;
7030
7031 return true;
7032 }
7033
7034 static SEL_TREE *
7035 get_mm_parts(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field,
7036 Item_func::Functype type,
7037 Item *value, Item_result cmp_type)
7038 {
7039 DBUG_ENTER("get_mm_parts");
7040
7041 if (param->has_errors())
7042 DBUG_RETURN(0);
7043
7044 if (field->table != param->table)
7045 DBUG_RETURN(0);
7046
7047 KEY_PART *key_part = param->key_parts;
7048 KEY_PART *end = param->key_parts_end;
7049 SEL_TREE *tree=0;
7050 if (value &&
7051 value->used_tables() & ~(param->prev_tables | param->read_tables))
7052 DBUG_RETURN(0);
7053 for (; key_part != end ; key_part++)
7054 {
7055 if (field->eq(key_part->field))
7056 {
7057 /*
7058 Cannot do range access for spatial operators when a
7059 non-spatial index is used.
7060 */
7061 if (key_part->image_type != Field::itMBR &&
7062 is_spatial_operator(cond_func->functype()))
7063 continue;
7064
7065 SEL_ARG *sel_arg=0;
7066 if (!tree && !(tree=new (param->mem_root)
7067 SEL_TREE(param->mem_root, param->keys)))
7068 DBUG_RETURN(0); // OOM
7069 if (!value || !(value->used_tables() & ~param->read_tables))
7070 {
7071 sel_arg=get_mm_leaf(param,cond_func,
7072 key_part->field,key_part,type,value);
7073 if (!sel_arg)
7074 continue;
7075 if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
7076 {
7077 tree->type=SEL_TREE::IMPOSSIBLE;
7078 DBUG_RETURN(tree);
7079 }
7080 }
7081 else
7082 {
7083 /*
7084 The index may not be used by dynamic range access unless
7085 'field' and 'value' are comparable.
7086 */
7087 if (!comparable_in_index(cond_func, key_part->field,
7088 key_part->image_type,
7089 type, value))
7090 {
7091 warn_index_not_applicable(param, key_part->key, field);
7092 DBUG_RETURN(NULL);
7093 }
7094
7095 if (!(sel_arg= new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY)))
7096 DBUG_RETURN(NULL); //OOM
7097 }
7098 sel_arg->part=(uchar) key_part->part;
7099 tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
7100 tree->keys_map.set_bit(key_part->key);
7101 }
7102 }
7103
7104 if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
7105 tree= NULL;
7106 DBUG_RETURN(tree);
7107 }
7108
7109 /**
7110 Saves 'value' in 'field' and handles potential type conversion
7111 problems.
7112
7113 @param tree [out] The SEL_ARG leaf under construction. If
7114 an always false predicate is found it is
7115 modified to point to a SEL_ARG with
7116 type == SEL_ARG::IMPOSSIBLE
7117 @param value The Item that contains a value that shall
7118 be stored in 'field'.
7119 @param comp_op Comparison operator: >, >=, <=> etc.
7120 @param field The field that 'value' is stored into.
7121 @param impossible_cond_cause[out] Set to a descriptive string if an
7122 impossible condition is found.
7123 @param memroot Memroot for creation of new SEL_ARG.
7124
7125 @retval false if saving went fine and it makes sense to continue
7126 optimizing for this predicate.
7127 @retval true if always true/false predicate was found, in which
7128 case 'tree' has been modified to reflect this: NULL
7129 pointer if always true, SEL_ARG with type IMPOSSIBLE
7130 if always false.
7131 */
7132 static bool save_value_and_handle_conversion(SEL_ARG **tree,
7133 Item *value,
7134 const Item_func::Functype comp_op,
7135 Field *field,
7136 const char **impossible_cond_cause,
7137 MEM_ROOT *memroot)
7138 {
7139 // A SEL_ARG should not have been created for this predicate yet.
7140 DBUG_ASSERT(*tree == NULL);
7141
7142 if (!value->can_be_evaluated_now())
7143 {
7144 /*
7145 We cannot evaluate the value yet (i.e. required tables are not yet
7146 locked.)
7147 This is the case of prune_partitions() called during
7148 SELECT_LEX::prepare().
7149 */
7150 return true;
7151 }
7152
7153 // For comparison purposes allow invalid dates like 2000-01-32
7154 const sql_mode_t orig_sql_mode= field->table->in_use->variables.sql_mode;
7155 field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
7156
7157 /*
7158 We want to change "field > value" to "field OP V"
7159 where:
7160 * V is what is in "field" after we stored "value" in it via
7161 save_in_field_no_warning() (such store operation may have done
7162 rounding...)
7163 * OP is > or >=, depending on what's correct.
7164 For example, if c is an INT column,
7165 "c > 2.9" is changed to "c OP 3"
7166 where OP is ">=" (">" would not be correct, as 3 > 2.9, a comparison
7167 done with stored_field_cmp_to_item()). And
7168 "c > 3.1" is changed to "c OP 3" where OP is ">" (3 < 3.1...).
7169 */
7170
7171 // Note that value may be a stored function call, executed here.
7172 const type_conversion_status err= value->save_in_field_no_warnings(field, true);
7173 field->table->in_use->variables.sql_mode= orig_sql_mode;
7174
7175 switch (err) {
7176 case TYPE_OK:
7177 case TYPE_NOTE_TRUNCATED:
7178 case TYPE_WARN_TRUNCATED:
7179 return false;
7180 case TYPE_WARN_INVALID_STRING:
7181 /*
7182 An invalid string does not produce any rows when used with
7183 equality operator.
7184 */
7185 if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
7186 {
7187 *impossible_cond_cause= "invalid_characters_in_string";
7188 goto impossible_cond;
7189 }
7190 /*
7191 For other operations on invalid strings, we assume that the range
7192 predicate is always true and let evaluate_join_record() decide
7193 the outcome.
7194 */
7195 return true;
7196 case TYPE_ERR_BAD_VALUE:
7197 /*
7198 In the case of incompatible values, MySQL's SQL dialect has some
7199 strange interpretations. For example,
7200
7201 "int_col > 'foo'" is interpreted as "int_col > 0"
7202
7203 instead of always false. Because of this, we assume that the
7204 range predicate is always true instead of always false and let
7205 evaluate_join_record() decide the outcome.
7206 */
7207 return true;
7208 case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
7209 // Checking NULL value on a field that cannot contain NULL.
7210 *impossible_cond_cause= "null_field_in_non_null_column";
7211 goto impossible_cond;
7212 case TYPE_WARN_OUT_OF_RANGE:
7213 /*
7214 value to store was either higher than field::max_value or lower
7215 than field::min_value. The field's max/min value has been stored
7216 instead.
7217 */
7218 if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
7219 {
7220 /*
7221 Independent of data type, "out_of_range_value =/<=> field" is
7222 always false.
7223 */
7224 *impossible_cond_cause= "value_out_of_range";
7225 goto impossible_cond;
7226 }
7227
7228 // If the field is numeric, we can interpret the out of range value.
7229 if ((field->type() != FIELD_TYPE_BIT) &&
7230 (field->result_type() == REAL_RESULT ||
7231 field->result_type() == INT_RESULT ||
7232 field->result_type() == DECIMAL_RESULT))
7233 {
7234 /*
7235 value to store was higher than field::max_value if
7236 a) field has a value greater than 0, or
7237 b) if field is unsigned and has a negative value (which, when
7238 cast to unsigned, means some value higher than LLONG_MAX).
7239 */
7240 if ((field->val_int() > 0) || // a)
7241 (static_cast<Field_num*>(field)->unsigned_flag &&
7242 field->val_int() < 0)) // b)
7243 {
7244 if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
7245 {
7246 /*
7247 '<' or '<=' compared to a value higher than the field
7248 can store is always true.
7249 */
7250 return true;
7251 }
7252 if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
7253 {
7254 /*
7255 '>' or '>=' compared to a value higher than the field can
7256 store is always false.
7257 */
7258 *impossible_cond_cause= "value_out_of_range";
7259 goto impossible_cond;
7260 }
7261 }
7262 else // value is lower than field::min_value
7263 {
7264 if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
7265 {
7266 /*
7267 '>' or '>=' compared to a value lower than the field
7268 can store is always true.
7269 */
7270 return true;
7271 }
7272 if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
7273 {
7274 /*
7275 '<' or '<=' compared to a value lower than the field can
7276 store is always false.
7277 */
7278 *impossible_cond_cause= "value_out_of_range";
7279 goto impossible_cond;
7280 }
7281 }
7282 }
7283 /*
7284 Value is out of range on a datatype where it can't be decided if
7285 it was underflow or overflow. It is therefore not possible to
7286 determine whether or not the condition is impossible or always
7287 true and we have to assume always true.
7288 */
7289 return true;
7290 case TYPE_NOTE_TIME_TRUNCATED:
7291 if (field->type() == FIELD_TYPE_DATE &&
7292 (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC ||
7293 comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC))
7294 {
7295 /*
7296 We were saving DATETIME into a DATE column, the conversion went ok
7297 but a non-zero time part was cut off.
7298
7299 In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
7300 values. Index over a DATE column uses DATE comparison. Changing
7301 from one comparison to the other is possible:
7302
7303 datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
7304 datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'
7305
7306 datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
7307 datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'
7308
7309 but we'll need to convert '>' to '>=' and '<' to '<='. This will
7310 be done together with other types at the end of get_mm_leaf()
7311 (grep for stored_field_cmp_to_item)
7312 */
7313 return false;
7314 }
7315 if (comp_op == Item_func::EQ_FUNC || comp_op == Item_func::EQUAL_FUNC)
7316 {
7317 // Equality comparison is always false when time info has been truncated.
7318 goto impossible_cond;
7319 }
7320 return true;
7321 case TYPE_ERR_OOM:
7322 return true;
7323 /*
7324 No default here to avoid adding new conversion status codes that are
7325 unhandled in this function.
7326 */
7327 }
7328
7329 DBUG_ASSERT(FALSE); // Should never get here.
7330
7331 impossible_cond:
7332 *tree= new (memroot) SEL_ARG(field, 0, 0);
7333 (*tree)->type= SEL_ARG::IMPOSSIBLE;
7334 return true;
7335 }
7336
7337
7338 static SEL_ARG *
7339 get_mm_leaf(RANGE_OPT_PARAM *param, Item *conf_func, Field *field,
7340 KEY_PART *key_part, Item_func::Functype type,Item *value)
7341 {
7342 uint maybe_null=(uint) field->real_maybe_null();
7343 bool optimize_range;
7344 SEL_ARG *tree= 0;
7345 MEM_ROOT *alloc= param->mem_root;
7346 uchar *str;
7347 const char *impossible_cond_cause= NULL;
7348 DBUG_ENTER("get_mm_leaf");
7349
7350 if (param->has_errors())
7351 goto end;
7352
7353 /*
7354 We need to restore the runtime mem_root of the thread in this
7355 function because it evaluates the value of its argument, while
7356 the argument can be any, e.g. a subselect. The subselect
7357 items, in turn, assume that all the memory allocated during
7358 the evaluation has the same life span as the item itself.
7359 TODO: opt_range.cc should not reset thd->mem_root at all.
7360 */
7361 param->thd->mem_root= param->old_root;
7362 if (!value) // IS NULL or IS NOT NULL
7363 {
7364 if (field->table->pos_in_table_list->outer_join)
7365 /*
7366 Range scan cannot be used to scan the inner table of an outer
7367 join if the predicate is IS NULL.
7368 */
7369 goto end;
7370 if (!maybe_null) // NOT NULL column
7371 {
7372 if (type == Item_func::ISNULL_FUNC)
7373 tree= &null_element;
7374 goto end;
7375 }
7376 uchar *null_string=
7377 static_cast<uchar*>(alloc_root(alloc, key_part->store_length + 1));
7378 if (!null_string)
7379 goto end; // out of memory
7380
7381 TRASH(null_string, key_part->store_length + 1);
7382 memcpy(null_string, is_null_string, sizeof(is_null_string));
7383
7384 if (!(tree= new (alloc) SEL_ARG(field, null_string, null_string)))
7385 goto end; // out of memory
7386 if (type == Item_func::ISNOTNULL_FUNC)
7387 {
7388 tree->min_flag=NEAR_MIN; /* IS NOT NULL -> X > NULL */
7389 tree->max_flag=NO_MAX_RANGE;
7390 }
7391 goto end;
7392 }
7393
7394 /*
7395 The range access method cannot be used unless 'field' and 'value'
7396 are comparable in the index. Examples of non-comparable
7397 field/values: different collation, DATETIME vs TIME etc.
7398 */
7399 if (!comparable_in_index(conf_func, field, key_part->image_type,
7400 type, value))
7401 {
7402 warn_index_not_applicable(param, key_part->key, field);
7403 goto end;
7404 }
7405
7406 if (key_part->image_type == Field::itMBR)
7407 {
7408 // @todo: use is_spatial_operator() instead?
7409 switch (type) {
7410 case Item_func::SP_EQUALS_FUNC:
7411 case Item_func::SP_DISJOINT_FUNC:
7412 case Item_func::SP_INTERSECTS_FUNC:
7413 case Item_func::SP_TOUCHES_FUNC:
7414 case Item_func::SP_CROSSES_FUNC:
7415 case Item_func::SP_WITHIN_FUNC:
7416 case Item_func::SP_CONTAINS_FUNC:
7417 case Item_func::SP_OVERLAPS_FUNC:
7418 break;
7419 default:
7420 /*
7421 We cannot involve spatial indexes for queries that
7422 don't use MBREQUALS(), MBRDISJOINT(), etc. functions.
7423 */
7424 goto end;
7425 }
7426 }
7427
7428 if (param->using_real_indexes)
7429 optimize_range= field->optimize_range(param->real_keynr[key_part->key],
7430 key_part->part);
7431 else
7432 optimize_range= TRUE;
7433
7434 if (type == Item_func::LIKE_FUNC)
7435 {
7436 bool like_error;
7437 char buff1[MAX_FIELD_WIDTH];
7438 uchar *min_str,*max_str;
7439 String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
7440 size_t length, offset, min_length, max_length;
7441 size_t field_length= field->pack_length()+maybe_null;
7442
7443 if (!optimize_range)
7444 goto end;
7445 if (!(res= value->val_str(&tmp)))
7446 {
7447 tree= &null_element;
7448 goto end;
7449 }
7450
7451 /*
7452 TODO:
7453 Check if this was a function. This should have been optimized away
7454 in sql_select.cc
7455 */
7456 if (res != &tmp)
7457 {
7458 tmp.copy(*res); // Get own copy
7459 res= &tmp;
7460 }
7461 if (field->cmp_type() != STRING_RESULT)
7462 goto end; // Can only optimize strings
7463
7464 offset=maybe_null;
7465 length=key_part->store_length;
7466
7467 if (length != key_part->length + maybe_null)
7468 {
7469 /* key packed with length prefix */
7470 offset+= HA_KEY_BLOB_LENGTH;
7471 field_length= length - HA_KEY_BLOB_LENGTH;
7472 }
7473 else
7474 {
7475 if (unlikely(length < field_length))
7476 {
7477 /*
7478 This can only happen in a table created with UNIREG where one key
7479 overlaps many fields
7480 */
7481 length= field_length;
7482 }
7483 else
7484 field_length= length;
7485 }
7486 length+=offset;
7487 if (!(min_str= (uchar*) alloc_root(alloc, length*2)))
7488 goto end;
7489
7490 max_str=min_str+length;
7491 if (maybe_null)
7492 max_str[0]= min_str[0]=0;
7493
7494 Item_func_like *like_func= static_cast<Item_func_like*>(param->cond);
7495
7496 // We can only optimize with LIKE if the escape string is known.
7497 if (!like_func->escape_is_evaluated())
7498 goto end;
7499
7500 field_length-= maybe_null;
7501 like_error= my_like_range(field->charset(),
7502 res->ptr(), res->length(),
7503 like_func->escape,
7504 wild_one, wild_many,
7505 field_length,
7506 (char*) min_str+offset, (char*) max_str+offset,
7507 &min_length, &max_length);
7508 if (like_error) // Can't optimize with LIKE
7509 goto end;
7510
7511 if (offset != maybe_null) // BLOB or VARCHAR
7512 {
7513 int2store(min_str+maybe_null, static_cast<uint16>(min_length));
7514 int2store(max_str+maybe_null, static_cast<uint16>(max_length));
7515 }
7516 tree= new (alloc) SEL_ARG(field, min_str, max_str);
7517 goto end;
7518 }
7519
7520 if (!optimize_range &&
7521 type != Item_func::EQ_FUNC &&
7522 type != Item_func::EQUAL_FUNC)
7523 goto end; // Can't optimize this
7524
7525 /*
7526 Geometry operations may mix geometry types, e.g., we may be
7527 checking ST_Contains(<polygon field>, <point>). In such cases,
7528 field->geom_type will be a different type than the value we're
7529 trying to store in it, and the conversion will fail. Therefore,
7530 set the most general geometry type while saving, and revert to the
7531 original geometry type afterwards.
7532 */
7533 {
7534 const Field::geometry_type save_geom_type=
7535 (field->type() == MYSQL_TYPE_GEOMETRY) ?
7536 field->get_geometry_type() :
7537 Field::GEOM_GEOMETRY;
7538 if (field->type() == MYSQL_TYPE_GEOMETRY)
7539 {
7540 down_cast<Field_geom*>(field)->geom_type= Field::GEOM_GEOMETRY;
7541 }
7542
7543 bool always_true_or_false=
7544 save_value_and_handle_conversion(&tree, value, type, field,
7545 &impossible_cond_cause, alloc);
7546
7547 if (field->type() == MYSQL_TYPE_GEOMETRY &&
7548 save_geom_type != Field::GEOM_GEOMETRY)
7549 {
7550 down_cast<Field_geom*>(field)->geom_type= save_geom_type;
7551 }
7552
7553 if (always_true_or_false)
7554 goto end;
7555 }
7556
7557 /*
7558 Any sargable predicate except "<=>" involving NULL as a constant is always
7559 FALSE
7560 */
7561 if (type != Item_func::EQUAL_FUNC && field->is_real_null())
7562 {
7563 impossible_cond_cause= "comparison_with_null_always_false";
7564 tree= &null_element;
7565 goto end;
7566 }
7567
7568 str= (uchar*) alloc_root(alloc, key_part->store_length+1);
7569 if (!str)
7570 goto end;
7571 if (maybe_null)
7572 *str= (uchar) field->is_real_null(); // Set to 1 if null
7573 field->get_key_image(str+maybe_null, key_part->length,
7574 key_part->image_type);
7575 if (!(tree= new (alloc) SEL_ARG(field, str, str)))
7576 goto end; // out of memory
7577
7578 /*
7579 Check if we are comparing an UNSIGNED integer with a negative constant.
7580 In this case we know that:
7581 (a) (unsigned_int [< | <=] negative_constant) == FALSE
7582 (b) (unsigned_int [> | >=] negative_constant) == TRUE
7583 In case (a) the condition is false for all values, and in case (b) it
7584 is true for all values, so we can avoid unnecessary retrieval and condition
7585 testing, and we also get correct comparison of unsigned integers with
7586 negative integers (which otherwise fails because at query execution time
7587 negative integers are cast to unsigned if compared with unsigned).
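    Example: for an UNSIGNED column u, "u < -1" matches no rows (case
    (a) above) while "u > -1" matches all rows (case (b)).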
7588 */
7589 if (field->result_type() == INT_RESULT &&
7590 value->result_type() == INT_RESULT &&
7591 ((field->type() == FIELD_TYPE_BIT ||
7592 ((Field_num *) field)->unsigned_flag) &&
7593 !((Item_int*) value)->unsigned_flag))
7594 {
7595 longlong item_val= value->val_int();
7596 if (item_val < 0)
7597 {
7598 if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
7599 {
7600 impossible_cond_cause= "unsigned_int_cannot_be_negative";
7601 tree->type= SEL_ARG::IMPOSSIBLE;
7602 goto end;
7603 }
7604 if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
7605 {
7606 tree= 0;
7607 goto end;
7608 }
7609 }
7610 }
7611
7612 switch (type) {
7613 case Item_func::LT_FUNC:
7614 /* Don't use open ranges for partial key_segments */
7615 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7616 stored_field_cmp_to_item(param->thd, field, value) == 0)
7617 tree->max_flag=NEAR_MAX;
7618 /* fall through */
7619 case Item_func::LE_FUNC:
7620 if (!maybe_null)
7621 tree->min_flag=NO_MIN_RANGE; /* From start */
7622 else
7623 { // > NULL
7624 if (!(tree->min_value=
7625 static_cast<uchar*>(alloc_root(alloc, key_part->store_length+1))))
7626 goto end;
7627 TRASH(tree->min_value, key_part->store_length + 1);
7628 memcpy(tree->min_value, is_null_string, sizeof(is_null_string));
7629 tree->min_flag=NEAR_MIN;
7630 }
7631 break;
7632 case Item_func::GT_FUNC:
7633 /* Don't use open ranges for partial key_segments */
7634 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7635 (stored_field_cmp_to_item(param->thd, field, value) <= 0))
7636 tree->min_flag=NEAR_MIN;
7637 tree->max_flag= NO_MAX_RANGE;
7638 break;
7639 case Item_func::GE_FUNC:
7640 /* Don't use open ranges for partial key_segments */
7641 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7642 (stored_field_cmp_to_item(param->thd, field, value) < 0))
7643 tree->min_flag= NEAR_MIN;
7644 tree->max_flag=NO_MAX_RANGE;
7645 break;
7646 case Item_func::SP_EQUALS_FUNC:
7647 tree->set_gis_index_read_function(HA_READ_MBR_EQUAL);
7648 break;
7649 case Item_func::SP_DISJOINT_FUNC:
7650 tree->set_gis_index_read_function(HA_READ_MBR_DISJOINT);
7651 break;
7652 case Item_func::SP_INTERSECTS_FUNC:
7653 tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
7654 break;
7655 case Item_func::SP_TOUCHES_FUNC:
7656 tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
7657 break;
7658
7659 case Item_func::SP_CROSSES_FUNC:
7660 tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
7661 break;
7662 case Item_func::SP_WITHIN_FUNC:
7663 /*
7664 Adjust the rkey_func_flag as it's assumed and observed that both
7665 MyISAM and InnoDB implement this function in reverse order.
7666 */
7667 tree->set_gis_index_read_function(HA_READ_MBR_CONTAIN);
7668 break;
7669
7670 case Item_func::SP_CONTAINS_FUNC:
7671 /*
7672 Adjust the rkey_func_flag as it's assumed and observed that both
7673 MyISAM and InnoDB implement this function in reverse order.
7674 */
7675 tree->set_gis_index_read_function(HA_READ_MBR_WITHIN);
7676 break;
7677 case Item_func::SP_OVERLAPS_FUNC:
7678 tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
7679 break;
7680
7681 default:
7682 break;
7683 }
7684
7685 end:
7686 if (impossible_cond_cause != NULL)
7687 {
7688 Opt_trace_object wrapper (&param->thd->opt_trace);
7689 Opt_trace_object (&param->thd->opt_trace, "impossible_condition",
7690 Opt_trace_context::RANGE_OPTIMIZER).
7691 add_alnum("cause", impossible_cond_cause);
7692 }
7693 param->thd->mem_root= alloc;
7694 DBUG_RETURN(tree);
7695 }
7696
7697
7698 /*
7699 Add a new key test to a key when scanning through all keys
7700 This will never be called for same key parts.
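 For example, adding a chain for keyparts (0,2) to a chain for keypart
 (1) yields a single chain linked in keypart order 0 -> 1 -> 2.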
7701 */
7702
7703 static SEL_ARG *
7704 sel_add(SEL_ARG *key1, SEL_ARG *key2)
7705 {
7706 SEL_ARG *root,**key_link;
7707
7708 if (!key1)
7709 return key2;
7710 if (!key2)
7711 return key1;
7712
7713 key_link= &root;
7714 while (key1 && key2)
7715 {
7716 if (key1->part < key2->part)
7717 {
7718 *key_link= key1;
7719 key_link= &key1->next_key_part;
7720 key1=key1->next_key_part;
7721 }
7722 else
7723 {
7724 *key_link= key2;
7725 key_link= &key2->next_key_part;
7726 key2=key2->next_key_part;
7727 }
7728 }
7729 *key_link=key1 ? key1 : key2;
7730 return root;
7731 }
7732
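/*
  CLONE_KEY1_MAYBE/CLONE_KEY2_MAYBE mark that the corresponding key_and()
  argument is not a simple key, so it may have to be cloned before being
  modified. swap_clone_flag() exchanges the two bits, mirroring a swap of
  the key1/key2 arguments.
*/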
7733 #define CLONE_KEY1_MAYBE 1
7734 #define CLONE_KEY2_MAYBE 2
7735 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
7736
7737
7738 static SEL_TREE *
7739 tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
7740 {
7741 DBUG_ENTER("tree_and");
7742
7743 if (param->has_errors())
7744 DBUG_RETURN(0);
7745
7746 if (!tree1)
7747 DBUG_RETURN(tree2);
7748 if (!tree2)
7749 DBUG_RETURN(tree1);
7750 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7751 DBUG_RETURN(tree1);
7752 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7753 DBUG_RETURN(tree2);
7754 if (tree1->type == SEL_TREE::MAYBE)
7755 {
7756 if (tree2->type == SEL_TREE::KEY)
7757 tree2->type=SEL_TREE::KEY_SMALLER;
7758 DBUG_RETURN(tree2);
7759 }
7760 if (tree2->type == SEL_TREE::MAYBE)
7761 {
7762 tree1->type=SEL_TREE::KEY_SMALLER;
7763 DBUG_RETURN(tree1);
7764 }
7765
7766 dbug_print_tree("tree1", tree1, param);
7767 dbug_print_tree("tree2", tree2, param);
7768
7769 key_map result_keys;
7770
7771 /* Join the trees key per key */
7772 SEL_ARG **key1,**key2;
7773 for (uint idx=0; idx< param->keys; idx++)
7774 {
7775 key1= &tree1->keys[idx];
7776 key2= &tree2->keys[idx];
7777
7778 uint flag=0;
7779 if (*key1 || *key2)
7780 {
7781 if (*key1 && !(*key1)->simple_key())
7782 flag|=CLONE_KEY1_MAYBE;
7783 if (*key2 && !(*key2)->simple_key())
7784 flag|=CLONE_KEY2_MAYBE;
7785 *key1= key_and(param, *key1, *key2, flag);
7786 if (*key1)
7787 {
7788 if ((*key1)->type == SEL_ARG::IMPOSSIBLE)
7789 {
7790 tree1->type= SEL_TREE::IMPOSSIBLE;
7791 DBUG_RETURN(tree1);
7792 }
7793 result_keys.set_bit(idx);
7794 #ifndef DBUG_OFF
7795 /*
7796 Do not test use_count if there is a large range tree created.
7797 It takes too much time to traverse the tree.
7798 */
7799 if (param->mem_root->allocated_size < 2097152)
7800 (*key1)->test_use_count(*key1);
7801 #endif
7802 }
7803
7804 }
7805 }
7806 tree1->keys_map= result_keys;
7807
7808 /* ok, both trees are index_merge trees */
7809 imerge_list_and_list(&tree1->merges, &tree2->merges);
7810 DBUG_RETURN(tree1);
7811 }
7812
7813
7814 /*
7815 Check if two SEL_TREES can be combined into one (i.e. a single key range
7816 read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
7817 using index_merge.
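  For example, "kp1 < 10 OR kp1 > 20" over a single index can be ORed
  into one two-range scan, whereas predicates over two different indexes
  have no common key and require index_merge instead.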
7818 */
7819
7820 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
7821 RANGE_OPT_PARAM* param)
7822 {
7823 key_map common_keys= tree1->keys_map;
7824 DBUG_ENTER("sel_trees_can_be_ored");
7825 common_keys.intersect(tree2->keys_map);
7826
7827 dbug_print_tree("tree1", tree1, param);
7828 dbug_print_tree("tree2", tree2, param);
7829
7830 if (common_keys.is_clear_all())
7831 DBUG_RETURN(FALSE);
7832
7833 /* trees have a common key, check if they refer to same key part */
7834 SEL_ARG *key1,*key2;
7835 for (uint key_no=0; key_no < param->keys; key_no++)
7836 {
7837 if (common_keys.is_set(key_no))
7838 {
7839 key1= tree1->keys[key_no];
7840 key2= tree2->keys[key_no];
7841 /* GIS_OPTIMIZER_FIXME: temp solution. key1 could be all nulls */
7842 if (key1 && key2 && key1->part == key2->part)
7843 DBUG_RETURN(TRUE);
7844 }
7845 }
7846 DBUG_RETURN(FALSE);
7847 }
7848
7849
7850 /*
7851 Remove the trees that are not suitable for record retrieval.
7852 SYNOPSIS
7853 param Range analysis parameter
7854 tree Tree to be processed, tree->type is KEY or KEY_SMALLER
7855
7856 DESCRIPTION
7857 This function walks through tree->keys[] and removes the SEL_ARG* trees
7858 that are not "maybe" trees (*) and cannot be used to construct quick range
7859 selects.
7860 (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
7861 these types here as well.
7862
7863 A SEL_ARG* tree cannot be used to construct quick select if it has
7864 tree->part != 0. (e.g. it could represent "keypart2 < const").
7865
7866 WHY THIS FUNCTION IS NEEDED
7867
7868 Normally we allow construction of SEL_TREE objects that have SEL_ARG
7869 trees that do not allow quick range select construction. For example for
7870 " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
7871 tree1= SEL_TREE { SEL_ARG{keypart1=1} }
7872 tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
7873 from this
7874 call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
7875 tree.
7876
7877 There is an exception though: when we construct index_merge SEL_TREE,
7878 any SEL_ARG* tree that cannot be used to construct quick range select can
7879 be removed, because current range analysis code doesn't provide any way
7880 that tree could be later combined with another tree.
7881 Consider an example: we should not construct
7882 st1 = SEL_TREE {
7883 merges = SEL_IMERGE {
7884 SEL_TREE(t.key1part1 = 1),
7885 SEL_TREE(t.key2part2 = 2) -- (*)
7886 }
7887 };
7888 because
7889 - (*) cannot be used to construct quick range select,
7890 - There is no execution path that would cause (*) to be converted to
7891 a tree that could be used.
7892
7893 The latter is easy to verify: first, notice that the only way to convert
7894 (*) into a usable tree is to call tree_and(something, (*)).
7895
7896 Second, look at what the tree_and/tree_or functions would do when passed
7897 a SEL_TREE structured like the st1 tree, and conclude that
7898 tree_and(something, (*)) will not be called.
7899
7900 RETURN
7901 0 Ok, some suitable trees left
7902 1 No tree->keys[] left.
7903 */
7904
7905 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
7906 {
7907 bool res= FALSE;
7908 for (uint i=0; i < param->keys; i++)
7909 {
7910 if (tree->keys[i])
7911 {
7912 if (tree->keys[i]->part)
7913 {
7914 tree->keys[i]= NULL;
7915 tree->keys_map.clear_bit(i);
7916 }
7917 else
7918 res= TRUE;
7919 }
7920 }
7921 return !res;
7922 }
7923
7924
7925 static SEL_TREE *
7926 tree_or(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
7927 {
7928 DBUG_ENTER("tree_or");
7929
7930 if (param->has_errors())
7931 DBUG_RETURN(0);
7932
7933 if (!tree1 || !tree2)
7934 DBUG_RETURN(0);
7935 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7936 DBUG_RETURN(tree2);
7937 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7938 DBUG_RETURN(tree1);
7939 if (tree1->type == SEL_TREE::MAYBE)
7940 DBUG_RETURN(tree1); // Can't use this
7941 if (tree2->type == SEL_TREE::MAYBE)
7942 DBUG_RETURN(tree2);
7943
7944 /*
7945 It is possible that a tree contains both
7946 a) simple range predicates (in tree->keys[]) and
7947 b) index merge range predicates (in tree->merges)
7948
7949 If a tree has both, they represent equally *valid* range
7950 predicate alternatives; both will return all relevant rows from
7951 the table but one may return more unnecessary rows than the
7952 other (additional rows will be filtered later). However, doing
7953 an OR operation on trees with both types of predicates is too
7954 complex at this time. We therefore remove the index merge
7955 predicates (if we have both types) before OR'ing the trees.
7956
7957 TODO: enable tree_or() for trees with both simple and index
7958 merge range predicates.
7959 */
7960 if (!tree1->merges.is_empty())
7961 {
7962 for (uint i= 0; i < param->keys; i++)
7963 if (tree1->keys[i] != NULL && tree1->keys[i] != &null_element)
7964 {
7965 tree1->merges.empty();
7966 break;
7967 }
7968 }
7969 if (!tree2->merges.is_empty())
7970 {
7971 for (uint i= 0; i< param->keys; i++)
7972 if (tree2->keys[i] != NULL && tree2->keys[i] != &null_element)
7973 {
7974 tree2->merges.empty();
7975 break;
7976 }
7977 }
7978
7979 SEL_TREE *result= 0;
7980 key_map result_keys;
7981 if (sel_trees_can_be_ored(tree1, tree2, param))
7982 {
7983 /* Join the trees key per key */
7984 SEL_ARG **key1,**key2;
7985 for (uint idx=0; idx < param->keys; idx++)
7986 {
7987 key1= &tree1->keys[idx];
7988 key2= &tree2->keys[idx];
7989 *key1= key_or(param, *key1, *key2);
7990 if (*key1)
7991 {
7992 result=tree1; // Added to tree1
7993 result_keys.set_bit(idx);
7994 #ifndef DBUG_OFF
7995 /*
7996 Do not test use count if there is a large range tree created.
7997 It takes too much time to traverse the tree.
7998 */
7999 if (param->mem_root->allocated_size < 2097152)
8000 (*key1)->test_use_count(*key1);
8001 #endif
8002 }
8003 }
8004 if (result)
8005 result->keys_map= result_keys;
8006 }
8007 else
8008 {
8009 /* ok, two trees have KEY type but cannot be used without index merge */
8010 if (tree1->merges.is_empty() && tree2->merges.is_empty())
8011 {
8012 if (param->remove_jump_scans)
8013 {
8014 bool no_trees= remove_nonrange_trees(param, tree1);
8015 no_trees= no_trees || remove_nonrange_trees(param, tree2);
8016 if (no_trees)
8017 DBUG_RETURN(new (param->mem_root)
8018 SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys));
8019 }
8020 SEL_IMERGE *merge;
8021 /* both trees are "range" trees, produce new index merge structure */
8022 if (!(result= new (param->mem_root)
8023 SEL_TREE(param->mem_root, param->keys)) ||
8024 !(merge= new (param->mem_root) SEL_IMERGE()) ||
8025 (result->merges.push_back(merge)) ||
8026 (merge->or_sel_tree(param, tree1)) ||
8027 (merge->or_sel_tree(param, tree2)))
8028 result= NULL;
8029 else
8030 result->type= tree1->type;
8031 }
8032 else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
8033 {
8034 if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
8035 result= new (param->mem_root)
8036 SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys);
8037 else
8038 result= tree1;
8039 }
8040 else
8041 {
8042 /* one tree is index merge tree and another is range tree */
8043 if (tree1->merges.is_empty())
8044 swap_variables(SEL_TREE*, tree1, tree2);
8045
8046 if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
8047 DBUG_RETURN(new (param->mem_root)
8048 SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys));
8049 /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
8050 if (imerge_list_or_tree(param, &tree1->merges, tree2))
8051 result= new (param->mem_root)
8052 SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys);
8053 else
8054 result= tree1;
8055 }
8056 }
8057 DBUG_RETURN(result);
8058 }
8059
8060
8061 /* AND key trees where key1->part < key2->part */
8062
8063 static SEL_ARG *
8064 and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
8065 uint clone_flag)
8066 {
8067 SEL_ARG *next;
8068 ulong use_count=key1->use_count;
8069
8070 if (key1->elements != 1)
8071 {
8072 key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
8073 key2->increment_use_count((int) key1->elements-1);
8074 }
8075 if (key1->type == SEL_ARG::MAYBE_KEY)
8076 {
8077 // See todo for left/right pointers
8078 DBUG_ASSERT(!key1->left);
8079 DBUG_ASSERT(!key1->right);
8080 key1->next= key1->prev= 0;
8081 }
8082 for (next=key1->first(); next ; next=next->next)
8083 {
8084 if (next->next_key_part)
8085 {
8086 SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
8087 if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
8088 {
8089 key1=key1->tree_delete(next);
8090 continue;
8091 }
8092 next->next_key_part=tmp;
8093 if (use_count)
8094 next->increment_use_count(use_count);
8095 }
8096 else
8097 next->next_key_part=key2;
8098 }
8099 if (!key1)
8100 return &null_element; // Impossible ranges
8101 key1->use_count++;
8102 return key1;
8103 }
8104
8105
8106 /*
8107 Produce a SEL_ARG graph that represents "key1 AND key2"
8108
8109 SYNOPSIS
8110 key_and()
8111 param Range analysis context (needed to track if we have allocated
8112 too many SEL_ARGs)
8113 key1 First argument, root of its RB-tree
8114 key2 Second argument, root of its RB-tree
8115
8116 RETURN
8117 RB-tree root of the resulting SEL_ARG graph.
8118 NULL if the result of AND operation is an empty interval {0}.
8119 */
8120
8121 static SEL_ARG *
8122 key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
8123 {
8124 if (param->has_errors())
8125 return 0;
8126
8127 if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
8128 return key2;
8129 if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
8130 return key1;
8131 if (key1->part != key2->part)
8132 {
8133 if (key1->part > key2->part)
8134 {
8135 swap_variables(SEL_ARG *, key1, key2);
8136 clone_flag=swap_clone_flag(clone_flag);
8137 }
8138 // key1->part < key2->part
8139 key1->use_count--;
8140
8141 /*
8142 Clone key1 if the use_count is greater than 0 otherwise use the
8143 "clone_flag" to determine if a key needs to be cloned.
8144 "clone_flag" is set to true if the conditions which need to be
8145 ANDed (in tree_and) are not simple (has many OR conditions within).
8146 */
8147 if (key1->use_count > 0 || (clone_flag & CLONE_KEY2_MAYBE))
8148 if (!(key1= key1->clone_tree(param)))
8149 return 0; // OOM
8150 return and_all_keys(param, key1, key2, clone_flag);
8151 }
8152
8153 if (((clone_flag & CLONE_KEY2_MAYBE) &&
8154 !(clone_flag & CLONE_KEY1_MAYBE) &&
8155 key2->type != SEL_ARG::MAYBE_KEY) ||
8156 key1->type == SEL_ARG::MAYBE_KEY)
8157 { // Put simple key in key2
8158 swap_variables(SEL_ARG *, key1, key2);
8159 clone_flag=swap_clone_flag(clone_flag);
8160 }
8161
8162 /* If one of the key is MAYBE_KEY then the found region may be smaller */
8163 if (key2->type == SEL_ARG::MAYBE_KEY)
8164 {
8165 if (key1->use_count > 1)
8166 {
8167 key1->use_count--;
8168 if (!(key1=key1->clone_tree(param)))
8169 return 0; // OOM
8170 key1->use_count++;
8171 }
8172 if (key1->type == SEL_ARG::MAYBE_KEY)
8173 { // Both are maybe key
8174 key1->next_key_part=key_and(param, key1->next_key_part,
8175 key2->next_key_part, clone_flag);
8176 if (key1->next_key_part &&
8177 key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
8178 return key1;
8179 }
8180 else
8181 {
8182 key1->maybe_smaller();
8183 if (key2->next_key_part)
8184 {
8185 key1->use_count--; // Incremented in and_all_keys
8186 return and_all_keys(param, key1, key2, clone_flag);
8187 }
8188 key2->use_count--; // Key2 doesn't have a tree
8189 }
8190 return key1;
8191 }
8192
8193 if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
8194 {
8195 /*
8196 Cannot optimize geometry ranges. The next best thing is to keep
8197 one of them.
8198 */
8199 key2->free_tree();
8200 return key1;
8201 }
8202
8203 key1->use_count--;
8204 key2->use_count--;
8205 SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
8206
8207 while (e1 && e2)
8208 {
8209 int cmp=e1->cmp_min_to_min(e2);
8210 if (cmp < 0)
8211 {
8212 if (get_range(&e1,&e2,key1))
8213 continue;
8214 }
8215 else if (get_range(&e2,&e1,key2))
8216 continue;
8217 SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
8218 clone_flag);
8219 e1->increment_use_count(1);
8220 e2->increment_use_count(1);
8221 if (!next || next->type != SEL_ARG::IMPOSSIBLE)
8222 {
8223 SEL_ARG *new_arg= e1->clone_and(e2, param->mem_root);
8224 if (!new_arg)
8225 return &null_element; // End of memory
8226 new_arg->next_key_part=next;
8227 if (!new_tree)
8228 {
8229 new_tree=new_arg;
8230 }
8231 else
8232 new_tree=new_tree->insert(new_arg);
8233 }
8234 if (e1->cmp_max_to_max(e2) < 0)
8235 e1=e1->next; // e1 can't overlap next e2
8236 else
8237 e2=e2->next;
8238 }
8239 key1->free_tree();
8240 key2->free_tree();
8241 if (!new_tree)
8242 return &null_element; // Impossible range
8243 return new_tree;
8244 }
8245
8246
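/*
  Helper for key_and(): position *e1 at the range in root1 that may
  overlap *e2. Returns 1 (after advancing *e1 or *e2) when the current
  pair of ranges cannot overlap, telling the caller's loop to restart
  the comparison; returns 0 when *e1 and *e2 do overlap.
*/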
8247 static bool
8248 get_range(SEL_ARG **e1, SEL_ARG **e2, SEL_ARG *root1)
8249 {
8250 (*e1)=root1->find_range(*e2); // first e1->min < e2->min
8251 if ((*e1)->cmp_max_to_min(*e2) < 0)
8252 {
8253 if (!((*e1)=(*e1)->next))
8254 return 1;
8255 if ((*e1)->cmp_min_to_max(*e2) > 0)
8256 {
8257 (*e2)=(*e2)->next;
8258 return 1;
8259 }
8260 }
8261 return 0;
8262 }
8263
8264
8265 /**
8266 Combine two range expression under a common OR. On a logical level, the
8267 transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
8268
8269 Both expressions are assumed to be in the SEL_ARG format. In a logical sense,
8270 the format is reminiscent of DNF, since an expression such as the following
8271
8272 ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
8273
8274 where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
8275 and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
8276 SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
8277 the first range and ranges must not overlap. It follows that they are also
8278 ordered by maximum endpoints. Thus
8279
8280 ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
8281
8282 is a valid SEL_ARG expression for a key of at least 2 keyparts.
8283
8284 For simplicity, we will assume that expr2 is a single range predicate,
8285 i.e. of the form ( a < x < b AND ... ). It is easy to generalize to a
8286 disjunction of several predicates by calling key_or for each
8287 disjunct in turn.
8288
8289 The algorithm iterates over each disjunct of expr1, and for each disjunct
8290 where the first keypart's range overlaps with the first keypart's range in
8291 expr2:
8292
8293 If the predicates are equal for the rest of the keyparts, or if there are
8294 no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
8295 node in expr2 is deallocated. If more ranges become connected in expr1, the
8296 surplus is also deallocated. If they differ, two ranges are created.
8297
8298 - The range leading up to the overlap. Empty if endpoints are equal.
8299
8300 - The overlapping sub-range. May be the entire range if they are equal.
8301
8302 Finally, there may be one more range if expr2's first keypart's range has a
8303 greater maximum endpoint than the last range in expr1.
8304
8305 For the overlapping sub-range, we recursively call key_or. Thus in order to
8306 compute key_or of
8307
8308 (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
8309
8310 (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
8311
8312 We create the ranges 1 < kp1 <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
8313 first one, we simply hook on the condition for the second keypart from (1)
8314 : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
8315 < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
8316 the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
8317
8318 ( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR
8319 ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR
8320 ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
8321
8322 @param param PARAM from test_quick_select
8323 @param key1 Root of RB-tree of SEL_ARGs to be ORed with key2
8324 @param key2 Root of RB-tree of SEL_ARGs to be ORed with key1
8325 */
8326 static SEL_ARG *
8327 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
8328 {
8329
8330 if (param->has_errors())
8331 return 0;
8332
8333 if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
8334 {
8335 if (key2)
8336 {
8337 key2->use_count--;
8338 key2->free_tree();
8339 }
8340 return key1;
8341 }
8342 if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
8343 // Case is symmetric to the one above, just flip parameters.
8344 return key_or(param, key2, key1);
8345
8346 key1->use_count--;
8347 key2->use_count--;
8348
8349 if (key1->part != key2->part ||
8350 (key1->min_flag | key2->min_flag) & GEOM_FLAG)
8351 {
8352 key1->free_tree();
8353 key2->free_tree();
8354 return 0; // Can't optimize this
8355 }
8356
8357 // If one of the key is MAYBE_KEY then the found region may be bigger
8358 if (key1->type == SEL_ARG::MAYBE_KEY)
8359 {
8360 key2->free_tree();
8361 key1->use_count++;
8362 return key1;
8363 }
8364 if (key2->type == SEL_ARG::MAYBE_KEY)
8365 {
8366 key1->free_tree();
8367 key2->use_count++;
8368 return key2;
8369 }
8370
8371 if (key1->use_count > 0)
8372 {
8373 if (key2->use_count == 0 || key1->elements > key2->elements)
8374 {
8375 swap_variables(SEL_ARG *,key1,key2);
8376 }
8377 if (key1->use_count > 0 && (key1= key1->clone_tree(param)) == NULL)
8378 return 0; // OOM
8379 }
8380
8381 // Add tree at key2 to tree at key1
8382 const bool key2_shared= (key2->use_count != 0);
8383 key1->maybe_flag|= key2->maybe_flag;
8384
8385 /*
8386 Notation for illustrations used in the rest of this function:
8387
8388 Range: [--------]
8389 ^ ^
8390 start stop
8391
8392 Two overlapping ranges:
8393 [-----] [----] [--]
8394 [---] or [---] or [-------]
8395
8396 Ambiguity: ***
8397 The range starts or stops somewhere in the "***" range.
8398 Example: a starts before b and may end before/the same place/after b
8399 a: [----***]
8400 b: [---]
8401
8402 Adjacent ranges:
8403 Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
8404 a: ----]
8405 b: [----
8406 */
8407
8408 SEL_ARG *cur_key2= key2->first();
8409 while (cur_key2)
8410 {
8411 /*
8412 key1 consists of one or more ranges. cur_key1 is the
8413 range currently being handled.
8414
8415 initialize cur_key1 to the latest range in key1 that starts the
8416 same place or before the range in cur_key2 starts
8417
8418 cur_key2: [------]
8419 key1: [---] [-----] [----]
8420 ^
8421 cur_key1
8422 */
8423 SEL_ARG *cur_key1= key1->find_range(cur_key2);
8424
8425 /*
8426 Used to describe how two key values are positioned compared to
8427 each other. Consider key_value_a.<cmp_func>(key_value_b):
8428
8429 -2: key_value_a is smaller than key_value_b, and they are adjacent
8430 -1: key_value_a is smaller than key_value_b (not adjacent)
8431 0: the key values are equal
8432 1: key_value_a is bigger than key_value_b (not adjacent)
8433 2: key_value_a is bigger than key_value_b, and they are adjacent
8434
8435 Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"
8436
8437 cur_key2: [-------- (10 <= x ... )
8438 cur_key1: -----] ( ... x < 10) => cmp==-2
8439 cur_key1: ----] ( ... x < 9) => cmp==-1
8440 cur_key1: ------] ( ... x <= 10) => cmp== 0
8441 cur_key1: --------] ( ... x <= 12) => cmp== 1
8442 (cmp == 2 does not make sense for cmp_max_to_min())
8443 */
8444 int cmp= 0;
8445
8446 if (!cur_key1)
8447 {
8448 /*
8449 The range in cur_key2 starts before the first range in key1. Use
8450 the first range in key1 as cur_key1.
8451
8452 cur_key2: [--------]
8453 key1: [****--] [----] [-------]
8454 ^
8455 cur_key1
8456 */
8457 cur_key1= key1->first();
8458 cmp= -1;
8459 }
8460 else if ((cmp= cur_key1->cmp_max_to_min(cur_key2)) < 0)
8461 {
8462 /*
8463 This is the case:
8464 cur_key2: [-------]
8465 cur_key1: [----**]
8466 */
8467 SEL_ARG *next_key1= cur_key1->next;
8468 if (cmp == -2 &&
8469 eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8470 {
8471 /*
8472 Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
8473
8474 This is the case:
8475 cur_key2: [-------]
8476 cur_key1: [----]
8477
8478 Result:
8479 cur_key2: [-------------] => inserted into key1 below
8480 cur_key1: => deleted
8481 */
8482 SEL_ARG *next_key2= cur_key2->next;
8483 if (key2_shared)
8484 {
8485 if (!(cur_key2= new (param->mem_root) SEL_ARG(*cur_key2)))
8486 return 0; // out of memory
8487 cur_key2->increment_use_count(key1->use_count+1);
8488 cur_key2->next= next_key2; // New copy of cur_key2
8489 }
8490
8491 if (cur_key2->copy_min(cur_key1))
8492 {
8493 // cur_key2 is full range: [-inf <= cur_key2 <= +inf]
8494 key1->free_tree();
8495 key2->free_tree();
8496 key1->type= SEL_ARG::ALWAYS;
8497 key2->type= SEL_ARG::ALWAYS;
8498 if (key1->maybe_flag)
8499 return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
8500 return 0;
8501 }
8502
8503 if (!(key1= key1->tree_delete(cur_key1)))
8504 {
8505 /*
8506 cur_key1 was the last range in key1; move the cur_key2
8507 range that was merged above to key1
8508 */
8509 key1= cur_key2;
8510 key1->make_root();
8511 cur_key2= next_key2;
8512 break;
8513 }
8514 }
8515 // Move to next range in key1. Now cur_key1.min > cur_key2.min
8516 if (!(cur_key1= next_key1))
8517 break; // No more ranges in key1. Copy rest of key2
8518 }
8519
8520 if (cmp < 0)
8521 {
8522 /*
8523 This is the case:
8524 cur_key2: [--***]
8525 cur_key1: [----]
8526 */
8527 int cur_key1_cmp;
8528 if ((cur_key1_cmp= cur_key1->cmp_min_to_max(cur_key2)) > 0)
8529 {
8530 /*
8531 This is the case:
8532 cur_key2: [------**]
8533 cur_key1: [----]
8534 */
8535 if (cur_key1_cmp == 2 &&
8536 eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8537 {
8538 /*
8539 Adjacent ranges with equal next_key_part. Merge like this:
8540
8541 This is the case:
8542 cur_key2: [------]
8543 cur_key1: [-----]
8544
8545 Result:
8546 cur_key2: [------]
8547 cur_key1: [-------------]
8548
8549 Then move on to next key2 range.
8550 */
8551 cur_key1->copy_min_to_min(cur_key2);
8552 key1->merge_flags(cur_key2); //should be cur_key1->merge...() ?
8553 if (cur_key1->min_flag & NO_MIN_RANGE &&
8554 cur_key1->max_flag & NO_MAX_RANGE)
8555 {
8556 if (key1->maybe_flag)
8557 return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
8558 return 0;
8559 }
8560 cur_key2->increment_use_count(-1); // Free not used tree
8561 cur_key2=cur_key2->next;
8562 continue;
8563 }
8564 else
8565 {
8566 /*
8567 cur_key2 not adjacent to cur_key1 or has different next_key_part.
8568 Insert into key1 and move to next range in key2
8569
8570 This is the case:
8571 cur_key2: [------**]
8572 cur_key1: [----]
8573
8574 Result:
8575 key1: [------**][----]
8576 ^ ^
8577 insert cur_key1
8578 */
8579 SEL_ARG *next_key2= cur_key2->next;
8580 if (key2_shared)
8581 {
8582 SEL_ARG *cpy= new (param->mem_root) SEL_ARG(*cur_key2); // Must make copy
8583 if (!cpy)
8584 return 0; // OOM
8585 key1= key1->insert(cpy);
8586 cur_key2->increment_use_count(key1->use_count+1);
8587 }
8588 else
8589 key1= key1->insert(cur_key2); // Will destroy key2_root
8590 cur_key2= next_key2;
8591 continue;
8592 }
8593 }
8594 }
8595
8596 /*
8597 The ranges in cur_key1 and cur_key2 are overlapping:
8598
8599 cur_key2: [----------]
8600 cur_key1: [*****-----*****]
8601
8602 Corollary: cur_key1.min <= cur_key2.max
8603 */
8604 if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8605 {
8606 // Merge overlapping ranges with equal next_key_part
8607 if (cur_key1->is_same(cur_key2))
8608 {
8609 /*
8610 cur_key1 covers exactly the same range as cur_key2
8611 Use the relevant range in key1.
8612 */
8613 cur_key1->merge_flags(cur_key2); // Copy maybe flags
8614 cur_key2->increment_next_key_part_use_count(-1); // Free not used tree
8615 }
8616 else
8617 {
8618 SEL_ARG *last= cur_key1;
8619 SEL_ARG *first= cur_key1;
8620
8621 /*
8622 Find the last range in key1 that overlaps cur_key2 and
8623 where all ranges first...last have the same next_key_part as
8624 cur_key2.
8625
8626 cur_key2: [****----------------------*******]
8627 key1: [--] [----] [---] [-----] [xxxx]
8628 ^ ^ ^
8629 first last different next_key_part
8630
8631 Since cur_key2 covers them, the ranges between first and last
8632 are merged into one range by deleting first...last-1 from
8633 the key1 tree. In the figure, this applies to first and the
8634 two consecutive ranges. The range of last is then extended:
8635 * last.min: Set to min(cur_key2.min, first.min)
8636 * last.max: If there is a last->next that overlaps cur_key2
8637 (i.e., last->next has a different next_key_part):
8638 Set adjacent to last->next.min
8639 Otherwise: Set to max(cur_key2.max, last.max)
8640
8641 Result:
8642 cur_key2: [****----------------------*******]
8643 [--] [----] [---] => deleted from key1
8644 key1: [**------------------------***][xxxx]
8645 ^ ^
8646 cur_key1=last different next_key_part
8647 */
8648 while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
8649 eq_tree(last->next->next_key_part, cur_key2->next_key_part))
8650 {
8651 /*
8652 last->next is covered by cur_key2 and has same next_key_part.
8653 last can be deleted
8654 */
8655 SEL_ARG *save=last;
8656 last=last->next;
8657 key1= key1->tree_delete(save);
8658 }
8659 // Redirect cur_key1 to last which will cover the entire range
8660 cur_key1= last;
8661
8662 /*
8663 Extend last to cover the entire range of
8664 [min(first.min_value,cur_key2.min_value)...last.max_value].
8665 If this forms a full range (the range covers all possible
8666 values) we return no SEL_ARG RB-tree.
8667 */
8668 bool full_range= last->copy_min(first);
8669 if (!full_range)
8670 full_range= last->copy_min(cur_key2);
8671
8672 if (!full_range)
8673 {
8674 if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0)
8675 {
8676 /*
8677 This is the case:
8678 cur_key2: [-------------]
8679 key1: [***------] [xxxx]
8680 ^ ^
8681 last different next_key_part
8682
8683 Extend range of last up to last->next:
8684 cur_key2: [-------------]
8685 key1: [***--------][xxxx]
8686 */
8687 last->copy_min_to_max(last->next);
8688 }
8689 else
8690 /*
8691 This is the case:
8692 cur_key2: [--------*****]
8693 key1: [***---------] [xxxx]
8694 ^ ^
8695 last different next_key_part
8696
8697 Extend range of last up to max(last.max, cur_key2.max):
8698 cur_key2: [--------*****]
8699 key1: [***----------**] [xxxx]
8700 */
8701 full_range= last->copy_max(cur_key2);
8702 }
8703 if (full_range)
8704 { // Full range
8705 key1->free_tree();
8706 key1->type= SEL_ARG::ALWAYS;
8707 key2->type= SEL_ARG::ALWAYS;
8708 for (; cur_key2 ; cur_key2= cur_key2->next)
8709 cur_key2->increment_use_count(-1); // Free not used tree
8710 if (key1->maybe_flag)
8711 return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
8712 return 0;
8713 }
8714 }
8715 }
8716
8717 if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0)
8718 {
8719 /*
8720 This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
8721 cur_key2: [-------]
8722 cur_key1: [----------*******]
8723 */
8724
8725 if (!cur_key1->next_key_part)
8726 {
8727 /*
8728 cur_key1->next_key_part is empty: cut the range that
8729 is covered by cur_key1 from cur_key2.
8730 Reason: (cur_key2->next_key_part OR
8731 cur_key1->next_key_part) will be empty and therefore
8732 equal to cur_key1->next_key_part. Thus, this part of
8733 the cur_key2 range is completely covered by cur_key1.
8734 */
8735 if (cur_key1->cmp_max_to_max(cur_key2) >= 0)
8736 {
8737 /*
8738 cur_key1 covers the entire range in cur_key2.
8739 cur_key2: [-------]
8740 cur_key1: [-----------------]
8741
8742 Move on to next range in key2
8743 */
8744 /*
8745 cur_key2 will no longer be used. Reduce reference count
8746 of SEL_ARGs in its next_key_part.
8747 */
8748 cur_key2->increment_next_key_part_use_count(-1);
8749 cur_key2= cur_key2->next;
8750 continue;
8751 }
8752 else
8753 {
8754 /*
8755 This is the case:
8756 cur_key2: [-------]
8757 cur_key1: [---------]
8758
8759 Result:
8760 cur_key2: [---]
8761 cur_key1: [---------]
8762 */
8763 cur_key2->copy_max_to_min(cur_key1);
8764 continue;
8765 }
8766 }
8767
8768 /*
8769 The ranges are overlapping but have not been merged because
8770 next_key_part of cur_key1 and cur_key2 differ.
8771 cur_key2: [----]
8772 cur_key1: [------------*****]
8773
8774 Split cur_key1 in two where cur_key2 starts:
8775 cur_key2: [----]
8776 key1: [--------][--*****]
8777 ^ ^
8778 insert cur_key1
8779 */
8780 SEL_ARG *new_arg= cur_key1->clone_first(cur_key2, param->mem_root);
8781 if (!new_arg)
8782 return 0; // OOM
8783 if ((new_arg->next_key_part= cur_key1->next_key_part))
8784 new_arg->increment_use_count(key1->use_count+1);
8785 cur_key1->copy_min_to_min(cur_key2);
8786 key1= key1->insert(new_arg);
8787 } // cur_key1.min >= cur_key2.min due to this if()
8788
8789 /*
8790 Now cur_key2.min <= cur_key1.min <= cur_key2.max:
8791 cur_key2: [---------]
8792 cur_key1: [****---*****]
8793 */
8794 SEL_ARG key2_cpy(*cur_key2); // Get copy we can modify
8795 for (;;)
8796 {
8797 if (cur_key1->cmp_min_to_min(&key2_cpy) > 0)
8798 {
8799 /*
8800 This is the case:
8801 key2_cpy: [------------]
8802 key1: [-*****]
8803 ^
8804 cur_key1
8805
8806 Result:
8807 key2_cpy: [---]
8808 key1: [-------][-*****]
8809 ^ ^
8810 insert cur_key1
8811 */
8812 SEL_ARG *new_arg=key2_cpy.clone_first(cur_key1, param->mem_root);
8813 if (!new_arg)
8814 return 0; // OOM
8815 if ((new_arg->next_key_part=key2_cpy.next_key_part))
8816 new_arg->increment_use_count(key1->use_count+1);
8817 key1= key1->insert(new_arg);
8818 key2_cpy.copy_min_to_min(cur_key1);
8819 }
8820 // Now key2_cpy.min == cur_key1.min
8821
8822 if ((cmp= cur_key1->cmp_max_to_max(&key2_cpy)) <= 0)
8823 {
8824 /*
8825 cur_key1.max <= key2_cpy.max:
8826 key2_cpy: a) [-------] or b) [----]
8827 cur_key1: [----] [----]
8828
8829 Steps:
8830
8831 1) Update next_key_part of cur_key1: OR it with
8832 key2_cpy->next_key_part.
8833 2) If case a: Insert range [cur_key1.max, key2_cpy.max]
8834 into key1 using next_key_part of key2_cpy
8835
8836 Result:
8837 key1: a) [----][-] or b) [----]
8838 */
8839 cur_key1->maybe_flag|= key2_cpy.maybe_flag;
8840 key2_cpy.increment_use_count(key1->use_count+1);
8841 cur_key1->next_key_part=
8842 key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part);
8843
8844 if (!cmp)
8845 break; // case b: done with this key2 range
8846
8847 // Make key2_cpy the range [cur_key1.max, key2_cpy.max]
8848 key2_cpy.copy_max_to_min(cur_key1);
8849 if (!(cur_key1= cur_key1->next))
8850 {
8851 /*
8852 No more ranges in key1. Insert key2_cpy and go to "end"
8853 label to insert remaining ranges in key2 if any.
8854 */
8855 SEL_ARG *new_key1_range= new (param->mem_root) SEL_ARG(key2_cpy);
8856 if (!new_key1_range)
8857 return 0; // OOM
8858 key1= key1->insert(new_key1_range);
8859 cur_key2= cur_key2->next;
8860 goto end;
8861 }
8862 if (cur_key1->cmp_min_to_max(&key2_cpy) > 0)
8863 {
8864 /*
8865 The next range in key1 does not overlap with key2_cpy.
8866 Insert this range into key1 and move on to the next range
8867 in key2.
8868 */
8869 SEL_ARG *new_key1_range= new (param->mem_root) SEL_ARG(key2_cpy);
8870 if (!new_key1_range)
8871 return 0; // OOM
8872 key1= key1->insert(new_key1_range);
8873 break;
8874 }
8875 /*
8876 key2_cpy overlaps with the next range in key1 and the case
8877 is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
8878 to for(;;) to handle this situation.
8879 */
8880 continue;
8881 }
8882 else
8883 {
8884 /*
8885 This is the case:
8886 key2_cpy: [-------]
8887 cur_key1: [------------]
8888
8889 Result:
8890 key1: [-------][---]
8891 ^ ^
8892 new_arg cur_key1
8893 Steps:
8894
8895 0) If cur_key1->next_key_part is empty: do nothing.
8896 Reason: (key2_cpy->next_key_part OR
8897 cur_key1->next_key_part) will be empty and
8898 therefore equal to cur_key1->next_key_part. Thus,
8899 the range in key2_cpy is completely covered by
8900 cur_key1
8901 1) Make new_arg with range [cur_key1.min, key2_cpy.max].
8902 new_arg->next_key_part is OR between next_key_part of
8903 cur_key1 and key2_cpy
8904 2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
8905 3) Insert new_arg into key1
8906 */
8907 if (!cur_key1->next_key_part) // Step 0
8908 {
8909 key2_cpy.increment_use_count(-1); // Free not used tree
8910 break;
8911 }
8912 SEL_ARG *new_arg= cur_key1->clone_last(&key2_cpy, param->mem_root);
8913 if (!new_arg)
8914 return 0; // OOM
8915 cur_key1->copy_max_to_min(&key2_cpy);
8916 cur_key1->increment_use_count(key1->use_count+1);
8917 /* Increment key count as it may be used for next loop */
8918 key2_cpy.increment_use_count(1);
8919 new_arg->next_key_part= key_or(param, cur_key1->next_key_part,
8920 key2_cpy.next_key_part);
8921 key1= key1->insert(new_arg);
8922 break;
8923 }
8924 }
8925 // Move on to next range in key2
8926 cur_key2= cur_key2->next;
8927 }
8928
8929 end:
8930 /*
8931 Add key2 ranges that are non-overlapping with and higher than the
8932 highest range in key1.
8933 */
8934 while (cur_key2)
8935 {
8936 SEL_ARG *next= cur_key2->next;
8937 if (key2_shared)
8938 {
8939 SEL_ARG *key2_cpy=new (param->mem_root) SEL_ARG(*cur_key2); // Must make copy
8940 if (!key2_cpy)
8941 return 0;
8942 cur_key2->increment_use_count(key1->use_count+1);
8943 key1= key1->insert(key2_cpy);
8944 }
8945 else
8946 key1= key1->insert(cur_key2); // Will destroy key2_root
8947 cur_key2= next;
8948 }
8949 key1->use_count++;
8950
8951 return key1;
8952 }
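
/*
  Illustrative sketch (hypothetical caller, not compiled): a caller such
  as tree_or() ORs the per-index trees of two SEL_TREEs in place. The
  "idx" and the exact tree handles below are assumptions for the example:

    SEL_ARG *combined= key_or(param, tree1->keys[idx], tree2->keys[idx]);
    // A NULL result means there is no usable range tree for this index
    // (the ORed range covers everything, OOM, or not optimizable);
    // otherwise key_or() returns the merged RB-tree with use_count
    // already incremented.
    tree1->keys[idx]= combined;
*/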
8953
8954
8955 /* Compare if two trees are equal */
8956
8957 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
8958 {
8959 if (a == b)
8960 return 1;
8961 if (!a || !b || !a->is_same(b))
8962 return 0;
8963 if (a->left != &null_element && b->left != &null_element)
8964 {
8965 if (!eq_tree(a->left,b->left))
8966 return 0;
8967 }
8968 else if (a->left != &null_element || b->left != &null_element)
8969 return 0;
8970 if (a->right != &null_element && b->right != &null_element)
8971 {
8972 if (!eq_tree(a->right,b->right))
8973 return 0;
8974 }
8975 else if (a->right != &null_element || b->right != &null_element)
8976 return 0;
8977 if (a->next_key_part != b->next_key_part)
8978 { // Sub range
8979 if (!a->next_key_part != !b->next_key_part ||
8980 !eq_tree(a->next_key_part, b->next_key_part))
8981 return 0;
8982 }
8983 return 1;
8984 }
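
/*
  Example: for an index on (kp1, kp2), the trees built for
  "kp1=1 AND kp2=2" and "kp1=1 AND kp2=3" have identical kp1 ranges, but
  eq_tree() returns false because their next_key_part trees differ.
*/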
8985
8986
8987 SEL_ARG *
8988 SEL_ARG::insert(SEL_ARG *key)
8989 {
8990 SEL_ARG *element, **par= NULL, *last_element= NULL;
8991
8992 for (element= this; element != &null_element ; )
8993 {
8994 last_element=element;
8995 if (key->cmp_min_to_min(element) > 0)
8996 {
8997 par= &element->right; element= element->right;
8998 }
8999 else
9000 {
9001 par = &element->left; element= element->left;
9002 }
9003 }
9004 *par=key;
9005 key->parent=last_element;
9006 /* Link in list */
9007 if (par == &last_element->left)
9008 {
9009 key->next=last_element;
9010 if ((key->prev=last_element->prev))
9011 key->prev->next=key;
9012 last_element->prev=key;
9013 }
9014 else
9015 {
9016 if ((key->next=last_element->next))
9017 key->next->prev=key;
9018 key->prev=last_element;
9019 last_element->next=key;
9020 }
9021 key->left=key->right= &null_element;
9022 SEL_ARG *root=rb_insert(key); // rebalance tree
9023 root->use_count=this->use_count; // copy root info
9024 root->elements= this->elements+1;
9025 root->maybe_flag=this->maybe_flag;
9026 return root;
9027 }
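
/*
  Note on the structure maintained above: besides the red-black tree
  (parent/left/right), every SEL_ARG is threaded into a doubly-linked
  next/prev list kept in ascending order of range start. Assuming three
  disjoint ranges r1 < r2 < r3 inserted in any order, the result is:

    tree (one possible shape):    r2        list:  r1 <-> r2 <-> r3
                                 /  \
                               r1    r3

  so walking first()/next visits the ranges in order without tree
  traversal.
*/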
9028
9029
9030 /*
9031 ** Find best key with min <= given key
9032 ** Because of the call context, this should never return 0 to get_range
9033 */
9034
9035 SEL_ARG *
9036 SEL_ARG::find_range(SEL_ARG *key)
9037 {
9038 SEL_ARG *element=this,*found=0;
9039
9040 for (;;)
9041 {
9042 if (element == &null_element)
9043 return found;
9044 int cmp=element->cmp_min_to_min(key);
9045 if (cmp == 0)
9046 return element;
9047 if (cmp < 0)
9048 {
9049 found=element;
9050 element=element->right;
9051 }
9052 else
9053 element=element->left;
9054 }
9055 }
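
/*
  Example: if this tree holds the ranges [1..2], [5..7] and [9..12], and
  "key" is a range starting at 6, find_range() returns [5..7] -- the last
  range whose minimum is <= 6. For a key starting at 0 it returns 0;
  key_or() handles that case by falling back to key1->first().
*/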
9056
9057
9058 /*
9059   Remove an element from the tree
9060
9061 SYNOPSIS
9062 tree_delete()
9063 key Key that is to be deleted from tree (this)
9064
9065 NOTE
9066     This also frees all sub trees that are used by the element
9067
9068 RETURN
9069 root of new tree (with key deleted)
9070 */
9071
9072 SEL_ARG *
9073 SEL_ARG::tree_delete(SEL_ARG *key)
9074 {
9075 enum leaf_color remove_color;
9076 SEL_ARG *root,*nod,**par,*fix_par;
9077 DBUG_ENTER("tree_delete");
9078
9079 root=this;
9080 this->parent= 0;
9081
9082 /* Unlink from list */
9083 if (key->prev)
9084 key->prev->next=key->next;
9085 if (key->next)
9086 key->next->prev=key->prev;
9087 key->increment_next_key_part_use_count(-1);
9088 if (!key->parent)
9089 par= &root;
9090 else
9091 par=key->parent_ptr();
9092
9093 if (key->left == &null_element)
9094 {
9095 *par=nod=key->right;
9096 fix_par=key->parent;
9097 if (nod != &null_element)
9098 nod->parent=fix_par;
9099 remove_color= key->color;
9100 }
9101 else if (key->right == &null_element)
9102 {
9103 *par= nod=key->left;
9104 nod->parent=fix_par=key->parent;
9105 remove_color= key->color;
9106 }
9107 else
9108 {
9109 SEL_ARG *tmp=key->next; // next bigger key (exist!)
9110 nod= *tmp->parent_ptr()= tmp->right; // unlink tmp from tree
9111 fix_par=tmp->parent;
9112 if (nod != &null_element)
9113 nod->parent=fix_par;
9114 remove_color= tmp->color;
9115
9116 tmp->parent=key->parent; // Move node in place of key
9117 (tmp->left=key->left)->parent=tmp;
9118 if ((tmp->right=key->right) != &null_element)
9119 tmp->right->parent=tmp;
9120 tmp->color=key->color;
9121 *par=tmp;
9122 if (fix_par == key) // key->right == key->next
9123 fix_par=tmp; // new parent of nod
9124 }
9125
9126 if (root == &null_element)
9127 DBUG_RETURN(0); // Maybe root later
9128 if (remove_color == BLACK)
9129 root=rb_delete_fixup(root,nod,fix_par);
9130 #ifndef DBUG_OFF
9131 test_rb_tree(root,root->parent);
9132 #endif
9133 root->use_count=this->use_count; // Fix root counters
9134 root->elements=this->elements-1;
9135 root->maybe_flag=this->maybe_flag;
9136 DBUG_RETURN(root);
9137 }
9138
9139
9140 /* Functions to fix up the tree after insert and delete */
9141
9142 static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
9143 {
9144 SEL_ARG *y=leaf->right;
9145 leaf->right=y->left;
9146 if (y->left != &null_element)
9147 y->left->parent=leaf;
9148 if (!(y->parent=leaf->parent))
9149 *root=y;
9150 else
9151 *leaf->parent_ptr()=y;
9152 y->left=leaf;
9153 leaf->parent=y;
9154 }
9155
9156 static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
9157 {
9158 SEL_ARG *y=leaf->left;
9159 leaf->left=y->right;
9160 if (y->right != &null_element)
9161 y->right->parent=leaf;
9162 if (!(y->parent=leaf->parent))
9163 *root=y;
9164 else
9165 *leaf->parent_ptr()=y;
9166 y->right=leaf;
9167 leaf->parent=y;
9168 }
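
/*
  Both rotations preserve the BST ordering (A < x < B < y < C) while
  shifting height between the two sides:

        x                         y
       / \     left_rotate(x)    / \
      A   y    ============>    x   C
         / \   <============   / \
        B   C  right_rotate(y) A   B
*/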
9169
9170
9171 SEL_ARG *
9172 SEL_ARG::rb_insert(SEL_ARG *leaf)
9173 {
9174 SEL_ARG *y,*par,*par2,*root;
9175 root= this; root->parent= 0;
9176
9177 leaf->color=RED;
9178 while (leaf != root && (par= leaf->parent)->color == RED)
9179 { // This can't be root or 1 level under
9180 if (par == (par2= leaf->parent->parent)->left)
9181 {
9182 y= par2->right;
9183 if (y->color == RED)
9184 {
9185 par->color=BLACK;
9186 y->color=BLACK;
9187 leaf=par2;
9188 leaf->color=RED; /* And the loop continues */
9189 }
9190 else
9191 {
9192 if (leaf == par->right)
9193 {
9194 left_rotate(&root,leaf->parent);
9195 par=leaf; /* leaf is now parent to old leaf */
9196 }
9197 par->color=BLACK;
9198 par2->color=RED;
9199 right_rotate(&root,par2);
9200 break;
9201 }
9202 }
9203 else
9204 {
9205 y= par2->left;
9206 if (y->color == RED)
9207 {
9208 par->color=BLACK;
9209 y->color=BLACK;
9210 leaf=par2;
9211 leaf->color=RED; /* And the loop continues */
9212 }
9213 else
9214 {
9215 if (leaf == par->left)
9216 {
9217 right_rotate(&root,par);
9218 par=leaf;
9219 }
9220 par->color=BLACK;
9221 par2->color=RED;
9222 left_rotate(&root,par2);
9223 break;
9224 }
9225 }
9226 }
9227 root->color=BLACK;
9228 #ifndef DBUG_OFF
9229 test_rb_tree(root,root->parent);
9230 #endif
9231 return root;
9232 }
9233
9234
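/*
  Restore the red-black invariants after a BLACK node has been removed.
  This is the standard RB-DELETE-FIXUP algorithm (Cormen et al.); "par"
  is passed explicitly because "key" may be &null_element, whose parent
  pointer cannot be trusted.
*/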
9235 SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
9236 {
9237 SEL_ARG *x,*w;
9238 root->parent=0;
9239
9240 x= key;
9241 while (x != root && x->color == SEL_ARG::BLACK)
9242 {
9243 if (x == par->left)
9244 {
9245 w=par->right;
9246 if (w->color == SEL_ARG::RED)
9247 {
9248 w->color=SEL_ARG::BLACK;
9249 par->color=SEL_ARG::RED;
9250 left_rotate(&root,par);
9251 w=par->right;
9252 }
9253 if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
9254 {
9255 w->color=SEL_ARG::RED;
9256 x=par;
9257 }
9258 else
9259 {
9260 if (w->right->color == SEL_ARG::BLACK)
9261 {
9262 w->left->color=SEL_ARG::BLACK;
9263 w->color=SEL_ARG::RED;
9264 right_rotate(&root,w);
9265 w=par->right;
9266 }
9267 w->color=par->color;
9268 par->color=SEL_ARG::BLACK;
9269 w->right->color=SEL_ARG::BLACK;
9270 left_rotate(&root,par);
9271 x=root;
9272 break;
9273 }
9274 }
9275 else
9276 {
9277 w=par->left;
9278 if (w->color == SEL_ARG::RED)
9279 {
9280 w->color=SEL_ARG::BLACK;
9281 par->color=SEL_ARG::RED;
9282 right_rotate(&root,par);
9283 w=par->left;
9284 }
9285 if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
9286 {
9287 w->color=SEL_ARG::RED;
9288 x=par;
9289 }
9290 else
9291 {
9292 if (w->left->color == SEL_ARG::BLACK)
9293 {
9294 w->right->color=SEL_ARG::BLACK;
9295 w->color=SEL_ARG::RED;
9296 left_rotate(&root,w);
9297 w=par->left;
9298 }
9299 w->color=par->color;
9300 par->color=SEL_ARG::BLACK;
9301 w->left->color=SEL_ARG::BLACK;
9302 right_rotate(&root,par);
9303 x=root;
9304 break;
9305 }
9306 }
9307 par=x->parent;
9308 }
9309 x->color=SEL_ARG::BLACK;
9310 return root;
9311 }
9312
9313
9314 #ifndef DBUG_OFF
9315 /* Test that the properties for a red-black tree hold. Returns the black-height of the subtree on success, -1 if a property is violated */
9316
9317 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
9318 {
9319 int count_l,count_r;
9320
9321 if (element == &null_element)
9322 return 0; // Found end of tree
9323 if (element->parent != parent)
9324 {
9325 sql_print_error("Wrong tree: Parent doesn't point at parent");
9326 return -1;
9327 }
9328 if (element->color == SEL_ARG::RED &&
9329 (element->left->color == SEL_ARG::RED ||
9330 element->right->color == SEL_ARG::RED))
9331 {
9332 sql_print_error("Wrong tree: Found two red in a row");
9333 return -1;
9334 }
9335 if (element->left == element->right && element->left != &null_element)
9336 { // Dummy test
9337 sql_print_error("Wrong tree: Found right == left");
9338 return -1;
9339 }
9340 count_l=test_rb_tree(element->left,element);
9341 count_r=test_rb_tree(element->right,element);
9342 if (count_l >= 0 && count_r >= 0)
9343 {
9344 if (count_l == count_r)
9345 return count_l+(element->color == SEL_ARG::BLACK);
9346 sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
9347 count_l,count_r);
9348 }
9349 return -1; // Error, no more warnings
9350 }
9351 #endif
9352
9353
9354 /**
9355 Count how many times SEL_ARG graph "root" refers to its part "key" via
9356 transitive closure.
9357
9358 @param root An RB-Root node in a SEL_ARG graph.
9359 @param key Another RB-Root node in that SEL_ARG graph.
9360
9361 The passed "root" node may refer to "key" node via root->next_key_part,
9362   root->next->next_key_part, and so on.
9363
9364 This function counts how many times the node "key" is referred (via
9365 SEL_ARG::next_key_part) by
9366 - intervals of RB-tree pointed by "root",
9367 - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
9368 intervals of RB-tree pointed by "root",
9369 - and so on.
9370
9371 Here is an example (horizontal links represent next_key_part pointers,
9372   vertical links - next/prev pointers):
9373
9374 +----+ $
9375 |root|-----------------+
9376 +----+ $ |
9377 | $ |
9378 | $ |
9379 +----+ +---+ $ | +---+ Here the return value
9380 | |- ... -| |---$-+--+->|key| will be 4.
9381 +----+ +---+ $ | | +---+
9382 | $ | |
9383 ... $ | |
9384 | $ | |
9385 +----+ +---+ $ | |
9386 | |---| |---------+ |
9387 +----+ +---+ $ |
9388 | | $ |
9389 ... +---+ $ |
9390 | |------------+
9391 +---+ $
9392 @return
9393 Number of links to "key" from nodes reachable from "root".
9394 */
9395
9396 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
9397 {
9398 ulong count= 0;
9399 for (root=root->first(); root ; root=root->next)
9400 {
9401 if (root->next_key_part)
9402 {
9403 if (root->next_key_part == key)
9404 count++;
9405 if (root->next_key_part->part < key->part)
9406 count+=count_key_part_usage(root->next_key_part,key);
9407 }
9408 }
9409 return count;
9410 }
9411
9412
9413 /*
9414 Check if SEL_ARG::use_count value is correct
9415
9416 SYNOPSIS
9417 SEL_ARG::test_use_count()
9418 root The root node of the SEL_ARG graph (an RB-tree root node that
9419 has the least value of sel_arg->part in the entire graph, and
9420 thus is the "origin" of the graph)
9421
9422 DESCRIPTION
9423 Check if SEL_ARG::use_count value is correct. See the definition of
9424 use_count for what is "correct".
9425
9426 RETURN
9427 true an incorrect SEL_ARG::use_count is found,
9428 false otherwise
9429 */
9430
9431 bool SEL_ARG::test_use_count(SEL_ARG *root)
9432 {
9433 uint e_count=0;
9434 if (this == root && use_count != 1)
9435 {
9436 sql_print_information("Use_count: Wrong count %lu for root",use_count);
9437 // DBUG_ASSERT(false); // Todo - enable and clean up mess
9438 return true;
9439 }
9440 if (this->type != SEL_ARG::KEY_RANGE)
9441 return false;
9442 for (SEL_ARG *pos=first(); pos ; pos=pos->next)
9443 {
9444 e_count++;
9445 if (pos->next_key_part)
9446 {
9447 ulong count=count_key_part_usage(root,pos->next_key_part);
9448 if (count > pos->next_key_part->use_count)
9449 {
9450 sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu "
9451 "should be %lu", (long unsigned int)pos,
9452 pos->next_key_part->use_count, count);
9453 // DBUG_ASSERT(false); // Todo - enable and clean up mess
9454 return true;
9455 }
9456 pos->next_key_part->test_use_count(root);
9457 }
9458 }
9459 if (e_count != elements)
9460 {
9461 sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
9462 e_count, elements, (long unsigned int) this);
9463 // DBUG_ASSERT(false); // Todo - enable and clean up mess
9464 return true;
9465 }
9466 return false;
9467 }
9468
9469 /****************************************************************************
9470 MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
9471 ****************************************************************************/
9472
9473 /* MRR range sequence, SEL_ARG* implementation: stack entry */
9474 typedef struct st_range_seq_entry
9475 {
9476 /*
9477 Pointers in min and max keys. They point to right-after-end of key
9478 images. The 0-th entry has these pointing to key tuple start.
9479 */
9480 uchar *min_key, *max_key;
9481
9482 /*
9483 Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
9484 min_key_flag may have NULL_RANGE set.
9485 */
9486 uint min_key_flag, max_key_flag;
9487 enum ha_rkey_function rkey_func_flag;
9488 /* Number of key parts */
9489 uint min_key_parts, max_key_parts;
9490 /**
9491 Pointer into the R-B tree for this keypart. It points to the
9492 currently active range for the keypart, so calling next on it will
9493 get to the next range. sel_arg_range_seq_next() uses this to avoid
9494 reparsing the R-B range trees each time a new range is fetched.
9495 */
9496 SEL_ARG *key_tree;
9497 } RANGE_SEQ_ENTRY;
9498
9499
9500 /*
9501 MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
9502 */
9503 class Sel_arg_range_sequence
9504 {
9505 private:
9506
9507 /**
9508 Stack of ranges for the curr_kp first keyparts. Used by
9509 sel_arg_range_seq_next() so that if the next range is equal to the
9510 previous one for the first x keyparts, stack[x-1] can be
9511 accumulated with the new range in keyparts > x to quickly form
9512 the next range to return.
9513
9514 Notation used below: "x:y" means a range where
9515 "column_in_keypart_0=x" and "column_in_keypart_1=y". For
9516 simplicity, only equality (no BETWEEN, < etc) is considered in the
9517 example but the same principle applies to other range predicate
9518 operators too.
9519
9520 Consider a query with these range predicates:
9521 (kp0=1 and kp1=2 and kp2=3) or
9522 (kp0=1 and kp1=2 and kp2=4) or
9523 (kp0=1 and kp1=3 and kp2=5) or
9524 (kp0=1 and kp1=3 and kp2=6)
9525
9526 1) sel_arg_range_seq_next() is called the first time
9527 - traverse the R-B tree (see SEL_ARG) to find the first range
9528 - returns range "1:2:3"
9529 - values in stack after this: stack[1, 1:2, 1:2:3]
9530 2) sel_arg_range_seq_next() is called second time
9531 - keypart 2 has another range, so the next range in
9532 keypart 2 is appended to stack[1] and saved
9533 in stack[2]
9534 - returns range "1:2:4"
9535 - values in stack after this: stack[1, 1:2, 1:2:4]
9536 3) sel_arg_range_seq_next() is called the third time
9537 - no more ranges in keypart 2, but keypart 1 has
9538 another range, so the next range in keypart 1 is
9539 appended to stack[0] and saved in stack[1]. The first
9540 range in keypart 2 is then appended to stack[1] and
9541 saved in stack[2]
9542 - returns range "1:3:5"
9543 - values in stack after this: stack[1, 1:3, 1:3:5]
9544 4) sel_arg_range_seq_next() is called the fourth time
9545 - keypart 2 has another range, see 2)
9546 - returns range "1:3:6"
9547 - values in stack after this: stack[1, 1:3, 1:3:6]
9548 */
9549 RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
9550 /*
9551 Index of last used element in the above array. A value of -1 means
9552 that the stack is empty.
9553 */
9554 int curr_kp;
9555
9556 public:
9557 uint keyno; /* index of used tree in SEL_TREE structure */
9558 uint real_keyno; /* Number of the index in tables */
9559
9560 PARAM * const param;
9561 SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
9562
9563   Sel_arg_range_sequence(PARAM *param_arg) : param(param_arg) { reset(); }
9564
9565   void reset()
9566 {
9567 stack[0].key_tree= NULL;
9568 stack[0].min_key= (uchar*)param->min_key;
9569 stack[0].min_key_flag= 0;
9570 stack[0].min_key_parts= 0;
9571 stack[0].rkey_func_flag= HA_READ_INVALID;
9572
9573 stack[0].max_key= (uchar*)param->max_key;
9574 stack[0].max_key_flag= 0;
9575 stack[0].max_key_parts= 0;
9576 curr_kp= -1;
9577 }
9578
9579   bool stack_empty() const { return (curr_kp == -1); }
9580
9581 void stack_push_range(SEL_ARG *key_tree);
9582
9583   void stack_pop_range()
9584 {
9585 DBUG_ASSERT(!stack_empty());
9586 if (curr_kp == 0)
9587 reset();
9588 else
9589 curr_kp--;
9590 }
9591
9592   int stack_size() const { return curr_kp + 1; }
9593
9594   RANGE_SEQ_ENTRY *stack_top()
9595 {
9596 return stack_empty() ? NULL : &stack[curr_kp];
9597 }
9598 };
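
/*
  Minimal usage sketch (not compiled): how this sequence is driven
  through the RANGE_SEQ_IF interface, mirroring the setup done by
  check_quick_select() below before the sequence is handed to the MRR
  layer:

    Sel_arg_range_sequence seq(param);
    seq.keyno= idx;
    seq.real_keyno= param->real_keynr[idx];
    seq.start= tree;                 // SEL_ARG root for the first keypart

    range_seq_t it= sel_arg_range_seq_init(&seq, 0, 0);
    KEY_MULTI_RANGE range;
    while (sel_arg_range_seq_next(it, &range) == 0)
    {
      // range.start_key / range.end_key describe one index interval
    }
*/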
9599
9600
9601 /*
9602 Range sequence interface, SEL_ARG* implementation: Initialize the traversal
9603
9604 SYNOPSIS
9605 init()
9606 init_params SEL_ARG tree traversal context
9607 n_ranges [ignored] The number of ranges obtained
9608 flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
9609
9610 RETURN
9611 Value of init_param
9612 */
9613
9614 range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
9615 {
9616 Sel_arg_range_sequence *seq=
9617 static_cast<Sel_arg_range_sequence*>(init_param);
9618 seq->reset();
9619 return init_param;
9620 }
9621
9622
9623 void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree)
9624 {
9625
9626 DBUG_ASSERT((uint)curr_kp+1 < MAX_REF_PARTS);
9627
9628 RANGE_SEQ_ENTRY *push_position= &stack[curr_kp + 1];
9629 RANGE_SEQ_ENTRY *last_added_kp= stack_top();
9630 if (stack_empty())
9631 {
9632 /*
9633 If we get here this is either
9634 a) the first time a range sequence is constructed for this
9635 range access method (in which case stack[0] has not been
9636 modified since the constructor was called), or
9637 b) there are multiple ranges for the first keypart in the
9638 condition (and we have called stack_pop_range() to empty
9639 the stack).
9640 In both cases, reset() has been called and all fields in
9641 push_position have been reset. All we need to do is to copy the
9642 min/max key flags from the predicate we're about to add to
9643 stack[0].
9644 */
9645 push_position->min_key_flag= key_tree->min_flag;
9646 push_position->max_key_flag= key_tree->max_flag;
9647 push_position->rkey_func_flag= key_tree->rkey_func_flag;
9648 }
9649 else
9650 {
9651 push_position->min_key= last_added_kp->min_key;
9652 push_position->max_key= last_added_kp->max_key;
9653 push_position->min_key_parts= last_added_kp->min_key_parts;
9654 push_position->max_key_parts= last_added_kp->max_key_parts;
9655 push_position->min_key_flag= last_added_kp->min_key_flag |
9656 key_tree->min_flag;
9657 push_position->max_key_flag= last_added_kp->max_key_flag |
9658 key_tree->max_flag;
9659 push_position->rkey_func_flag= key_tree->rkey_func_flag;
9660 }
9661
9662 push_position->key_tree= key_tree;
9663 uint16 stor_length= param->key[keyno][key_tree->part].store_length;
9664 /* psergey-merge-done:
9665 key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
9666 &cur->min_key, prev->min_key_flag,
9667 &cur->max_key, prev->max_key_flag);
9668 */
9669 push_position->min_key_parts+=
9670 key_tree->store_min(stor_length, &push_position->min_key,
9671 last_added_kp ? last_added_kp->min_key_flag : 0);
9672 push_position->max_key_parts+=
9673 key_tree->store_max(stor_length, &push_position->max_key,
9674 last_added_kp ? last_added_kp->max_key_flag : 0);
9675
9676 if (key_tree->is_null_interval())
9677 push_position->min_key_flag |= NULL_RANGE;
9678 curr_kp++;
9679 }
9680
9681
9682 /*
9683 Range sequence interface, SEL_ARG* implementation: get the next interval
9684 in the R-B tree
9685
9686 SYNOPSIS
9687 sel_arg_range_seq_next()
9688 rseq Value returned from sel_arg_range_seq_init
9689 range OUT Store information about the range here
9690
9691 DESCRIPTION
9692 This is "get_next" function for Range sequence interface implementation
9693 for SEL_ARG* tree.
9694
9695 IMPLEMENTATION
9696 The traversal also updates those param members:
9697 - is_ror_scan
9698 - range_count
9699 - max_key_part
9700
9701 RETURN
9702 0 Ok
9703 1 No more ranges in the sequence
9704
9705 NOTE: append_range_all_keyparts(), which is used to e.g. print
9706 ranges to Optimizer Trace in a human readable format, mimics the
9707 behavior of this function.
9708 */
9709
9710 //psergey-merge-todo: support check_quick_keys:max_keypart
9711 uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
9712 {
9713 SEL_ARG *key_tree;
9714 Sel_arg_range_sequence *seq= static_cast<Sel_arg_range_sequence*>(rseq);
9715
9716 if (seq->stack_empty())
9717 {
9718 /*
9719 This is the first time sel_arg_range_seq_next is called.
9720 seq->start points to the root of the R-B tree for the first
9721 keypart
9722 */
9723 key_tree= seq->start;
9724
9725 /*
9726 Move to the first range for the first keypart. Save this range
9727 in seq->stack[0] and carry on to ranges in the next keypart if
9728 any
9729 */
9730 key_tree= key_tree->first();
9731 seq->stack_push_range(key_tree);
9732 }
9733 else
9734 {
9735 /*
9736 This is not the first time sel_arg_range_seq_next is called, so
9737 seq->stack is populated with the range the last call to this
9738 function found. seq->stack[current_keypart].key_tree points to a
9739 leaf in the R-B tree of the last keypart that was part of the
9740 former range. This is the starting point for finding the next
9741 range. @see Sel_arg_range_sequence::stack
9742 */
9743 // See if there are more ranges in this or any of the previous keyparts
9744 while (true)
9745 {
9746 key_tree= seq->stack_top()->key_tree;
9747 seq->stack_pop_range();
9748 if (key_tree->next)
9749 {
9750 /* This keypart has more ranges */
9751 DBUG_ASSERT(key_tree->next != &null_element);
9752 key_tree= key_tree->next;
9753
9754 /*
9755 save the next range for this keypart and carry on to ranges in
9756 the next keypart if any
9757 */
9758 seq->stack_push_range(key_tree);
9759 seq->param->is_ror_scan= FALSE;
9760 break;
9761 }
9762
9763 if (seq->stack_empty())
9764 {
9765 // There are no more ranges for the first keypart: we're done
9766 return 1;
9767 }
9768 /*
9769 There are no more ranges for the current keypart. Step back
9770 to the previous keypart and see if there are more ranges
9771 there.
9772 */
9773 }
9774 }
9775
9776 DBUG_ASSERT(!seq->stack_empty());
9777
9778 /*
9779 Add range info for the next keypart if
9780 1) there is a range predicate for a later keypart
9781 2) the range predicate is for the next keypart in the index: a
9782 range predicate on keypartX+1 can only be used if there is a
9783 range predicate on keypartX.
9784 3) the range predicate on the next keypart is usable
9785 */
9786 while (key_tree->next_key_part && // 1)
9787 key_tree->next_key_part != &null_element && // 1)
9788 key_tree->next_key_part->part == key_tree->part + 1 && // 2)
9789 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) // 3)
9790 {
9791 {
9792 DBUG_PRINT("info", ("while(): key_tree->part %d",key_tree->part));
9793 RANGE_SEQ_ENTRY *cur= seq->stack_top();
9794 const size_t min_key_total_length= cur->min_key - seq->param->min_key;
9795 const size_t max_key_total_length= cur->max_key - seq->param->max_key;
9796
9797 /*
9798 Check if more ranges can be added. This is the case if all
9799 predicates for keyparts handled so far are equality
9800 predicates. If either of the following apply, there are
9801 non-equality predicates in stack[]:
9802
9803 1) min_key_total_length != max_key_total_length (because
9804 equality ranges are stored as "min_key = max_key = <value>")
9805 2) memcmp(<min_key_values>,<max_key_values>) != 0 (same argument as 1)
9806 3) A min or max flag has been set: Because flags denote ranges
9807 ('<', '<=' etc), any value but 0 indicates a non-equality
9808 predicate.
9809 */
9810
9811 uchar* min_key_start;
9812 uchar* max_key_start;
9813 size_t cur_key_length;
9814
9815 if (seq->stack_size() == 1)
9816 {
9817 min_key_start= seq->param->min_key;
9818 max_key_start= seq->param->max_key;
9819 cur_key_length= min_key_total_length;
9820 }
9821 else
9822 {
9823 const RANGE_SEQ_ENTRY prev= cur[-1];
9824 min_key_start= prev.min_key;
9825 max_key_start= prev.max_key;
9826 cur_key_length= cur->min_key - prev.min_key;
9827 }
9828
9829 if ((min_key_total_length != max_key_total_length) || // 1)
9830 (memcmp(min_key_start, max_key_start, cur_key_length)) || // 2)
9831 (key_tree->min_flag || key_tree->max_flag)) // 3)
9832 {
9833 DBUG_PRINT("info", ("while(): inside if()"));
9834 /*
9835 The range predicate up to and including the one in key_tree
9836 is usable by range access but does not allow subranges made
9837 up from predicates in later keyparts. This may e.g. be
9838 because the predicate operator is "<". Since there are range
9839 predicates on more keyparts, we use those to more closely
9840 specify the start and stop locations for the range. Example:
9841
9842 "SELECT * FROM t1 WHERE a >= 2 AND b >= 3":
9843
9844 t1 content:
9845 -----------
9846 1 1
9847 2 1 <- 1)
9848 2 2
9849 2 3 <- 2)
9850 2 4
9851 3 1
9852 3 2
9853 3 3
9854
9855 The predicate cannot be translated into something like
9856 "(a=2 and b>=3) or (a=3 and b>=3) or ..."
9857 I.e., it cannot be divided into subranges, but by storing
9858 min/max key below we can at least start the scan from 2)
9859 instead of 1)
9860 */
9861 SEL_ARG *store_key_part= key_tree->next_key_part;
9862 seq->param->is_ror_scan= FALSE;
9863 if (!key_tree->min_flag)
9864 cur->min_key_parts +=
9865 store_key_part->store_min_key(seq->param->key[seq->keyno],
9866 &cur->min_key,
9867 &cur->min_key_flag,
9868 MAX_KEY);
9869 if (!key_tree->max_flag)
9870 cur->max_key_parts +=
9871 store_key_part->store_max_key(seq->param->key[seq->keyno],
9872 &cur->max_key,
9873 &cur->max_key_flag,
9874 MAX_KEY);
9875 break;
9876 }
9877 }
9878
9879 /*
9880 There are usable range predicates for the next keypart and the
9881 range predicate for the current keypart allows us to make use of
9882 them. Move to the first range predicate for the next keypart.
9883 Push this range predicate to seq->stack and move on to the next
9884 keypart (if any). @see Sel_arg_range_sequence::stack
9885 */
9886 key_tree= key_tree->next_key_part->first();
9887 seq->stack_push_range(key_tree);
9888 }
9889
9890 DBUG_ASSERT(!seq->stack_empty() && (seq->stack_top() != NULL));
9891
9892 // We now have a full range predicate in seq->stack_top()
9893 RANGE_SEQ_ENTRY *cur= seq->stack_top();
9894 PARAM *param= seq->param;
9895 size_t min_key_length= cur->min_key - param->min_key;
9896
9897 if (cur->min_key_flag & GEOM_FLAG)
9898 {
9899 range->range_flag= cur->min_key_flag;
9900
9901 /* Here minimum contains also function code bits, and maximum is +inf */
9902 range->start_key.key= param->min_key;
9903 range->start_key.length= min_key_length;
9904 range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9905 range->start_key.flag= cur->rkey_func_flag;
9906 /*
9907 Spatial operators are only allowed on spatial indexes, and no
9908 spatial index can at the moment return rows in ROWID order
9909 */
9910 DBUG_ASSERT(!param->is_ror_scan);
9911 }
9912 else
9913 {
9914     const KEY *cur_key_info= &param->table->key_info[seq->real_keyno];
9915 range->range_flag= cur->min_key_flag | cur->max_key_flag;
9916
9917 range->start_key.key= param->min_key;
9918 range->start_key.length= cur->min_key - param->min_key;
9919 range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9920 range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
9921 HA_READ_KEY_EXACT);
9922
9923 range->end_key.key= param->max_key;
9924 range->end_key.length= cur->max_key - param->max_key;
9925 range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
9926 range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
9927 HA_READ_AFTER_KEY);
9928
9929 /*
9930 This is an equality range (keypart_0=X and ... and keypart_n=Z) if
9931 1) There are no flags indicating open range (e.g.,
9932 "keypart_x > y") or GIS.
9933       2) The lower bound and the upper bound of the range have the
9934 same value (min_key == max_key).
9935 */
9936 const uint is_open_range= (NO_MIN_RANGE | NO_MAX_RANGE |
9937 NEAR_MIN | NEAR_MAX | GEOM_FLAG);
9938 const bool is_eq_range_pred=
9939 !(cur->min_key_flag & is_open_range) && // 1)
9940 !(cur->max_key_flag & is_open_range) && // 1)
9941 range->start_key.length == range->end_key.length && // 2)
9942 !memcmp(param->min_key, param->max_key, range->start_key.length);
9943
9944 if (is_eq_range_pred)
9945 {
9946 range->range_flag= EQ_RANGE;
9947 /*
9948 Use statistics instead of index dives for estimates of rows in
9949 this range if the user requested it
9950 */
9951 if (param->use_index_statistics)
9952 range->range_flag|= USE_INDEX_STATISTICS;
9953
9954 /*
9955 An equality range is a unique range (0 or 1 rows in the range)
9956 if the index is unique (1) and all keyparts are used (2).
9957 Note that keys which are extended with PK parts have no
9958 HA_NOSAME flag. So we can use user_defined_key_parts.
9959 */
9960 if (cur_key_info->flags & HA_NOSAME && // 1)
9961 (uint)key_tree->part+1 == cur_key_info->user_defined_key_parts) // 2)
9962 range->range_flag|= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
9963 }
9964
9965 if (param->is_ror_scan)
9966 {
9967 const uint key_part_number= key_tree->part + 1;
9968 /*
9969         If we get here, the condition on the key was converted to the form
9970 "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
9971 somecond(keyXpart{key_tree->part})"
9972 Check if
9973 somecond is "keyXpart{key_tree->part} = const" and
9974 uncovered "tail" of KeyX parts is either empty or is identical to
9975 first members of clustered primary key.
9976
9977 If last key part is PK part added to the key as an extension
9978 and is_key_scan_ror() result is TRUE then it's possible to
9979 use ROR scan.
9980 */
9981 if ((!is_eq_range_pred &&
9982 key_part_number <= cur_key_info->user_defined_key_parts) ||
9983 !is_key_scan_ror(param, seq->real_keyno, key_part_number))
9984 param->is_ror_scan= FALSE;
9985 }
9986 }
9987
9988 seq->param->range_count++;
9989 seq->param->max_key_part=max<uint>(seq->param->max_key_part,key_tree->part);
9990
9991 return 0;
9992 }
9993
9994
9995 /*
9996 Calculate estimate of number records that will be retrieved by a range
9997 scan on given index using given SEL_ARG intervals tree.
9998
9999 SYNOPSIS
10000 check_quick_select()
10001 param Parameter from test_quick_select
10002     idx               Number of index to use in PARAM::key and SEL_TREE::keys
10003 index_only TRUE - assume only index tuples will be accessed
10004 FALSE - assume full table rows will be read
10005 tree Transformed selection condition, tree->key[idx] holds
10006 the intervals for the given index.
10007 update_tbl_stats TRUE <=> update table->quick_* with information
10008 about range scan we've evaluated.
10009 mrr_flags INOUT MRR access flags
10010 cost OUT Scan cost
10011
10012 NOTES
10013 param->is_ror_scan is set to reflect if the key scan is a ROR (see
10014 is_key_scan_ror function for more info)
10015 param->table->quick_*, param->range_count (and maybe others) are
10016 updated with data of given key scan, see quick_range_seq_next for details.
10017
10018 RETURN
10019 Estimate # of records to be retrieved.
10020 HA_POS_ERROR if estimate calculation failed due to table handler problems.
10021 */
10022
10023 static
10024 ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
10025 SEL_ARG *tree, bool update_tbl_stats,
10026 uint *mrr_flags, uint *bufsize, Cost_estimate *cost)
10027 {
10028 Sel_arg_range_sequence seq(param);
10029 RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
10030 handler *file= param->table->file;
10031 ha_rows rows;
10032 uint keynr= param->real_keynr[idx];
10033 DBUG_ENTER("check_quick_select");
10034
10035   /* Handle cases when we don't have a valid non-empty list of ranges */
10036 if (!tree)
10037 DBUG_RETURN(HA_POS_ERROR);
10038 if (tree->type == SEL_ARG::IMPOSSIBLE)
10039 DBUG_RETURN(0L);
10040 if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
10041 DBUG_RETURN(HA_POS_ERROR); // Don't use tree
10042
10043 seq.keyno= idx;
10044 seq.real_keyno= keynr;
10045 seq.start= tree;
10046
10047 param->range_count=0;
10048 param->max_key_part=0;
10049
10050 /*
10051     If there are more equality ranges than specified by the
10052     eq_range_index_dive_limit variable, we switch from using index
10053     dives to using statistics.
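
    Example: with eq_range_index_dive_limit=10, a predicate like
    "kp1 IN (1,2,...,15)" produces 15 equality ranges, so the row
    estimate for this index comes from index statistics instead of 15
    individual index dives.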
10054 */
10055 uint range_count= 0;
10056 param->use_index_statistics=
10057 eq_ranges_exceeds_limit(tree, &range_count,
10058 param->thd->variables.eq_range_index_dive_limit);
10059
10060 param->is_ror_scan= TRUE;
10061 if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
10062 param->is_ror_scan= FALSE;
10063
10064 *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
10065 *mrr_flags|= HA_MRR_NO_ASSOCIATION;
10066 /*
10067 Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
10068 */
10069 if (param->order_direction != ORDER::ORDER_NOT_RELEVANT)
10070 *mrr_flags|= HA_MRR_SORTED;
10071
10072 bool pk_is_clustered= file->primary_key_is_clustered();
10073 if (index_only &&
10074 (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
10075 !(pk_is_clustered && keynr == param->table->s->primary_key))
10076 *mrr_flags |= HA_MRR_INDEX_ONLY;
10077
10078 if (current_thd->lex->sql_command != SQLCOM_SELECT)
10079 *mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
10080
10081 *bufsize= param->thd->variables.read_rnd_buff_size;
10082 // Sets is_ror_scan to false for some queries, e.g. multi-ranges
10083 rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
10084 bufsize, mrr_flags, cost);
10085 if (rows != HA_POS_ERROR)
10086 {
10087 param->table->quick_rows[keynr]=rows;
10088 if (update_tbl_stats)
10089 {
10090 param->table->quick_keys.set_bit(keynr);
10091 param->table->quick_key_parts[keynr]=param->max_key_part+1;
10092 param->table->quick_n_ranges[keynr]= param->range_count;
10093 param->table->quick_condition_rows=
10094 min(param->table->quick_condition_rows, rows);
10095 }
10096 param->table->possible_quick_keys.set_bit(keynr);
10097 }
10098 /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
10099 enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
10100 if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
10101 {
10102 /*
10103 All scans are non-ROR scans for those index types.
10104 TODO: Don't have this logic here, make table engines return
10105 appropriate flags instead.
10106 */
10107 param->is_ror_scan= FALSE;
10108 }
10109 else
10110 {
10111 /* Clustered PK scan is always a ROR scan (TODO: same as above) */
10112 if (param->table->s->primary_key == keynr && pk_is_clustered)
10113 param->is_ror_scan= TRUE;
10114 }
10115 if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
10116 param->is_ror_scan= FALSE;
10117
10118 /*
10119 QUICK_ROR_INTERSECT_SELECT and QUICK_ROR_UNION_SELECT do read_set
10120 manipulations in reset(), which breaks virtual generated column's
10121 computation logic, which is used when reading index values.
10122 So, disable index merge intersection/union for any index on such column.
10123 @todo lift this implementation restriction
10124 */
10125 if (param->table->index_contains_some_virtual_gcol(keynr))
10126 param->is_ror_scan= false;
10127
10128 DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
10129 DBUG_RETURN(rows);
10130 }
10131
10132
10133 /*
10134 Check if key scan on given index with equality conditions on first n key
10135 parts is a ROR scan.
10136
10137 SYNOPSIS
10138 is_key_scan_ror()
10139 param Parameter from test_quick_select
10140 keynr Number of key in the table. The key must not be a clustered
10141 primary key.
10142 nparts Number of first key parts for which equality conditions
10143 are present.
10144
10145 NOTES
10146 ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
10147     an ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
10148
10149 This function is needed to handle a practically-important special case:
10150     an index scan is a ROR scan if it is done using a condition of the form
10151
10152 "key1_1=c_1 AND ... AND key1_n=c_n"
10153
10154 where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
10155
10156 and the table has a clustered Primary Key defined as
10157
10158 PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
10159
10160     i.e. the first key parts of it are identical to uncovered parts of the
10161 key being scanned. This function assumes that the index flags do not
10162 include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
10163
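    Example (hypothetical schema): with a clustered PRIMARY KEY(a) and
    KEY k1(b, a), scanning k1 with "b=5" (nparts=1) is a ROR scan: the
    part of k1 not covered by the equality is exactly the PK prefix, so
    rows are returned in rowid order.
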
10164 Check (1) is made in quick_range_seq_next()
10165
10166 RETURN
10167 TRUE The scan is ROR-scan
10168 FALSE Otherwise
10169 */
10170
10171 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts)
10172 {
10173 KEY *table_key= param->table->key_info + keynr;
10174
10175 /*
10176 Range predicates on hidden key parts do not change the fact
10177 that a scan is rowid ordered, so we only care about user
10178 defined keyparts
10179 */
10180 const uint user_defined_nparts=
10181 std::min<uint>(nparts, table_key->user_defined_key_parts);
10182
10183 KEY_PART_INFO *key_part= table_key->key_part + user_defined_nparts;
10184 KEY_PART_INFO *key_part_end= (table_key->key_part +
10185 table_key->user_defined_key_parts);
10186 uint pk_number;
10187
10188 for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
10189 {
10190 uint16 fieldnr= param->table->key_info[keynr].
10191 key_part[kp - table_key->key_part].fieldnr - 1;
10192 if (param->table->field[fieldnr]->key_length() != kp->length)
10193 return FALSE;
10194 }
10195
10196 if (key_part == key_part_end)
10197 return TRUE;
10198
10199 key_part= table_key->key_part + user_defined_nparts;
10200 pk_number= param->table->s->primary_key;
10201 if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
10202 return FALSE;
10203
10204 KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
10205 KEY_PART_INFO *pk_part_end=
10206 pk_part + param->table->key_info[pk_number].user_defined_key_parts;
10207 for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
10208 ++key_part, ++pk_part)
10209 {
10210 if ((key_part->field != pk_part->field) ||
10211 (key_part->length != pk_part->length))
10212 return FALSE;
10213 }
10214 return (key_part == key_part_end);
10215 }
10216
10217
10218 /*
10219 Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
10220
10221 SYNOPSIS
10222 get_quick_select()
10223 param
10224 idx Index of used key in param->key.
10225 key_tree SEL_ARG tree for the used key
10226 mrr_flags MRR parameter for quick select
10227 mrr_buf_size MRR parameter for quick select
10228 parent_alloc If not NULL, use it to allocate memory for
10229 quick select data. Otherwise use quick->alloc.
10230 NOTES
10231 The caller must call QUICK_SELECT::init for returned quick select.
10232
10233 CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
10234 deallocated when the returned quick select is deleted.
10235
10236 RETURN
10237 NULL on error
10238 otherwise created quick select
10239 */
10240
10241 QUICK_RANGE_SELECT *
10242 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
10243 uint mrr_buf_size, MEM_ROOT *parent_alloc)
10244 {
10245 QUICK_RANGE_SELECT *quick;
10246 bool create_err= FALSE;
10247 DBUG_ENTER("get_quick_select");
10248
10249 if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
10250 quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
10251 param->real_keynr[idx],
10252 MY_TEST(parent_alloc),
10253 parent_alloc, &create_err);
10254 else
10255 quick=new QUICK_RANGE_SELECT(param->thd, param->table,
10256 param->real_keynr[idx],
10257 MY_TEST(parent_alloc), NULL, &create_err);
10258
10259 if (quick)
10260 {
10261 if (create_err ||
10262 get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
10263 param->max_key,0))
10264 {
10265 delete quick;
10266 quick=0;
10267 }
10268 else
10269 {
10270 quick->mrr_flags= mrr_flags;
10271 quick->mrr_buf_size= mrr_buf_size;
10272 quick->key_parts=(KEY_PART*)
10273 memdup_root(parent_alloc? parent_alloc : &quick->alloc,
10274 (char*) param->key[idx],
10275 sizeof(KEY_PART) *
10276 actual_key_parts(&param->
10277 table->key_info[param->real_keynr[idx]]));
10278 }
10279 }
10280 DBUG_RETURN(quick);
10281 }
10282
10283
10284 /*
10285 TODO: Fix this to get all possible sub_ranges
10286 */
10287 bool
10288 get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
10289 SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
10290 uchar *max_key, uint max_key_flag)
10291 {
10292 QUICK_RANGE *range;
10293 uint flag;
10294 int min_part= key_tree->part-1, // # of keypart values in min_key buffer
10295 max_part= key_tree->part-1; // # of keypart values in max_key buffer
10296
10297 if (key_tree->left != &null_element)
10298 {
10299 if (get_quick_keys(param,quick,key,key_tree->left,
10300 min_key,min_key_flag, max_key, max_key_flag))
10301 return 1;
10302 }
10303 uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
10304 min_part+= key_tree->store_min(key[key_tree->part].store_length,
10305 &tmp_min_key,min_key_flag);
10306 max_part+= key_tree->store_max(key[key_tree->part].store_length,
10307 &tmp_max_key,max_key_flag);
10308
10309 if (key_tree->next_key_part &&
10310 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
10311 key_tree->next_key_part->part == key_tree->part+1)
10312 { // const key as prefix
10313 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
10314 memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
10315 key_tree->min_flag==0 && key_tree->max_flag==0)
10316 {
10317 if (get_quick_keys(param,quick,key,key_tree->next_key_part,
10318 tmp_min_key, min_key_flag | key_tree->min_flag,
10319 tmp_max_key, max_key_flag | key_tree->max_flag))
10320 return 1;
10321 goto end; // Ugly, but efficient
10322 }
10323 {
10324 uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
10325 if (!tmp_min_flag)
10326 min_part+= key_tree->next_key_part->store_min_key(key,
10327 &tmp_min_key,
10328 &tmp_min_flag,
10329 MAX_KEY);
10330 if (!tmp_max_flag)
10331 max_part+= key_tree->next_key_part->store_max_key(key,
10332 &tmp_max_key,
10333 &tmp_max_flag,
10334 MAX_KEY);
10335 flag=tmp_min_flag | tmp_max_flag;
10336 }
10337 }
10338 else
10339 {
10340 flag = (key_tree->min_flag & GEOM_FLAG) ?
10341 key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
10342 }
10343
10344 /*
10345 Ensure that some part of min_key and max_key are used. If not,
10346 regard this as no lower/upper range
10347 */
10348 if ((flag & GEOM_FLAG) == 0)
10349 {
10350 if (tmp_min_key != param->min_key)
10351 flag&= ~NO_MIN_RANGE;
10352 else
10353 flag|= NO_MIN_RANGE;
10354 if (tmp_max_key != param->max_key)
10355 flag&= ~NO_MAX_RANGE;
10356 else
10357 flag|= NO_MAX_RANGE;
10358 }
10359 if (flag == 0)
10360 {
10361 uint length= (uint) (tmp_min_key - param->min_key);
10362 if (length == (uint) (tmp_max_key - param->max_key) &&
10363 !memcmp(param->min_key,param->max_key,length))
10364 {
10365 const KEY *table_key=quick->head->key_info+quick->index;
10366 flag=EQ_RANGE;
10367 /*
10368 Note that keys which are extended with PK parts have no
10369 HA_NOSAME flag. So we can use user_defined_key_parts.
10370 */
10371 if ((table_key->flags & HA_NOSAME) &&
10372 key_tree->part == table_key->user_defined_key_parts - 1)
10373 {
10374 if ((table_key->flags & HA_NULL_PART_KEY) &&
10375 null_part_in_key(key,
10376 param->min_key,
10377 (uint) (tmp_min_key - param->min_key)))
10378 flag|= NULL_RANGE;
10379 else
10380 flag|= UNIQUE_RANGE;
10381 }
10382 }
10383 }
10384
10385 /* Get range for retrieving rows in QUICK_SELECT::get_next */
10386 if (!(range= new QUICK_RANGE(param->min_key,
10387 (uint) (tmp_min_key - param->min_key),
10388 min_part >=0 ? make_keypart_map(min_part) : 0,
10389 param->max_key,
10390 (uint) (tmp_max_key - param->max_key),
10391 max_part >=0 ? make_keypart_map(max_part) : 0,
10392 flag, key_tree->rkey_func_flag)))
10393 return 1; // out of memory
10394
10395 set_if_bigger(quick->max_used_key_length, range->min_length);
10396 set_if_bigger(quick->max_used_key_length, range->max_length);
10397 set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
10398 if (quick->ranges.push_back(range))
10399 return 1;
10400
10401 end:
10402 if (key_tree->right != &null_element)
10403 return get_quick_keys(param,quick,key,key_tree->right,
10404 min_key,min_key_flag,
10405 max_key,max_key_flag);
10406 return 0;
10407 }
10408
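/*
  Sketch of the traversal that get_quick_keys() above performs (an
  illustration only; the helper names walk(), append_node_interval(),
  is_single_point() and emit_range() are hypothetical, and the min/max
  flag bookkeeping is omitted):

    void walk(SEL_ARG *node)
    {
      if (node->left != &null_element)
        walk(node->left);                    // smaller intervals first
      append_node_interval(node);            // store_min()/store_max()
      if (is_single_point(node) && node->next_key_part)
        walk(node->next_key_part);           // extend the key prefix
      else
        emit_range();                        // create one QUICK_RANGE
      if (node->right != &null_element)
        walk(node->right);                   // then larger intervals
    }

  Recursing into next_key_part only for single-point intervals is what
  produces multi-keypart ranges such as (a=1,b=2) - (a=1,b=5).
*/
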
10409 /*
10410 Return 1 if there is only one range and this uses the whole unique key
10411 */
10412
10413 bool QUICK_RANGE_SELECT::unique_key_range()
10414 {
10415 if (ranges.size() == 1)
10416 {
10417 QUICK_RANGE *tmp= ranges[0];
10418 if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
10419 {
10420 KEY *key=head->key_info+index;
10421 return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
10422 }
10423 }
10424 return 0;
10425 }
10426
10427
10428
10429 /*
10430 Return TRUE if any part of the key is NULL
10431
10432 SYNOPSIS
10433 null_part_in_key()
10434 key_part Array of key parts (index description)
10435 key Key values tuple
10436 length Length of key values tuple in bytes.
10437
10438 RETURN
10439 TRUE The tuple has at least one "keypartX is NULL"
10440 FALSE Otherwise
10441 */
10442
10443 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
10444 {
10445 for (const uchar *end=key+length ;
10446 key < end;
10447 key+= key_part++->store_length)
10448 {
10449 if (key_part->null_bit && *key)
10450 return 1;
10451 }
10452 return 0;
10453 }
10454
10455
10456 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
10457 {
10458 return is_key_used(head, index, fields);
10459 }
10460
10461 bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
10462 {
10463 QUICK_RANGE_SELECT *quick;
10464 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
10465 while ((quick= it++))
10466 {
10467 if (is_key_used(head, quick->index, fields))
10468 return 1;
10469 }
10470 return 0;
10471 }
10472
10473 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
10474 {
10475 QUICK_RANGE_SELECT *quick;
10476 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
10477 while ((quick= it++))
10478 {
10479 if (is_key_used(head, quick->index, fields))
10480 return 1;
10481 }
10482 return 0;
10483 }
10484
10485 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
10486 {
10487 QUICK_SELECT_I *quick;
10488 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
10489 while ((quick= it++))
10490 {
10491 if (quick->is_keys_used(fields))
10492 return 1;
10493 }
10494 return 0;
10495 }
10496
10497
10498 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
10499 {
10500 bool create_err= FALSE;
10501 FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
10502 if (create_err)
10503 {
10504 delete fts;
10505 return NULL;
10506 }
10507 else
10508 return fts;
10509 }
10510
10511
10512 /*
10513 Check if any columns in the key value specified
10514 by 'key_info' has a NULL-value.
10515 */
10516
10517 static bool
10518 key_has_nulls(const KEY* key_info, const uchar *key, uint key_len)
10519 {
10520 KEY_PART_INFO *curr_part, *end_part;
10521 const uchar* end_ptr= key + key_len;
10522 curr_part= key_info->key_part;
10523 end_part= curr_part + key_info->user_defined_key_parts;
10524
10525 for (; curr_part != end_part && key < end_ptr; curr_part++)
10526 {
10527 if (curr_part->null_bit && *key)
10528 return TRUE;
10529
10530 key += curr_part->store_length;
10531 }
10532 return FALSE;
10533 }
10534
10535 /*
10536 Create quick select from ref/ref_or_null scan.
10537
10538 SYNOPSIS
10539 get_quick_select_for_ref()
10540 thd Thread handle
10541 table Table to access
10542 ref ref[_or_null] scan parameters
10543 records Estimate of number of records (needed only to construct
10544 quick select)
10545 NOTES
10546 This allocates things in a new memory root, as this may be called many
10547 times during a query.
10548
10549 RETURN
10550 Quick select that retrieves the same rows as passed ref scan
10551 NULL on error.
10552 */
10553
10554 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
10555 TABLE_REF *ref, ha_rows records)
10556 {
10557 MEM_ROOT *old_root, *alloc;
10558 QUICK_RANGE_SELECT *quick;
10559 KEY *key_info = &table->key_info[ref->key];
10560 KEY_PART *key_part;
10561 QUICK_RANGE *range;
10562 uint part;
10563 bool create_err= FALSE;
10564 Cost_estimate cost;
10565
10566 old_root= thd->mem_root;
10567 /* The following call may change thd->mem_root */
10568 quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
10569 /* save mem_root set by QUICK_RANGE_SELECT constructor */
10570 alloc= thd->mem_root;
10571 /*
10572 return back default mem_root (thd->mem_root) changed by
10573 QUICK_RANGE_SELECT constructor
10574 */
10575 thd->mem_root= old_root;
10576
10577 if (!quick || create_err)
10578 return 0; /* no ranges found */
10579 if (quick->init())
10580 goto err;
10581 quick->records= records;
10582
10583 if (!(range= new (alloc) QUICK_RANGE()))
10584 goto err; // out of memory
10585
10586 range->min_key= range->max_key= ref->key_buff;
10587 range->min_length= range->max_length= ref->key_length;
10588 range->min_keypart_map= range->max_keypart_map=
10589 make_prev_keypart_map(ref->key_parts);
10590 range->flag= (ref->key_length == key_info->key_length ? EQ_RANGE : 0);
10591
10592 if (!(quick->key_parts=key_part=(KEY_PART *)
10593 alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
10594 goto err;
10595
10596 for (part=0 ; part < ref->key_parts ;part++,key_part++)
10597 {
10598 key_part->part=part;
10599 key_part->field= key_info->key_part[part].field;
10600 key_part->length= key_info->key_part[part].length;
10601 key_part->store_length= key_info->key_part[part].store_length;
10602 key_part->null_bit= key_info->key_part[part].null_bit;
10603 key_part->flag= (uint8) key_info->key_part[part].key_part_flag;
10604 }
10605 if (quick->ranges.push_back(range))
10606 goto err;
10607
10608 /*
10609 Add a NULL range if REF_OR_NULL optimization is used.
10610 For example:
10611 if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
10612 and have ref->null_ref_key set. Will create a new NULL range here.
10613 */
10614 if (ref->null_ref_key)
10615 {
10616 QUICK_RANGE *null_range;
10617
10618 *ref->null_ref_key= 1; // Set null byte then create a range
10619 if (!(null_range= new (alloc)
10620 QUICK_RANGE(ref->key_buff, ref->key_length,
10621 make_prev_keypart_map(ref->key_parts),
10622 ref->key_buff, ref->key_length,
10623 make_prev_keypart_map(ref->key_parts), EQ_RANGE,
10624 HA_READ_INVALID)))
10625 goto err;
10626 *ref->null_ref_key= 0; // Clear null byte
10627 if (quick->ranges.push_back(null_range))
10628 goto err;
10629 }
10630
10631 /* Call multi_range_read_info() to get the MRR flags and buffer size */
10632 quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
10633 (table->key_read ? HA_MRR_INDEX_ONLY : 0);
10634 if (thd->lex->sql_command != SQLCOM_SELECT)
10635 quick->mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
10636 if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
10637 ref->key_length))
10638 quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
10639
10640 quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
10641 if (table->file->multi_range_read_info(quick->index, 1,
10642 static_cast<uint>(records),
10643 &quick->mrr_buf_size,
10644 &quick->mrr_flags, &cost))
10645 goto err;
10646
10647 return quick;
10648 err:
10649 delete quick;
10650 return 0;
10651 }
10652
10653
10654 /*
10655   Perform key scans for all used indexes (except CPK), get rowids and merge
10656   them into an ordered, duplicate-free sequence of rowids.
10657
10658 The merge/duplicate removal is performed using Unique class. We put all
10659 rowids into Unique, get the sorted sequence and destroy the Unique.
10660
10661 If table has a clustered primary key that covers all rows (TRUE for bdb
10662 and innodb currently) and one of the index_merge scans is a scan on PK,
10663 then rows that will be retrieved by PK scan are not put into Unique and
10664 primary key scan is not performed here, it is performed later separately.
10665
10666 RETURN
10667 0 OK
10668 other error
10669 */
10670
10671 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
10672 {
10673 List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
10674 QUICK_RANGE_SELECT* cur_quick;
10675 int result;
10676 handler *file= head->file;
10677 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
10678
10679 /* We're going to just read rowids. */
10680 head->set_keyread(TRUE);
10681 head->prepare_for_position();
10682
10683 cur_quick_it.rewind();
10684 cur_quick= cur_quick_it++;
10685 DBUG_ASSERT(cur_quick != 0);
10686
10687 DBUG_EXECUTE_IF("simulate_bug13919180",
10688 {
10689 my_error(ER_UNKNOWN_ERROR, MYF(0));
10690 DBUG_RETURN(1);
10691 });
10692 /*
10693 We reuse the same instance of handler so we need to call both init and
10694 reset here.
10695 */
10696 if (cur_quick->init() || cur_quick->reset())
10697 DBUG_RETURN(1);
10698
10699 if (unique == NULL)
10700 {
10701 DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_ABORT(); );
10702 DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
10703 DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );
10704
10705 unique= new Unique(refpos_order_cmp, (void *)file,
10706 file->ref_length,
10707 thd->variables.sortbuff_size);
10708 }
10709 else
10710 {
10711 unique->reset();
10712 filesort_free_buffers(head, false);
10713 }
10714
10715 DBUG_ASSERT(file->ref_length == unique->get_size());
10716 DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
10717
10718 if (!unique)
10719 DBUG_RETURN(1);
10720 for (;;)
10721 {
10722 while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
10723 {
10724 cur_quick->range_end();
10725 cur_quick= cur_quick_it++;
10726 if (!cur_quick)
10727 break;
10728
10729 if (cur_quick->file->inited)
10730 cur_quick->file->ha_index_end();
10731 if (cur_quick->init() || cur_quick->reset())
10732 DBUG_RETURN(1);
10733 }
10734
10735 if (result)
10736 {
10737 if (result != HA_ERR_END_OF_FILE)
10738 {
10739 cur_quick->range_end();
10740 DBUG_RETURN(result);
10741 }
10742 break;
10743 }
10744
10745 if (thd->killed)
10746 DBUG_RETURN(1);
10747
10748 /* skip row if it will be retrieved by clustered PK scan */
10749 if (pk_quick_select && pk_quick_select->row_in_ranges())
10750 continue;
10751
10752 cur_quick->file->position(cur_quick->record);
10753 result= unique->unique_add((char*)cur_quick->file->ref);
10754 if (result)
10755 DBUG_RETURN(1);
10756 }
10757
10758 /*
10759 Ok all rowids are in the Unique now. The next call will initialize
10760 head->sort structure so it can be used to iterate through the rowids
10761 sequence.
10762 */
10763 result= unique->get(head);
10764 doing_pk_scan= FALSE;
10765 /* index_merge currently doesn't support "using index" at all */
10766 head->set_keyread(FALSE);
10767 if (init_read_record(&read_record, thd, head, NULL, 1, 1, TRUE))
10768 DBUG_RETURN(1);
10769 DBUG_RETURN(result);
10770 }
10771
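/*
  Control flow of read_keys_and_merge() above, reduced to a sketch
  (illustration only; error handling and handler re-initialization are
  omitted):

    for each non-CPK quick select qs:
      qs->init(); qs->reset();
      while (qs->get_next() != HA_ERR_END_OF_FILE)
      {
        if (pk_quick_select && pk_quick_select->row_in_ranges())
          continue;                       // row comes from the PK scan later
        qs->file->position(qs->record);   // obtain the rowid
        unique->unique_add(qs->file->ref);
      }
    unique->get(head);                    // sorted, duplicate-free rowids

  Duplicates across scans are eliminated because Unique orders rowids
  with refpos_order_cmp, a wrapper around handler::cmp_ref.
*/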
10772
10773 /*
10774 Get next row for index_merge.
10775 NOTES
10776 The rows are read from
10777 1. rowids stored in Unique.
10778 2. QUICK_RANGE_SELECT with clustered primary key (if any).
10779 The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
10780 */
10781
10782 int QUICK_INDEX_MERGE_SELECT::get_next()
10783 {
10784 int result;
10785 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
10786
10787 if (doing_pk_scan)
10788 DBUG_RETURN(pk_quick_select->get_next());
10789
10790 if ((result= read_record.read_record(&read_record)) == -1)
10791 {
10792 result= HA_ERR_END_OF_FILE;
10793 end_read_record(&read_record);
10794 free_io_cache(head);
10795 /* All rows from Unique have been retrieved, do a clustered PK scan */
10796 if (pk_quick_select)
10797 {
10798 doing_pk_scan= TRUE;
10799 if ((result= pk_quick_select->init()) ||
10800 (result= pk_quick_select->reset()))
10801 DBUG_RETURN(result);
10802 DBUG_RETURN(pk_quick_select->get_next());
10803 }
10804 }
10805
10806 DBUG_RETURN(result);
10807 }
10808
10809
10810 /*
10811 Retrieve next record.
10812 SYNOPSIS
10813 QUICK_ROR_INTERSECT_SELECT::get_next()
10814
10815 NOTES
10816 Invariant on enter/exit: all intersected selects have retrieved all index
10817 records with rowid <= some_rowid_val and no intersected select has
10818 retrieved any index records with rowid > some_rowid_val.
10819 We start fresh and loop until we have retrieved the same rowid in each of
10820 the key scans or we got an error.
10821
10822 If a Clustered PK scan is present, it is used only to check if row
10823 satisfies its condition (and never used for row retrieval).
10824
10825 Locking: to ensure that exclusive locks are only set on records that
10826 are included in the final result we must release the lock
10827 on all rows we read but do not include in the final result. This
10828 must be done on each index that reads the record and the lock
10829 must be released using the same handler (the same quick object) as
10830 used when reading the record.
10831
10832 RETURN
10833 0 - Ok
10834 other - Error code if any error occurred.
10835 */
10836
10837 int QUICK_ROR_INTERSECT_SELECT::get_next()
10838 {
10839 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
10840 QUICK_RANGE_SELECT* quick;
10841
10842   /* The quick select that read the given rowid first; needed so the row
10843      can be unlocked using the same handler object (the same quick object)
10844      that locked it. */
10845 QUICK_RANGE_SELECT* quick_with_last_rowid;
10846
10847 int error, cmp;
10848 uint last_rowid_count=0;
10849 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");
10850
10851 do
10852 {
10853 /* Get a rowid for first quick and save it as a 'candidate' */
10854 quick= quick_it++;
10855 error= quick->get_next();
10856 if (cpk_quick)
10857 {
10858 while (!error && !cpk_quick->row_in_ranges())
10859 {
10860 quick->file->unlock_row(); /* row not in range; unlock */
10861 error= quick->get_next();
10862 }
10863 }
10864 if (error)
10865 DBUG_RETURN(error);
10866
10867 quick->file->position(quick->record);
10868 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
10869 last_rowid_count= 1;
10870 quick_with_last_rowid= quick;
10871
10872 while (last_rowid_count < quick_selects.elements)
10873 {
10874 if (!(quick= quick_it++))
10875 {
10876 quick_it.rewind();
10877 quick= quick_it++;
10878 }
10879
10880 do
10881 {
10882 DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
10883 DBUG_SET("+d,innodb_report_deadlock"););
10884 if ((error= quick->get_next()))
10885 {
10886 /* On certain errors like deadlock, trx might be rolled back.*/
10887 if (!current_thd->transaction_rollback_request)
10888 quick_with_last_rowid->file->unlock_row();
10889 DBUG_RETURN(error);
10890 }
10891 quick->file->position(quick->record);
10892 cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
10893 if (cmp < 0)
10894 {
10895 /* This row is being skipped. Release lock on it. */
10896 quick->file->unlock_row();
10897 }
10898 } while (cmp < 0);
10899
10900 /* Ok, current select 'caught up' and returned ref >= cur_ref */
10901 if (cmp > 0)
10902 {
10903 /* Found a row with ref > cur_ref. Make it a new 'candidate' */
10904 if (cpk_quick)
10905 {
10906 while (!cpk_quick->row_in_ranges())
10907 {
10908 quick->file->unlock_row(); /* row not in range; unlock */
10909 if ((error= quick->get_next()))
10910 {
10911 /* On certain errors like deadlock, trx might be rolled back.*/
10912 if (!current_thd->transaction_rollback_request)
10913 quick_with_last_rowid->file->unlock_row();
10914 DBUG_RETURN(error);
10915 }
10916 }
10917 quick->file->position(quick->record);
10918 }
10919 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
10920 quick_with_last_rowid->file->unlock_row();
10921 last_rowid_count= 1;
10922 quick_with_last_rowid= quick;
10923 }
10924 else
10925 {
10926 /* current 'candidate' row confirmed by this select */
10927 last_rowid_count++;
10928 }
10929 }
10930
10931 /* We get here if we got the same row ref in all scans. */
10932 if (need_to_fetch_row)
10933 error= head->file->ha_rnd_pos(head->record[0], last_rowid);
10934 } while (error == HA_ERR_RECORD_DELETED);
10935 DBUG_RETURN(error);
10936 }
10937
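/*
  The intersection loop of QUICK_ROR_INTERSECT_SELECT::get_next() above in
  sketch form (illustration only; row locking and the optional cpk_quick
  filtering are omitted):

    candidate= rowid of first scan's next row;
    matched= 1;
    while (matched < number_of_scans)
    {
      scan= next scan, round-robin;
      do
        rowid= scan's next row's rowid;        // emitted in cmp_ref order
      while (cmp_ref(rowid, candidate) < 0);   // catch up to candidate
      if (cmp_ref(rowid, candidate) > 0)
      {
        candidate= rowid;                      // new, larger candidate
        matched= 1;
      }
      else
        matched++;                             // scan confirmed candidate
    }
    return candidate;                          // present in every scan

  The loop terminates because every scan emits rowids in increasing
  cmp_ref order, so the candidate value never decreases.
*/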
10938
10939 /*
10940 Retrieve next record.
10941 SYNOPSIS
10942 QUICK_ROR_UNION_SELECT::get_next()
10943
10944 NOTES
10945 Enter/exit invariant:
10946 For each quick select in the queue a {key,rowid} tuple has been
10947 retrieved but the corresponding row hasn't been passed to output.
10948
10949 RETURN
10950 0 - Ok
10951 other - Error code if any error occurred.
10952 */
10953
10954 int QUICK_ROR_UNION_SELECT::get_next()
10955 {
10956 int error, dup_row;
10957 QUICK_SELECT_I *quick;
10958 uchar *tmp;
10959 DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
10960
10961 do
10962 {
10963 do
10964 {
10965 if (queue.empty())
10966 DBUG_RETURN(HA_ERR_END_OF_FILE);
10967 /* Ok, we have a queue with >= 1 scans */
10968
10969 quick= queue.top();
10970 memcpy(cur_rowid, quick->last_rowid, rowid_length);
10971
10972 /* put into queue rowid from the same stream as top element */
10973 if ((error= quick->get_next()))
10974 {
10975 if (error != HA_ERR_END_OF_FILE)
10976 DBUG_RETURN(error);
10977 queue.pop();
10978 }
10979 else
10980 {
10981 quick->save_last_pos();
10982 queue.update_top();
10983 }
10984
10985 if (!have_prev_rowid)
10986 {
10987 /* No rows have been returned yet */
10988 dup_row= FALSE;
10989 have_prev_rowid= TRUE;
10990 }
10991 else
10992 dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
10993 } while (dup_row);
10994
10995 tmp= cur_rowid;
10996 cur_rowid= prev_rowid;
10997 prev_rowid= tmp;
10998
10999 error= head->file->ha_rnd_pos(quick->record, prev_rowid);
11000 } while (error == HA_ERR_RECORD_DELETED);
11001 DBUG_RETURN(error);
11002 }
11003
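/*
  QUICK_ROR_UNION_SELECT::get_next() above is a k-way merge over
  rowid-ordered streams. One step, in sketch form (illustration only;
  HA_ERR_RECORD_DELETED retries and the cur_rowid/prev_rowid buffer swap
  are omitted):

    top= queue.top();                   // stream with the smallest rowid
    rowid= top->last_rowid;
    if (top->get_next() == HA_ERR_END_OF_FILE)
      queue.pop();                      // stream exhausted
    else
      queue.update_top();               // re-order by the new rowid
    if (cmp_ref(rowid, previously returned rowid) == 0)
      retry;                            // duplicate from another stream
    return ha_rnd_pos(record, rowid);   // fetch the actual row

  Because each stream is rowid-ordered, equal rowids from different
  streams surface consecutively, so remembering only the previously
  returned rowid suffices for duplicate elimination.
*/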
11004
11005 int QUICK_RANGE_SELECT::reset()
11006 {
11007 uint buf_size;
11008 uchar *mrange_buff;
11009 int error;
11010 HANDLER_BUFFER empty_buf;
11011 DBUG_ENTER("QUICK_RANGE_SELECT::reset");
11012 last_range= NULL;
11013 cur_range= ranges.begin();
11014
11015 /* set keyread to TRUE if index is covering */
11016 if (!head->no_keyread && head->covering_keys.is_set(index))
11017 head->set_keyread(true);
11018 else
11019 head->set_keyread(false);
11020
11021 if (!file->inited)
11022 {
11023 /*
11024 read_set is set to the correct value for a ror_merged_scan here, as
11025 subquery execution during optimization might otherwise leave InnoDB
11026 with an uninitialized read set in index_read(), leading to wrong
11027 results while merging.
11028 */
11029 MY_BITMAP * const save_read_set= head->read_set;
11030 MY_BITMAP * const save_write_set= head->write_set;
11031 const bool sorted= (mrr_flags & HA_MRR_SORTED);
11032 DBUG_EXECUTE_IF("bug14365043_2",
11033 DBUG_SET("+d,ha_index_init_fail"););
11034
11035 /* Pass index specific read set for ror_merged_scan */
11036 if (in_ror_merged_scan)
11037 {
11038 /*
11039 We don't need to signal the bitmap change as the bitmap is always the
11040 same for this head->file
11041 */
11042 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
11043 }
11044 if ((error= file->ha_index_init(index, sorted)))
11045 {
11046 file->print_error(error, MYF(0));
11047 DBUG_RETURN(error);
11048 }
11049 if (in_ror_merged_scan)
11050 {
11051 /* Restore bitmaps set on entry */
11052 head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
11053 }
11054 }
11055
11056 /* Allocate buffer if we need one but haven't allocated it yet */
11057 if (mrr_buf_size && !mrr_buf_desc)
11058 {
11059 buf_size= mrr_buf_size;
11060 while (buf_size && !my_multi_malloc(key_memory_QUICK_RANGE_SELECT_mrr_buf_desc,
11061 MYF(MY_WME),
11062 &mrr_buf_desc, sizeof(*mrr_buf_desc),
11063 &mrange_buff, buf_size,
11064 NullS))
11065 {
11066 /* Try to shrink the buffers until both are 0. */
11067 buf_size/= 2;
11068 }
11069 if (!mrr_buf_desc)
11070 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
11071
11072 /* Initialize the handler buffer. */
11073 mrr_buf_desc->buffer= mrange_buff;
11074 mrr_buf_desc->buffer_end= mrange_buff + buf_size;
11075 mrr_buf_desc->end_of_used_area= mrange_buff;
11076 }
11077
11078 if (!mrr_buf_desc)
11079 empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
11080
11081 RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
11082 error= file->multi_range_read_init(&seq_funcs, this, ranges.size(),
11083 mrr_flags, mrr_buf_desc? mrr_buf_desc:
11084 &empty_buf);
11085 DBUG_RETURN(error);
11086 }
11087
11088
11089 /*
11090 Range sequence interface implementation for array<QUICK_RANGE>: initialize
11091
11092 SYNOPSIS
11093 quick_range_seq_init()
11094 init_param Caller-opaque parameter: QUICK_RANGE_SELECT* pointer
11095 n_ranges Number of ranges in the sequence (ignored)
11096 flags MRR flags (currently not used)
11097
11098 RETURN
11099 Opaque value to be passed to quick_range_seq_next
11100 */
11101
11102 range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
11103 {
11104 QUICK_RANGE_SELECT *quick= static_cast<QUICK_RANGE_SELECT*>(init_param);
11105 quick->qr_traversal_ctx.first= quick->ranges.begin();
11106 quick->qr_traversal_ctx.cur= quick->ranges.begin();
11107 quick->qr_traversal_ctx.last= quick->ranges.end();
11108 return &quick->qr_traversal_ctx;
11109 }
11110
11111
11112 /*
11113 Range sequence interface implementation for array<QUICK_RANGE>: get next
11114
11115 SYNOPSIS
11116 quick_range_seq_next()
11117 rseq Value returned from quick_range_seq_init
11118 range OUT Store information about the range here
11119
11120 RETURN
11121 0 Ok
11122 1 No more ranges in the sequence
11123 */
11124
11125 uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
11126 {
11127 QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
11128
11129 if (ctx->cur == ctx->last)
11130 return 1; /* no more ranges */
11131
11132 QUICK_RANGE *cur= *(ctx->cur);
11133 key_range *start_key= &range->start_key;
11134 key_range *end_key= &range->end_key;
11135
11136 start_key->key= cur->min_key;
11137 start_key->length= cur->min_length;
11138 start_key->keypart_map= cur->min_keypart_map;
11139 start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
11140 (cur->flag & EQ_RANGE) ?
11141 HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
11142 end_key->key= cur->max_key;
11143 end_key->length= cur->max_length;
11144 end_key->keypart_map= cur->max_keypart_map;
11145 /*
11146 We use HA_READ_AFTER_KEY here because we may be reading on a key
11147 prefix, and we want to find all keys with this prefix.
11148 */
11149 end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
11150 HA_READ_AFTER_KEY);
11151 range->range_flag= cur->flag;
11152 ctx->cur++;
11153 return 0;
11154 }
11155
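/*
  How an MRR implementation consumes this range sequence (a minimal
  sketch, not the actual server code):

    RANGE_SEQ_IF seq= {quick_range_seq_init, quick_range_seq_next, 0, 0};
    range_seq_t it= seq.init(quick, n_ranges, flags);
    KEY_MULTI_RANGE r;
    while (!seq.next(it, &r))
    {
      // r.start_key / r.end_key now describe one interval; the handler
      // would issue read_range_first(&r.start_key, &r.end_key, ...) or
      // an engine-specific batched read for it here.
    }
*/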
11156
11157 /*
11158 MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
11159
11160 SYNOPSIS
11161 mrr_persistent_flag_storage()
11162 seq Range sequence being traversed
11163 idx Number of range
11164
11165 DESCRIPTION
11166 MRR/NDB implementation needs to store some bits for each range. This
11167 function returns a reference to the "range_flag" associated with the
11168 range number idx.
11169
11170 This function should be removed when we get a proper MRR/NDB
11171 implementation.
11172
11173 RETURN
11174 Reference to range_flag associated with range number #idx
11175 */
11176
11177 uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
11178 {
11179 QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
11180 return ctx->first[idx]->flag;
11181 }
11182
11183
11184 /*
11185 MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
11186
11187 SYNOPSIS
11188 mrr_get_ptr_by_idx()
11189 seq Range sequence being traversed
11190 idx Number of the range
11191
11192 DESCRIPTION
11193 An extension of MRR range sequence interface needed by NDB: return the
11194 data associated with the given range.
11195
11196 A proper MRR interface implementer is supposed to store and return
11197 range-associated data. NDB stores number of the range instead. So this
11198 is a helper function that translates range number to range associated
11199 data.
11200
11201 This function does nothing, as currently there is only one user of the
11202 MRR interface - the quick range select code, and this user doesn't need
11203 to use range-associated data.
11204
11205 RETURN
11206 Reference to range-associated data
11207 */
11208
11209 char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
11210 {
11211 static char *dummy;
11212 return dummy;
11213 }
11214
11215
11216 /*
11217 Get next possible record using quick-struct.
11218
11219 SYNOPSIS
11220 QUICK_RANGE_SELECT::get_next()
11221
11222 NOTES
11223 Record is read into table->record[0]
11224
11225 RETURN
11226 0 Found row
11227 HA_ERR_END_OF_FILE No (more) rows in range
11228 # Error code
11229 */
11230
11231 int QUICK_RANGE_SELECT::get_next()
11232 {
11233 char *dummy;
11234 MY_BITMAP * const save_read_set= head->read_set;
11235 MY_BITMAP * const save_write_set= head->write_set;
11236 DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
11237
11238 if (in_ror_merged_scan)
11239 {
11240 /*
11241 We don't need to signal the bitmap change as the bitmap is always the
11242 same for this head->file
11243 */
11244 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
11245 }
11246
11247 int result= file->multi_range_read_next(&dummy);
11248
11249 if (in_ror_merged_scan)
11250 {
11251 /* Restore bitmaps set on entry */
11252 head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
11253 }
11254 DBUG_RETURN(result);
11255 }
11256
11257
11258 /*
11259 Get the next record with a different prefix.
11260
11261 @param prefix_length length of cur_prefix
11262 @param group_key_parts The number of key parts in the group prefix
11263 @param cur_prefix prefix of a key to be searched for
11264
11265 Each subsequent call to the method retrieves the first record that has a
11266 prefix with length prefix_length and which is different from cur_prefix,
11267 such that the record with the new prefix is within the ranges described by
11268 this->ranges. The record found is stored in the buffer pointed to by
11269 this->record. The method is useful for GROUP-BY queries with range
11270 conditions to discover the prefix of the next group that satisfies the range
11271 conditions.
11272
11273 @todo
11274
11275 This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
11276 methods should be unified into a more general one to reduce code
11277 duplication.
11278
11279 @retval 0 on success
11280 @retval HA_ERR_END_OF_FILE if returned all keys
11281 @retval other if some error occurred
11282 */
11283
11284 int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
11285 uint group_key_parts,
11286 uchar *cur_prefix)
11287 {
11288 DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
11289 const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);
11290
11291 for (;;)
11292 {
11293 int result;
11294 if (last_range)
11295 {
11296 /* Read the next record in the same range with prefix after cur_prefix. */
11297 DBUG_ASSERT(cur_prefix != NULL);
11298 result= file->ha_index_read_map(record, cur_prefix, keypart_map,
11299 HA_READ_AFTER_KEY);
11300 if (result || last_range->max_keypart_map == 0)
11301 DBUG_RETURN(result);
11302
11303 key_range previous_endpoint;
11304 last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
11305 if (file->compare_key(&previous_endpoint) <= 0)
11306 DBUG_RETURN(0);
11307 }
11308
11309 const size_t count= ranges.size() - (cur_range - ranges.begin());
11310 if (count == 0)
11311 {
11312 /* Ranges have already been used up before. None is left for read. */
11313 last_range= 0;
11314 DBUG_RETURN(HA_ERR_END_OF_FILE);
11315 }
11316 last_range= *(cur_range++);
11317
11318 key_range start_key, end_key;
11319 last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
11320 last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
11321
11322 const bool sorted= (mrr_flags & HA_MRR_SORTED);
11323 result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
11324 last_range->max_keypart_map ? &end_key : 0,
11325 MY_TEST(last_range->flag & EQ_RANGE),
11326 sorted);
11327 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
11328 last_range= 0; // Stop searching
11329
11330 if (result != HA_ERR_END_OF_FILE)
11331 DBUG_RETURN(result);
11332 last_range= 0; // No matching rows; go to next range
11333 }
11334 }
11335
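/*
  Roughly how a group-by consumer would drive get_next_prefix() above (a
  hypothetical driver loop, for illustration; the real caller is
  QUICK_GROUP_MIN_MAX_SELECT):

    uchar cur_prefix[MAX_KEY_LENGTH];
    while (!qs->get_next_prefix(prefix_len, group_parts, cur_prefix))
    {
      // remember the prefix of the group just entered
      key_copy(cur_prefix, qs->record, index_info, prefix_len);
      process_group();    // e.g. read MIN/MAX values for this group
    }

  Each call skips the remainder of the previous group with a single
  ha_index_read_map(..., HA_READ_AFTER_KEY) probe instead of scanning
  the group's rows one by one.
*/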
11336
11337 /* Get next for geometrical indexes */
11338
11339 int QUICK_RANGE_SELECT_GEOM::get_next()
11340 {
11341 DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
11342
11343 for (;;)
11344 {
11345 int result;
11346 if (last_range)
11347 {
11348 // Already read through key
11349 result= file->ha_index_next_same(record, last_range->min_key,
11350 last_range->min_length);
11351 if (result != HA_ERR_END_OF_FILE)
11352 DBUG_RETURN(result);
11353 }
11354
11355 const size_t count= ranges.size() - (cur_range-ranges.begin());
11356 if (count == 0)
11357 {
11358 /* Ranges have already been used up before. None is left for read. */
11359 last_range= 0;
11360 DBUG_RETURN(HA_ERR_END_OF_FILE);
11361 }
11362 last_range= *(cur_range++);
11363
11364 result= file->ha_index_read_map(record, last_range->min_key,
11365 last_range->min_keypart_map,
11366 last_range->rkey_func_flag);
11367 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
11368 DBUG_RETURN(result);
11369 last_range= 0; // Not found, to next range
11370 }
11371 }
11372
11373
11374 /*
11375 Check if current row will be retrieved by this QUICK_RANGE_SELECT
11376
11377 NOTES
11378 It is assumed that currently a scan is being done on another index
11379 which reads all necessary parts of the index that is scanned by this
11380 quick select.
11381 The implementation does a binary search on sorted array of disjoint
11382 ranges, without taking size of range into account.
11383
11384 This function is used to filter out clustered PK scan rows in
11385 index_merge quick select.
11386
11387 RETURN
11388 TRUE if current row will be retrieved by this quick select
11389 FALSE if not
11390 */
11391
11392 bool QUICK_RANGE_SELECT::row_in_ranges()
11393 {
11394 QUICK_RANGE *res;
11395 size_t min= 0;
11396 size_t max= ranges.size() - 1;
11397 size_t mid= (max + min)/2;
11398
11399 while (min != max)
11400 {
11401 if (cmp_next(ranges[mid]))
11402 {
11403 /* current row value > mid->max */
11404 min= mid + 1;
11405 }
11406 else
11407 max= mid;
11408 mid= (min + max) / 2;
11409 }
11410 res= ranges[mid];
11411 return (!cmp_next(res) && !cmp_prev(res));
11412 }
11413
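/*
  The binary search in row_in_ranges() above relies on the ranges being
  sorted and pairwise disjoint. Its invariant, in sketch form
  (illustration only):

    min= 0; max= number_of_ranges - 1;
    while (min != max)
    {
      mid= (min + max) / 2;
      if (current row value > ranges[mid]->max)  // cmp_next() != 0
        min= mid + 1;                            // row lies to the right
      else
        max= mid;                                // ranges[mid] still viable
    }
    // ranges[min] is the only candidate; the row qualifies iff
    // !cmp_next(ranges[min]) && !cmp_prev(ranges[min])
*/
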
11414 /*
11415 This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
11416 get_next() interface, but we have to hold a pointer to the original
11417 QUICK_RANGE_SELECT because its data are used all over the place. What
11418 should be done is to factor out the data that is needed into a base
11419 class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
11420 which handle the ranges and implement the get_next() function. But
11421 for now, this seems to work correctly.
11422 */
11423
11424 QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
11425 uint used_key_parts_arg,
11426 bool *error)
11427 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
11428 used_key_parts (used_key_parts_arg)
11429 {
11430 QUICK_RANGE *r;
11431 /*
11432 Use default MRR implementation for reverse scans. No table engine
11433 currently can do an MRR scan with output in reverse index order.
11434 */
11435 mrr_buf_desc= NULL;
11436 mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
11437 mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
11438 mrr_buf_size= 0;
11439
11441 Quick_ranges::const_iterator pr= ranges.begin();
11442 Quick_ranges::const_iterator end_range= ranges.end();
11443 for (; pr != end_range; pr++)
11444 rev_ranges.push_front(*pr);
11445
11446 /* Remove EQ_RANGE flag for keys that are not using the full key */
11447 for (r = rev_it++; r; r = rev_it++)
11448 {
11449 if ((r->flag & EQ_RANGE) &&
11450 head->key_info[index].key_length != r->max_length)
11451 r->flag&= ~EQ_RANGE;
11452 }
11453 rev_it.rewind();
11454 q->dont_free=1; // Don't free shared mem
11455 }
11456
11457
11458 int QUICK_SELECT_DESC::get_next()
11459 {
11460 DBUG_ENTER("QUICK_SELECT_DESC::get_next");
11461
11462 /* The max key is handled as follows:
11463 * - if there is NO_MAX_RANGE, start at the end and move backwards
11464 * - if it is an EQ_RANGE (which means that max key covers the entire
11465 * key) and the query does not use any hidden key fields that are
11466 * not considered when the range optimizer sets EQ_RANGE (e.g. the
11467 * primary key added by InnoDB), then go directly to the key and
11468 * read through it (sorting backwards is same as sorting forwards).
11469 * - if it is NEAR_MAX, go to the key or next, step back once, and
11470 * move backwards
11471 * - otherwise (not NEAR_MAX == include the key), go after the key,
11472 * step back once, and move backwards
11473 */
11474
11475 for (;;)
11476 {
11477 int result;
11478 if (last_range)
11479 { // Already read through key
11480 result = ((last_range->flag & EQ_RANGE &&
11481 used_key_parts <=
11482 head->key_info[index].user_defined_key_parts) ?
11483 file->ha_index_next_same(record, last_range->min_key,
11484 last_range->min_length) :
11485 file->ha_index_prev(record));
11486 if (!result)
11487 {
11488 if (cmp_prev(*rev_it.ref()) == 0)
11489 DBUG_RETURN(0);
11490 }
11491 else if (result != HA_ERR_END_OF_FILE)
11492 DBUG_RETURN(result);
11493 }
11494
11495 if (!(last_range= rev_it++))
11496 DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used
11497
11498 // Case where we can avoid descending scan, see comment above
11499 const bool eqrange_all_keyparts= (last_range->flag & EQ_RANGE) &&
11500 (used_key_parts <= head->key_info[index].user_defined_key_parts);
11501
11502 /*
11503 If we have pushed an index condition (ICP) and this quick select
11504 will use ha_index_prev() to read data, we need to let the
11505 handler know where to end the scan, in order to prevent the
11506 ICP implementation from reading past the range boundary.
11507 */
11508 if (file->pushed_idx_cond)
11509 {
11510 if (!eqrange_all_keyparts)
11511 {
11512 key_range min_range;
11513 last_range->make_min_endpoint(&min_range);
11514 if (min_range.length > 0)
11515 file->set_end_range(&min_range, handler::RANGE_SCAN_DESC);
11516 else
11517 file->set_end_range(NULL, handler::RANGE_SCAN_DESC);
11518 }
11519 else
11520 {
11521 /*
11522 Will use ha_index_next_same() for reading records. In case we have
11523 set the end range for an earlier range, this needs to be cleared.
11524 */
11525 file->set_end_range(NULL, handler::RANGE_SCAN_ASC);
11526 }
11527 }
11528
11529 if (last_range->flag & NO_MAX_RANGE) // Read last record
11530 {
11531 int local_error;
11532 if ((local_error= file->ha_index_last(record)))
11533 {
11534 /*
11535 HA_ERR_END_OF_FILE is returned both when the table is empty and when
11536 there are no qualifying records in the range (when using ICP).
11537 Interpret this return value as "no qualifying rows in the range" to
11538 avoid loss of records. If the error code truly meant "empty table"
11539 the next iteration of the loop will exit.
11540 */
11541 if (local_error != HA_ERR_END_OF_FILE)
11542 DBUG_RETURN(local_error);
11543 last_range= NULL; // Go to next range
11544 continue;
11545 }
11546
11547 if (cmp_prev(last_range) == 0)
11548 DBUG_RETURN(0);
11549 last_range= 0; // No match; go to next range
11550 continue;
11551 }
11552
11553 if (eqrange_all_keyparts)
11555 {
11556 result= file->ha_index_read_map(record, last_range->max_key,
11557 last_range->max_keypart_map,
11558 HA_READ_KEY_EXACT);
11559 }
11560 else
11561 {
11562 DBUG_ASSERT(last_range->flag & NEAR_MAX ||
11563 (last_range->flag & EQ_RANGE &&
11564 used_key_parts >
11565 head->key_info[index].user_defined_key_parts) ||
11566 range_reads_after_key(last_range));
11567 result= file->ha_index_read_map(record, last_range->max_key,
11568 last_range->max_keypart_map,
11569 ((last_range->flag & NEAR_MAX) ?
11570 HA_READ_BEFORE_KEY :
11571 HA_READ_PREFIX_LAST_OR_PREV));
11572 }
11573 if (result)
11574 {
11575 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
11576 DBUG_RETURN(result);
11577 last_range= 0; // Not found, to next range
11578 continue;
11579 }
11580 if (cmp_prev(last_range) == 0)
11581 {
11582 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
11583 last_range= 0; // Stop searching
11584 DBUG_RETURN(0); // Found key is in range
11585 }
11586 last_range= 0; // To next range
11587 }
11588 }
11589
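/*
  Worked example of the strategy above (illustration only): with an index
  on (a) and the range 5 < a <= 10 (NEAR_MIN set, NEAR_MAX clear), the
  descending scan positions with
  ha_index_read_map(..., HA_READ_PREFIX_LAST_OR_PREV) on max_key=10 and
  then iterates ha_index_prev() until cmp_prev() reports a key below the
  exclusive lower bound 5, at which point it moves to the next range.
*/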
11590
11591 /**
11592 Create a compatible quick select with the result ordered in an opposite way
11593
11594 @param used_key_parts_arg Number of used key parts
11595
11596 @retval NULL in case of errors (OOM etc)
11597 @retval pointer to a newly created QUICK_SELECT_DESC if success
11598 */
11599
11600 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
11601 {
11602 bool error= FALSE;
11603 QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg,
11604 &error);
11605 if (new_quick == NULL || error)
11606 {
11607 delete new_quick;
11608 return NULL;
11609 }
11610 return new_quick;
11611 }
11612
11613
11614 /*
11615 Compare if found key is over max-value
11616 Returns 0 if key <= range->max_key
11617 TODO: Figure out why this function can't be as simple as cmp_prev().
11618 At least it could use key_cmp() from key.cc; it's almost identical.
11619 */
11620
11621 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
11622 {
11623 if (range_arg->flag & NO_MAX_RANGE)
11624 return 0; /* key can't be too large */
11625
11626 KEY_PART *key_part=key_parts;
11627 uint store_length;
11628
11629 for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
11630 key < end;
11631 key+= store_length, key_part++)
11632 {
11633 int cmp;
11634 store_length= key_part->store_length;
11635 if (key_part->null_bit)
11636 {
11637 if (*key)
11638 {
11639 if (!key_part->field->is_null())
11640 return 1;
11641 continue;
11642 }
11643 else if (key_part->field->is_null())
11644 return 0;
11645 key++; // Skip null byte
11646 store_length--;
11647 }
11648 if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
11649 return 0;
11650 if (cmp > 0)
11651 return 1;
11652 }
11653 return (range_arg->flag & NEAR_MAX) ? 1 : 0; // Exact match
11654 }
11655
11656
11657 /*
11658 Returns 0 if found key is inside range (found key >= range->min_key).
11659 */
11660
11661 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
11662 {
11663 int cmp;
11664 if (range_arg->flag & NO_MIN_RANGE)
11665 return 0; /* key can't be too small */
11666
11667 cmp= key_cmp(key_part_info, range_arg->min_key,
11668 range_arg->min_length);
11669 if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
11670 return 0;
11671 return 1; // outside of range
11672 }
11673
11674
11675 /*
11676 Returns TRUE if this range will require using HA_READ_AFTER_KEY;
11677 see the comment in get_next() about this.
11678 */
11679
11680 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
11681 {
11682 return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
11683 !(range_arg->flag & EQ_RANGE) ||
11684 head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
11685 }
11686
11687
11688 void QUICK_RANGE_SELECT::add_info_string(String *str)
11689 {
11690 KEY *key_info= head->key_info + index;
11691 str->append(key_info->name);
11692 }
11693
11694 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
11695 {
11696 QUICK_RANGE_SELECT *quick;
11697 bool first= TRUE;
11698 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11699 str->append(STRING_WITH_LEN("sort_union("));
11700 while ((quick= it++))
11701 {
11702 if (!first)
11703 str->append(',');
11704 else
11705 first= FALSE;
11706 quick->add_info_string(str);
11707 }
11708 if (pk_quick_select)
11709 {
11710 str->append(',');
11711 pk_quick_select->add_info_string(str);
11712 }
11713 str->append(')');
11714 }
11715
11716 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
11717 {
11718 bool first= TRUE;
11719 QUICK_RANGE_SELECT *quick;
11720 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11721 str->append(STRING_WITH_LEN("intersect("));
11722 while ((quick= it++))
11723 {
11724 KEY *key_info= head->key_info + quick->index;
11725 if (!first)
11726 str->append(',');
11727 else
11728 first= FALSE;
11729 str->append(key_info->name);
11730 }
11731 if (cpk_quick)
11732 {
11733 KEY *key_info= head->key_info + cpk_quick->index;
11734 str->append(',');
11735 str->append(key_info->name);
11736 }
11737 str->append(')');
11738 }
11739
11740 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
11741 {
11742 bool first= TRUE;
11743 QUICK_SELECT_I *quick;
11744 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11745 str->append(STRING_WITH_LEN("union("));
11746 while ((quick= it++))
11747 {
11748 if (!first)
11749 str->append(',');
11750 else
11751 first= FALSE;
11752 quick->add_info_string(str);
11753 }
11754 str->append(')');
11755 }
11756
11757 void QUICK_GROUP_MIN_MAX_SELECT::add_info_string(String *str)
11758 {
11759 str->append(STRING_WITH_LEN("index_for_group_by("));
11760 str->append(index_info->name);
11761 str->append(')');
11762 }
11763
11764 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
11765 String *used_lengths)
11766 {
11767 char buf[64];
11768 size_t length;
11769 KEY *key_info= head->key_info + index;
11770 key_names->append(key_info->name);
11771 length= longlong2str(max_used_key_length, buf, 10) - buf;
11772 used_lengths->append(buf, length);
11773 }
11774
11775 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
11776 String *used_lengths)
11777 {
11778 char buf[64];
11779 size_t length;
11780 bool first= TRUE;
11781 QUICK_RANGE_SELECT *quick;
11782
11783 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11784 while ((quick= it++))
11785 {
11786 if (first)
11787 first= FALSE;
11788 else
11789 {
11790 key_names->append(',');
11791 used_lengths->append(',');
11792 }
11793
11794 KEY *key_info= head->key_info + quick->index;
11795 key_names->append(key_info->name);
11796 length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11797 used_lengths->append(buf, length);
11798 }
11799 if (pk_quick_select)
11800 {
11801 KEY *key_info= head->key_info + pk_quick_select->index;
11802 key_names->append(',');
11803 key_names->append(key_info->name);
11804 length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
11805 used_lengths->append(',');
11806 used_lengths->append(buf, length);
11807 }
11808 }
11809
11810 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
11811 String *used_lengths)
11812 {
11813 char buf[64];
11814 size_t length;
11815 bool first= TRUE;
11816 QUICK_RANGE_SELECT *quick;
11817 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11818 while ((quick= it++))
11819 {
11820 KEY *key_info= head->key_info + quick->index;
11821 if (first)
11822 first= FALSE;
11823 else
11824 {
11825 key_names->append(',');
11826 used_lengths->append(',');
11827 }
11828 key_names->append(key_info->name);
11829 length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11830 used_lengths->append(buf, length);
11831 }
11832
11833 if (cpk_quick)
11834 {
11835 KEY *key_info= head->key_info + cpk_quick->index;
11836 key_names->append(',');
11837 key_names->append(key_info->name);
11838 length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
11839 used_lengths->append(',');
11840 used_lengths->append(buf, length);
11841 }
11842 }
11843
11844 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
11845 String *used_lengths)
11846 {
11847 bool first= TRUE;
11848 QUICK_SELECT_I *quick;
11849 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11850 while ((quick= it++))
11851 {
11852 if (first)
11853 first= FALSE;
11854 else
11855 {
11856 used_lengths->append(',');
11857 key_names->append(',');
11858 }
11859 quick->add_keys_and_lengths(key_names, used_lengths);
11860 }
11861 }
11862
11863
11864 /*******************************************************************************
11865 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
11866 *******************************************************************************/
11867
11868 static inline uint get_field_keypart(KEY *index, Field *field);
11869 static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
11870 PARAM *param);
11871 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
11872 SEL_ARG **cur_range);
11873 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
11874 KEY_PART_INFO *first_non_group_part,
11875 KEY_PART_INFO *min_max_arg_part,
11876 KEY_PART_INFO *last_part, THD *thd,
11877 uchar *key_infix, uint *key_infix_len,
11878 KEY_PART_INFO **first_non_infix_part);
11879 static bool
11880 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
11881 Field::imagetype image_type);
11882
11883 static bool
11884 min_max_inspect_cond_for_fields(Item *cond, Item_field *min_max_arg_item,
11885 bool *min_max_arg_present,
11886 bool *non_min_max_arg_present);
11887
11888 static void
11889 cost_group_min_max(TABLE* table, uint key, uint used_key_parts,
11890 uint group_key_parts, SEL_TREE *range_tree,
11891 SEL_ARG *index_tree, ha_rows quick_prefix_records,
11892 bool have_min, bool have_max,
11893 Cost_estimate *cost_est, ha_rows *records);
11894
11895
11896 /**
11897 Test if this access method is applicable to a GROUP query with MIN/MAX
11898 functions, and if so, construct a new TRP object.
11899
11900 DESCRIPTION
11901 Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
11902 Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
11903 following conditions:
11904 A) Table T has at least one compound index I of the form:
11905 I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
11906 B) Query conditions:
11907 B0. Q is over a single table T.
11908 B1. The attributes referenced by Q are a subset of the attributes of I.
11909 B2. All attributes QA in Q can be divided into 3 overlapping groups:
11910 - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
11911 referenced by any number of MIN and/or MAX functions if present.
11912 - WA = {W_1, ..., W_p} - from the WHERE clause
11913 - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
11914 = SA - if Q is a DISTINCT query (based on the
11915 equivalence of DISTINCT and GROUP queries).
11916 - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
11917 GROUP BY and not referenced by MIN/MAX functions.
11918 with the following properties specified below.
11919 B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
11920 applicable.
11921
11922 SA1. There is at most one attribute in SA referenced by any number of
11923 MIN and/or MAX functions which, if present, is denoted as C.
11924 SA2. The position of the C attribute in the index is after the last A_k.
11925 SA3. The attribute C can be referenced in the WHERE clause only in
11926 predicates of the forms:
11927 - (C {< | <= | > | >= | =} const)
11928 - (const {< | <= | > | >= | =} C)
11929 - (C between const_i and const_j)
11930 - C IS NULL
11931 - C IS NOT NULL
11932 - C != const
11933 SA4. If Q has a GROUP BY clause, there are no other aggregate functions
11934 except MIN and MAX. For queries with DISTINCT, aggregate functions
11935 are allowed.
11936 SA5. The select list in DISTINCT queries should not contain expressions.
11937 SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
11938 for AGG_FUNC(DISTINCT ...) optimization because cursor position is
11939 never stored after a unique key lookup in the clustered index and
11940 further index_next/prev calls cannot be used. So loose index scan
11941 optimization can not be used in this case.
11942 SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
11943 access method is not used.
11944 For above queries MIN/MAX() aggregation has to be done at
11945 nested_loops_join (end_send_group). But with current design MIN/MAX()
11946 is always set as part of loose index scan. Because of this mismatch
11947 MIN() and MAX() values will be set incorrectly. For such queries to
11948 work we need a new interface for loose index scan. This new interface
11949 should only fetch records with min and max values and let
11950 end_send_group to do aggregation. Until then do not use
11951 loose_index_scan.
11952 GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
11953 G_i = A_j => i = j.
11954 GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
11955 forms a prefix of I. This permutation is used as the GROUP clause
11956 when the DISTINCT query is converted to a GROUP query.
11957 GA3. The attributes in GA may participate in arbitrary predicates, divided
11958 into two groups:
11959 - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
11960 attributes of a prefix of GA
11961 - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
11962 of GA. Since P is applied to only GROUP attributes it filters some
11963 groups, and thus can be applied after the grouping.
11964 GA4. There are no expressions among G_i, just direct column references.
11965 NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
11966 and the MIN/MAX attribute C, then NGA must consist of exactly the
11967 index attributes that constitute the gap. As a result there is a
11968 permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
11969 in the index.
11970 NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
11971 equality conditions for all NG_i of the form (NG_i = const) or
11972 (const = NG_i), such that each NG_i is referenced in exactly one
11973 conjunct. Informally, the predicates provide constants to fill the
11974 gap in the index.
11975 NGA3.If BA <> {}, there can only be one range. TODO: This is a code
11976 limitation and is not strictly needed. See BUG#15947433
11977 WA1. There are no other attributes in the WHERE clause except the ones
11978 referenced in predicates RNG, PA, PC, EQ defined above. Therefore
11979 WA is a subset of (GA union NGA union C) for GA,NGA,C that pass the
11980 above tests. By transitivity it then also follows that each WA_i
11981 participates in the index I (as this was already tested for GA, NGA
11982 and C).
11983 WA2. If there is a predicate on C, then it must be in conjunction
11984 to all predicates on all earlier keyparts in I.
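For illustration of WA2 (hypothetical index (a,c) with MIN(c)): a
condition like "a = 1 AND c > 5" qualifies because the predicate on C
is in conjunction with the predicate on the earlier keypart, whereas
"(a = 1 AND c > 5) OR a = 2" does not.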
11985
11986 C) Overall query form:
11987 SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
11988 FROM T
11989 WHERE [RNG(A_1,...,A_p ; where p <= k)]
11990 [AND EQ(B_1,...,B_m)]
11991 [AND PC(C)]
11992 [AND PA(A_i1,...,A_iq)]
11993 GROUP BY A_1,...,A_k
11994 [HAVING PH(A_1, ..., B_1,..., C)]
11995 where EXPR(...) is an arbitrary expression over some or all SELECT fields,
11996 or:
11997 SELECT DISTINCT A_i1,...,A_ik
11998 FROM T
11999 WHERE [RNG(A_1,...,A_p ; where p <= k)]
12000 [AND PA(A_i1,...,A_iq)];
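For illustration only (hypothetical table and index): given t1(a, b, c)
with a compound index (a, b, c), a query matching the first form above is

  SELECT a, MIN(c) FROM t1 WHERE b = 5 AND c < 100 GROUP BY a;

where GA = <a>, BA = <b> (the equality "b = 5" fills the gap in the
index), and C = c with PC(C) being "c < 100".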
12001
12002 NOTES
12003 If the current query satisfies the conditions above, and if
12004 (mem_root != NULL), then the function constructs and returns a new TRP
12005 object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
12006 If (mem_root == NULL), then the function only tests whether the current
12007 query satisfies the conditions above, and, if so, sets
12008 is_applicable = TRUE.
12009
12010 Queries with DISTINCT for which index access can be used are transformed
12011 into equivalent group-by queries of the form:
12012
12013 SELECT A_1,...,A_k FROM T
12014 WHERE [RNG(A_1,...,A_p ; where p <= k)]
12015 [AND PA(A_i1,...,A_iq)]
12016 GROUP BY A_1,...,A_k;
12017
12018 The group-by list is a permutation of the select attributes, according
12019 to their order in the index.
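For example (hypothetical schema): with an index (a, b), the query
"SELECT DISTINCT b, a FROM t1" is treated as
"SELECT b, a FROM t1 GROUP BY a, b", the group list being ordered as in
the index.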
12020
12021 TODO
12022 - What happens if the query groups by the MIN/MAX field, and there is no
12023 other field as in: "select min(a) from t1 group by a" ?
12024 - We assume that the general correctness of the GROUP-BY query was checked
12025 before this point. Is this correct, or do we have to check it completely?
12026 - Lift the limitation in condition (B3), that is, make this access method
12027 applicable to ROLLUP queries.
12028
12029 @param param Parameter from test_quick_select
12030 @param tree Range tree generated by get_mm_tree
12031 @param cost_est Best cost so far (=table/index scan time)
12032 @return table read plan
12033 @retval NULL Loose index scan not applicable or mem_root == NULL
12034 @retval !NULL Loose index scan table read plan
12035 */
12036
12037 static TRP_GROUP_MIN_MAX *
12038 get_best_group_min_max(PARAM *param, SEL_TREE *tree, const Cost_estimate *cost_est)
12039 {
12040 THD *thd= param->thd;
12041 JOIN *join= thd->lex->current_select()->join;
12042 TABLE *table= param->table;
12043 bool have_min= FALSE; /* TRUE if there is a MIN function. */
12044 bool have_max= FALSE; /* TRUE if there is a MAX function. */
12045 Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
12046 KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
12047 uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
12048 KEY *index_info= NULL; /* The index chosen for data access. */
12049 uint index= 0; /* The id of the chosen index. */
12050 uint group_key_parts= 0; // Number of index key parts in the group prefix.
12051 uint used_key_parts= 0; /* Number of index key parts used for access. */
12052 uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
12053 uint key_infix_len= 0; /* Length of key_infix. */
12054 TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
12055 uint key_part_nr;
12056 ORDER *tmp_group;
12057 Item *item;
12058 Item_field *item_field;
12059 bool is_agg_distinct;
12060 List<Item_field> agg_distinct_flds;
12061 /* Cost-related variables for the best index so far. */
12062 Cost_estimate best_read_cost;
12063 ha_rows best_records= 0;
12064 SEL_ARG *best_index_tree= NULL;
12065 ha_rows best_quick_prefix_records= 0;
12066 uint best_param_idx= 0;
12067 List_iterator<Item> select_items_it;
12068 Opt_trace_context * const trace= ¶m->thd->opt_trace;
12069
12070 DBUG_ENTER("get_best_group_min_max");
12071
12072 Opt_trace_object trace_group(trace, "group_index_range",
12073 Opt_trace_context::RANGE_OPTIMIZER);
12074 const char* cause= NULL;
12075 best_read_cost.set_max_cost();
12076
12077 /* Perform a few 'cheap' tests whether this access method is applicable. */
12078 if (!join)
12079 cause= "no_join";
12080 else if (join->primary_tables != 1) /* Query must reference one table. */
12081 cause= "not_single_table";
12082 else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
12083 cause= "rollup";
12084 else if (table->s->keys == 0) /* There are no indexes to use. */
12085 cause= "no_index";
12086 else if (param->order_direction == ORDER::ORDER_DESC)
12087 cause= "cannot_do_reverse_ordering";
12088 if (cause != NULL)
12089 {
12090 trace_group.add("chosen", false).add_alnum("cause", cause);
12091 DBUG_RETURN(NULL);
12092 }
12093
12094 /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
12095 is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
12096
12097 if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
12098 (!join->select_distinct) &&
12099 !is_agg_distinct)
12100 {
12101 trace_group.add("chosen", false).
12102 add_alnum("cause", "not_group_by_or_distinct");
12103 DBUG_RETURN(NULL);
12104 }
12105 /* Analyze the query in more detail. */
12106
12107 if (join->sum_funcs[0])
12108 {
12109 Item_sum *min_max_item;
12110 Item_sum **func_ptr= join->sum_funcs;
12111 while ((min_max_item= *(func_ptr++)))
12112 {
12113 if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
12114 have_min= TRUE;
12115 else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
12116 have_max= TRUE;
12117 else if (is_agg_distinct &&
12118 (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
12119 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
12120 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
12121 continue;
12122 else
12123 {
12124 trace_group.add("chosen", false).
12125 add_alnum("cause", "not_applicable_aggregate_function");
12126 DBUG_RETURN(NULL);
12127 }
12128
12129 /* The argument of MIN/MAX. */
12130 Item *expr= min_max_item->get_arg(0)->real_item();
12131 if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
12132 {
12133 if (! min_max_arg_item)
12134 min_max_arg_item= (Item_field*) expr;
12135 else if (! min_max_arg_item->eq(expr, 1))
12136 DBUG_RETURN(NULL);
12137 }
12138 else
12139 DBUG_RETURN(NULL);
12140 }
12141 }
12142
12143 /**
12144 Test (part of WA2): Skip loose index scan on a disjunctive WHERE clause
12145 that results in a null tree or a merge tree.
12146 */
12147 if (tree && !tree->merges.is_empty())
12148 {
12149 /**
12150 The tree structure contains multiple disjoint trees. This happens when
12151 the WHERE clause can't be represented in a single range tree due to its
12152 disjunctive nature, but there exist indexes that make an index merge
12153 scan possible.
12154 */
12155 trace_group.add("chosen", false).
12156 add_alnum("cause", "disjuntive_predicate_present");
12157 DBUG_RETURN(NULL);
12158 }
12159 else if (!tree && join->where_cond && min_max_arg_item)
12160 {
12161 /**
12162 Skip loose index scan if the min_max attribute is present along with
12163 at least one other attribute in the WHERE clause when the tree is null.
12164 There is no range tree if the WHERE condition can't be represented in a
12165 single range tree and index merge is not possible.
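For example (hypothetical schema, MIN(c)): a condition like
"b = 10 OR c = 2" references c together with another field in a
disjunction, so loose index scan is skipped.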
12166 */
12167 bool min_max_arg_present= false;
12168 bool non_min_max_arg_present= false;
12169 if (min_max_inspect_cond_for_fields(join->where_cond,
12170 min_max_arg_item,
12171 &min_max_arg_present,
12172 &non_min_max_arg_present))
12173 {
12174 trace_group.add("chosen", false).
12175 add_alnum("cause", "minmax_keypart_in_disjunctive_query");
12176 DBUG_RETURN(NULL);
12177 }
12178 }
12179
12180 /* Check (SA7). */
12181 if (is_agg_distinct && (have_max || have_min))
12182 {
12183 trace_group.add("chosen", false).
12184 add_alnum("cause", "have_both_agg_distinct_and_min_max");
12185 DBUG_RETURN(NULL);
12186 }
12187
12188 select_items_it= List_iterator<Item>(join->fields_list);
12189 /* Check (SA5). */
12190 if (join->select_distinct)
12191 {
12192 trace_group.add("distinct_query", true);
12193 while ((item= select_items_it++))
12194 {
12195 if (item->real_item()->type() != Item::FIELD_ITEM)
12196 DBUG_RETURN(NULL);
12197 }
12198 }
12199
12200 /* Check (GA4) - that there are no expressions among the group attributes. */
12201 for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
12202 {
12203 if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
12204 {
12205 trace_group.add("chosen", false).
12206 add_alnum("cause", "group_field_is_expression");
12207 DBUG_RETURN(NULL);
12208 }
12209 }
12210
12211 /*
12212 Check that the table has at least one compound index such that the conditions
12213 (GA1,GA2) are all TRUE. If there is more than one such index, select the
12214 first one. Here we set the variables: group_prefix_len and index_info.
12215 */
12216
12217 const uint pk= param->table->s->primary_key;
12218 SEL_ARG *cur_index_tree= NULL;
12219 ha_rows cur_quick_prefix_records= 0;
12220 Opt_trace_array trace_indexes(trace, "potential_group_range_indexes");
12221 // We go through allowed indexes
12222 for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx)
12223 {
12224 const uint cur_index= param->real_keynr[cur_param_idx];
12225 KEY *const cur_index_info= &table->key_info[cur_index];
12226 Opt_trace_object trace_idx(trace);
12227 trace_idx.add_utf8("index", cur_index_info->name);
12228 KEY_PART_INFO *cur_part;
12229 KEY_PART_INFO *end_part; /* Last part for loops. */
12230 /* Last index part. */
12231 KEY_PART_INFO *last_part;
12232 KEY_PART_INFO *first_non_group_part;
12233 KEY_PART_INFO *first_non_infix_part;
12234 uint key_infix_parts;
12235 uint cur_group_key_parts= 0;
12236 uint cur_group_prefix_len= 0;
12237 Cost_estimate cur_read_cost;
12238 ha_rows cur_records;
12239 key_map used_key_parts_map;
12240 uint max_key_part= 0;
12241 uint cur_key_infix_len= 0;
12242 uchar cur_key_infix[MAX_KEY_LENGTH];
12243 uint cur_used_key_parts;
12244
12245 /* Check (B1) - if current index is covering. */
12246 if (!table->covering_keys.is_set(cur_index))
12247 {
12248 cause= "not_covering";
12249 goto next_index;
12250 }
12251
12252 /*
12253 If the current storage manager is such that it appends the primary key to
12254 each index, then the above condition is insufficient to check if the
12255 index is covering. In such cases it may happen that some fields are
12256 covered by the PK index, but not by the current index. Since we can't
12257 use the concatenation of both indexes for index lookup, such an index
12258 does not qualify as covering in our case. If this is the case, below
12259 we check that all query fields are indeed covered by 'cur_index'.
12260 */
12261 if (pk < MAX_KEY && cur_index != pk &&
12262 (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
12263 {
12264 /* For each table field */
12265 for (uint i= 0; i < table->s->fields; i++)
12266 {
12267 Field *cur_field= table->field[i];
12268 /*
12269 If the field is used in the current query ensure that it's
12270 part of 'cur_index'
12271 */
12272 if (bitmap_is_set(table->read_set, cur_field->field_index) &&
12273 !cur_field->is_part_of_actual_key(thd, cur_index, cur_index_info))
12274 {
12275 cause= "not_covering";
12276 goto next_index; // Field was not part of key
12277 }
12278 }
12279 }
12280 trace_idx.add("covering", true);
12281
12282 /*
12283 Check (GA1) for GROUP BY queries.
12284 */
12285 if (join->group_list)
12286 {
12287 cur_part= cur_index_info->key_part;
12288 end_part= cur_part + actual_key_parts(cur_index_info);
12289 /* Iterate in parallel over the GROUP list and the index parts. */
12290 for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
12291 tmp_group= tmp_group->next, cur_part++)
12292 {
12293 /*
12294 TODO:
12295 tmp_group::item is an array of Item, is it OK to consider only the
12296 first Item? If so, then why? What is the array for?
12297 */
12298 /* Above we already checked that all group items are fields. */
12299 DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
12300 Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
12301 if (group_field->field->eq(cur_part->field))
12302 {
12303 cur_group_prefix_len+= cur_part->store_length;
12304 ++cur_group_key_parts;
12305 max_key_part= cur_part - cur_index_info->key_part + 1;
12306 used_key_parts_map.set_bit(max_key_part);
12307 }
12308 else
12309 {
12310 cause= "group_attribute_not_prefix_in_index";
12311 goto next_index;
12312 }
12313 }
12314 }
12315
12316 /*
12317 Check (GA2) if this is a DISTINCT query.
12318 If GA2 holds, store a new ORDER object in group_fields_array at the
12319 position of the key part of item_field->field. Thus we get the ORDER
12320 objects for each field ordered as the corresponding key parts.
12321 Later the group_fields_array of ORDER objects is used to convert the
12322 query to a GROUP query.
12323 */
12324 if ((!join->group_list && join->select_distinct) ||
12325 is_agg_distinct)
12326 {
12327 if (!is_agg_distinct)
12328 {
12329 select_items_it.rewind();
12330 }
12331
12332 List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
12333 while (NULL !=
12334 (item= (is_agg_distinct ?
12335 (Item *) agg_distinct_flds_it++ : select_items_it++)))
12336 {
12337 /* (SA5) already checked above. */
12338 item_field= (Item_field*) item->real_item();
12339 DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
12340
12341 /* not doing loose index scan for derived tables */
12342 if (!item_field->field)
12343 {
12344 cause= "derived_table";
12345 goto next_index;
12346 }
12347
12348 /* Find the order of the key part in the index. */
12349 key_part_nr= get_field_keypart(cur_index_info, item_field->field);
12350 /*
12351 Check if this attribute was already present in the select list.
12352 If it was present, then its corresponding key part was already used.
12353 */
12354 if (used_key_parts_map.is_set(key_part_nr))
12355 continue;
12356 if (key_part_nr < 1 ||
12357 (!is_agg_distinct && key_part_nr > join->fields_list.elements))
12358 {
12359 cause= "select_attribute_not_prefix_in_index";
12360 goto next_index;
12361 }
12362 cur_part= cur_index_info->key_part + key_part_nr - 1;
12363 cur_group_prefix_len+= cur_part->store_length;
12364 used_key_parts_map.set_bit(key_part_nr);
12365 ++cur_group_key_parts;
12366 max_key_part= max(max_key_part,key_part_nr);
12367 }
12368 /*
12369 Check that the used key parts form a prefix of the index.
12370 To check this we compare bits in all_parts and cur_parts.
12371 all_parts has all bits set from 0 to (max_key_part-1).
12372 cur_parts has bits set only for the used keyparts.
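Example: if max_key_part is 3, all_parts = 0b111; if only keyparts
1 and 3 are used, cur_parts = 0b101, the bitmaps differ, and the
index is rejected.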
12373 */
12374 ulonglong all_parts, cur_parts;
12375 all_parts= (1ULL << max_key_part) - 1;
12376 cur_parts= used_key_parts_map.to_ulonglong() >> 1;
12377 if (all_parts != cur_parts)
12378 goto next_index;
12379 }
12380
12381 /* Check (SA2). */
12382 if (min_max_arg_item)
12383 {
12384 key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
12385 if (key_part_nr <= cur_group_key_parts)
12386 {
12387 cause= "aggregate_column_not_suffix_in_idx";
12388 goto next_index;
12389 }
12390 min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
12391 }
12392
12393 /* Check (SA6) if clustered key is used. */
12394 if (is_agg_distinct && cur_index == table->s->primary_key &&
12395 table->file->primary_key_is_clustered())
12396 {
12397 cause= "primary_key_is_clustered";
12398 goto next_index;
12399 }
12400
12401 /*
12402 Check (NGA1, NGA2) and extract a sequence of constants to be used as part
12403 of all search keys.
12404 */
12405
12406 /*
12407 If there is MIN/MAX, each keypart between the last group part and the
12408 MIN/MAX part must participate in one equality with constants, and all
12409 keyparts after the MIN/MAX part must not be referenced in the query.
12410
12411 If there is no MIN/MAX, the keyparts after the last group part can be
12412 referenced only in equalities with constants, and the referenced keyparts
12413 must form a sequence without any gaps that starts immediately after the
12414 last group keypart.
12415 */
12416 last_part= cur_index_info->key_part + actual_key_parts(cur_index_info);
12417 first_non_group_part=
12418 (cur_group_key_parts < actual_key_parts(cur_index_info)) ?
12419 cur_index_info->key_part + cur_group_key_parts :
12420 NULL;
12421 first_non_infix_part= min_max_arg_part ?
12422 (min_max_arg_part < last_part) ?
12423 min_max_arg_part :
12424 NULL :
12425 NULL;
12426 if (first_non_group_part &&
12427 (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
12428 {
12429 if (tree)
12430 {
12431 SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param);
12432 if (!get_constant_key_infix(cur_index_info, index_range_tree,
12433 first_non_group_part, min_max_arg_part,
12434 last_part, thd, cur_key_infix,
12435 &cur_key_infix_len,
12436 &first_non_infix_part))
12437 {
12438 cause= "nonconst_equality_gap_attribute";
12439 goto next_index;
12440 }
12441 }
12442 else if (min_max_arg_part &&
12443 (min_max_arg_part - first_non_group_part > 0))
12444 {
12445 /*
12446 There is a gap but no range tree, thus no predicates at all for the
12447 non-group keyparts.
12448 */
12449 cause= "no_nongroup_keypart_predicate";
12450 goto next_index;
12451 }
12452 else if (first_non_group_part && join->where_cond)
12453 {
12454 /*
12455 If there is no MIN/MAX function in the query, but some index
12456 key part is referenced in the WHERE clause, then this index
12457 cannot be used because the WHERE condition over the keypart's
12458 field cannot be 'pushed' to the index (because there is no
12459 range 'tree'), and the WHERE clause must be evaluated before
12460 GROUP BY/DISTINCT.
12461 */
12462 /*
12463 Store the first and last keyparts that need to be analyzed
12464 into one array that can be passed as parameter.
12465 */
12466 KEY_PART_INFO *key_part_range[2];
12467 key_part_range[0]= first_non_group_part;
12468 key_part_range[1]= last_part;
12469
12470 /* Check if cur_part is referenced in the WHERE clause. */
12471 if (join->where_cond->walk(&Item::find_item_in_field_list_processor,
12472 Item::WALK_SUBQUERY_POSTFIX,
12473 (uchar*) key_part_range))
12474 {
12475 cause= "keypart_reference_from_where_clause";
12476 goto next_index;
12477 }
12478 }
12479 }
12480
12481 /*
12482 Test (WA1) partially - that no other keypart after the last infix part is
12483 referenced in the query.
12484 */
12485 if (first_non_infix_part)
12486 {
12487 cur_part= first_non_infix_part +
12488 (min_max_arg_part && (min_max_arg_part < last_part));
12489 for (; cur_part != last_part; cur_part++)
12490 {
12491 if (bitmap_is_set(table->read_set, cur_part->field->field_index))
12492 {
12493 cause= "keypart_after_infix_in_query";
12494 goto next_index;
12495 }
12496 }
12497 }
12498
12499 /**
12500 Test part of WA2: If there are conditions on a column C participating in
12501 MIN/MAX, those conditions must be conjunctions to all earlier
12502 keyparts. Otherwise, Loose Index Scan cannot be used.
12503 */
12504 if (tree && min_max_arg_item)
12505 {
12506 SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param);
12507 SEL_ARG *cur_range= NULL;
12508 if (get_sel_arg_for_keypart(min_max_arg_part->field,
12509 index_range_tree, &cur_range) ||
12510 (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
12511 {
12512 cause= "minmax_keypart_in_disjunctive_query";
12513 goto next_index;
12514 }
12515 }
12516
12517 /* If we got to this point, cur_index_info passes the test. */
12518 key_infix_parts= cur_key_infix_len ? (uint)
12519 (first_non_infix_part - first_non_group_part) : 0;
12520 cur_used_key_parts= cur_group_key_parts + key_infix_parts;
12521
12522 /* Compute the cost of using this index. */
12523 if (tree)
12524 {
12525 /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
12526 cur_index_tree= get_index_range_tree(cur_index, tree, param);
12527 /* Check if this range tree can be used for prefix retrieval. */
12528 Cost_estimate dummy_cost;
12529 uint mrr_flags= HA_MRR_SORTED;
12530 uint mrr_bufsize= 0;
12531 cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
12532 FALSE /*don't care*/,
12533 cur_index_tree, TRUE,
12534 &mrr_flags, &mrr_bufsize,
12535 &dummy_cost);
12536 #ifdef OPTIMIZER_TRACE
12537 if (unlikely(cur_index_tree && trace->is_started()))
12538 {
12539 trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics);
12540 Opt_trace_array trace_range(trace, "ranges");
12541
12542 const KEY_PART_INFO *key_part= cur_index_info->key_part;
12543
12544 String range_info;
12545 range_info.set_charset(system_charset_info);
12546 append_range_all_keyparts(&trace_range, NULL, &range_info,
12547 cur_index_tree, key_part, false);
12548 }
12549 #endif
12550 }
12551 cost_group_min_max(table, cur_index, cur_used_key_parts,
12552 cur_group_key_parts, tree, cur_index_tree,
12553 cur_quick_prefix_records, have_min, have_max,
12554 &cur_read_cost, &cur_records);
12555 /*
12556 If cur_read_cost is lower than best_read_cost use cur_index.
12557 Do not compare doubles directly because they may have different
12558 representations (64 vs. 80 bits).
12559 */
12560 trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
12561 {
12562 Cost_estimate min_diff_cost= cur_read_cost;
12563 min_diff_cost.multiply(DBL_EPSILON);
12564 if (cur_read_cost < (best_read_cost - min_diff_cost))
12565 {
12566 index_info= cur_index_info;
12567 index= cur_index;
12568 best_read_cost= cur_read_cost;
12569 best_records= cur_records;
12570 best_index_tree= cur_index_tree;
12571 best_quick_prefix_records= cur_quick_prefix_records;
12572 best_param_idx= cur_param_idx;
12573 group_key_parts= cur_group_key_parts;
12574 group_prefix_len= cur_group_prefix_len;
12575 key_infix_len= cur_key_infix_len;
12576 if (key_infix_len)
12577 memcpy (key_infix, cur_key_infix, sizeof (key_infix));
12578 used_key_parts= cur_used_key_parts;
12579 }
12580 }
12581
12582 next_index:
12583 if (cause)
12584 {
12585 trace_idx.add("usable", false).add_alnum("cause", cause);
12586 cause= NULL;
12587 }
12588 }
12589 trace_indexes.end();
12590
12591 if (!index_info) /* No usable index found. */
12592 DBUG_RETURN(NULL);
12593
12594 /* Check (SA3) for the where clause. */
12595 if (join->where_cond && min_max_arg_item &&
12596 !check_group_min_max_predicates(join->where_cond, min_max_arg_item,
12597 (index_info->flags & HA_SPATIAL) ?
12598 Field::itMBR : Field::itRAW))
12599 {
12600 trace_group.add("usable", false).
12601 add_alnum("cause", "unsupported_predicate_on_agg_attribute");
12602 DBUG_RETURN(NULL);
12603 }
12604
12605 /* The query passes all tests, so construct a new TRP object. */
12606 read_plan= new (param->mem_root)
12607 TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
12608 min_max_arg_part,
12609 group_prefix_len, used_key_parts,
12610 group_key_parts, index_info, index,
12611 key_infix_len,
12612 (key_infix_len > 0) ? key_infix : NULL,
12613 tree, best_index_tree, best_param_idx,
12614 best_quick_prefix_records);
12615 if (read_plan)
12616 {
12617 if (tree && read_plan->quick_prefix_records == 0)
12618 DBUG_RETURN(NULL);
12619
12620 read_plan->cost_est= best_read_cost;
12621 read_plan->records= best_records;
12622 if (*cost_est < best_read_cost &&
12623 is_agg_distinct)
12624 {
12625 trace_group.add("index_scan", true);
12626 read_plan->cost_est.reset();
12627 read_plan->use_index_scan();
12628 }
12629
12630 DBUG_PRINT("info",
12631 ("Returning group min/max plan: cost: %g, records: %lu",
12632 read_plan->cost_est.total_cost(), (ulong) read_plan->records));
12633 }
12634
12635 DBUG_RETURN(read_plan);
12636 }
12637
12638
12639 /*
12640 Check that the MIN/MAX attribute participates only in range predicates
12641 with constants.
12642
12643 SYNOPSIS
12644 check_group_min_max_predicates()
12645 cond tree (or subtree) describing all or part of the WHERE
12646 clause being analyzed
12647 min_max_arg_item the field referenced by the MIN/MAX function(s)
12648 image_type Field image type: itMBR for a spatial index, itRAW otherwise
12649
12650 DESCRIPTION
12651 The function walks recursively over the cond tree representing a WHERE
12652 clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
12653 aggregate function, it is referenced only by one of the following
12654 predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.
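For example (hypothetical schema, MIN(c)): "WHERE a = 2 AND c < 10"
passes the test, while "WHERE c LIKE 'x%'" fails it, since LIKE is not
among the allowed predicates on C.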
12655
12656 RETURN
12657 TRUE if cond passes the test
12658 FALSE o/w
12659 */
12660
12661 static bool
12662 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
12663 Field::imagetype image_type)
12664 {
12665 DBUG_ENTER("check_group_min_max_predicates");
12666 DBUG_ASSERT(cond && min_max_arg_item);
12667
12668 cond= cond->real_item();
12669 Item::Type cond_type= cond->type();
12670 if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
12671 {
12672 DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
12673 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
12674 Item *and_or_arg;
12675 while ((and_or_arg= li++))
12676 {
12677 if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
12678 image_type))
12679 DBUG_RETURN(FALSE);
12680 }
12681 DBUG_RETURN(TRUE);
12682 }
12683
12684 /*
12685 TODO:
12686 This is a very crude fix to handle sub-selects in the WHERE clause
12687 (Item_subselect objects). With the test below we rule out from the
12688 optimization all queries with subselects in the WHERE clause. What has to
12689 be done, is that here we should analyze whether the subselect references
12690 the MIN/MAX argument field, and disallow the optimization only if this is
12691 so.
12692 Need to handle subselect in min_max_inspect_cond_for_fields() once this
12693 is fixed.
12694 */
12695 if (cond_type == Item::SUBSELECT_ITEM)
12696 DBUG_RETURN(FALSE);
12697
12698 /*
12699 Condition of the form 'field' is equivalent to 'field <> 0' and thus
12700 satisfies the SA3 condition.
12701 */
12702 if (cond_type == Item::FIELD_ITEM)
12703 {
12704 DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
12705 DBUG_RETURN(TRUE);
12706 }
12707
12708 /*
12709 At this point, we have weeded out most conditions other than
12710 function items. However, there are cases like the following:
12711
12712 select 1 in (select max(c) from t1 where max(1) group by a)
12713
12714 Here the condition "where max(1)" is an Item_sum_max, not an
12715 Item_func. In this particular case, the where clause should
12716 be equivalent to "where max(1) <> 0". A where clause
12717 phrased that way does not satisfy the SA3 condition of
12718 get_best_group_min_max(). The "where max(1) = true" clause
12719 causes this method to reject the access method
12720 (i.e., to return FALSE).
12721
12722 It's been suggested that it may be possible to use the access method
12723 for a sub-family of cases when we're aggregating constants or
12724 outer references. For the moment, we bail out and reject
12725 the access method for the query.
12726
12727 It's hard to prove that there are no other cases where the
12728 condition is not an Item_func. So, for the moment, don't apply
12729 the optimization if the condition is not a function item.
12730 */
12731 if (cond_type == Item::SUM_FUNC_ITEM)
12732 {
12733 DBUG_RETURN(FALSE);
12734 }
12735
12736 /*
12737 If this is a debug server, then we want to know about
12738 additional oddball cases which might benefit from this
12739 optimization.
12740 */
12741 DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
12742 if (cond_type != Item::FUNC_ITEM)
12743 {
12744 DBUG_RETURN(FALSE);
12745 }
12746
12747 /* Test if cond references only group-by or non-group fields. */
12748 Item_func *pred= (Item_func*) cond;
12749 Item *cur_arg;
12750 DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
12751 for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
12752 {
12753 Item **arguments= pred->arguments();
12754 cur_arg= arguments[arg_idx]->real_item();
12755 DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
12756 if (cur_arg->type() == Item::FIELD_ITEM)
12757 {
12758 if (min_max_arg_item->eq(cur_arg, 1))
12759 {
12760 /*
12761 If pred references the MIN/MAX argument, check whether pred is a range
12762 condition that compares the MIN/MAX argument with a constant.
12763 */
12764 Item_func::Functype pred_type= pred->functype();
12765 if (pred_type != Item_func::EQUAL_FUNC &&
12766 pred_type != Item_func::LT_FUNC &&
12767 pred_type != Item_func::LE_FUNC &&
12768 pred_type != Item_func::GT_FUNC &&
12769 pred_type != Item_func::GE_FUNC &&
12770 pred_type != Item_func::BETWEEN &&
12771 pred_type != Item_func::ISNULL_FUNC &&
12772 pred_type != Item_func::ISNOTNULL_FUNC &&
12773 pred_type != Item_func::EQ_FUNC &&
12774 pred_type != Item_func::NE_FUNC)
12775 DBUG_RETURN(FALSE);
12776
12777 /* Check that pred compares min_max_arg_item with a constant. */
12778 Item *args[3];
12779 memset(args, 0, 3 * sizeof(Item*));
12780 bool inv;
12781 /* Test if this is a comparison of a field and a constant. */
12782 if (!simple_pred(pred, args, &inv))
12783 DBUG_RETURN(FALSE);
12784
12785 /* Check for compatible string comparisons - similar to get_mm_leaf. */
12786 if (args[0] && args[1] && !args[2] && // this is a binary function
12787 min_max_arg_item->result_type() == STRING_RESULT &&
12788 /*
12789 Don't use an index when comparing strings of different collations.
12790 */
12791 ((args[1]->result_type() == STRING_RESULT &&
12792 image_type == Field::itRAW &&
12793 min_max_arg_item->field->charset() != pred->compare_collation())
12794 ||
12795 /*
12796 We can't always use indexes when comparing a string index to a
12797 number.
12798 */
12799 (args[1]->result_type() != STRING_RESULT &&
12800 min_max_arg_item->field->cmp_type() != args[1]->result_type())))
12801 DBUG_RETURN(FALSE);
12802 }
12803 }
12804 else if (cur_arg->type() == Item::FUNC_ITEM)
12805 {
12806 if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
12807 image_type))
12808 DBUG_RETURN(FALSE);
12809 }
12810 else if (cur_arg->const_item())
12811 {
12812 /*
12813 For predicates of the form "const OP expr" we also have to check 'expr'
12814 to make a decision.
12815 */
12816 continue;
12817 }
12818 else
12819 DBUG_RETURN(FALSE);
12820 }
12821
12822 DBUG_RETURN(TRUE);
12823 }
12824
12825 /**
12826 Utility function used by min_max_inspect_cond_for_fields() for comparing
12827 a FIELD item with the given MIN/MAX item and setting the appropriate out parameter.
12828
12829 @param item_field Item field for comparison.
12830 @param min_max_arg_item The field referenced by the MIN/MAX
12831 function(s).
12832 @param [out] min_max_arg_present This out parameter is set to true if
12833 MIN/MAX argument is present in cond.
12834 @param [out] non_min_max_arg_present This out parameter is set to true if
12835 any field item other than MIN/MAX
12836 argument is present in cond.
12837 */
12838 static inline void util_min_max_inspect_item(Item *item_field,
12839 Item_field *min_max_arg_item,
12840 bool *min_max_arg_present,
12841 bool *non_min_max_arg_present)
12842 {
12843 if (item_field->type() == Item::FIELD_ITEM)
12844 {
12845 if(min_max_arg_item->eq(item_field, 1))
12846 *min_max_arg_present= true;
12847 else
12848 *non_min_max_arg_present= true;
12849 }
12850 }
12851
12852 /**
12853 This function detects the presence of the MIN/MAX field along with at
12854 least one other (non MIN/MAX) field participating in the given condition.
12855 Subquery inspection is skipped for now.
12856
12857 @param cond tree (or subtree) describing all or part of the WHERE
12858 clause being analyzed.
12859 @param min_max_arg_item The field referenced by the MIN/MAX
12860 function(s).
12861 @param [out] min_max_arg_present This out parameter is set to true if
12862 MIN/MAX argument is present in cond.
12863 @param [out] non_min_max_arg_present This out parameter is set to true if
12864 any field item other than MIN/MAX
12865 argument is present in cond.
12866
12867 @return TRUE if both the MIN/MAX field and a non MIN/MAX field are present
12868 in cond, FALSE otherwise.
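For example (hypothetical schema, MIN(c)): for the condition
"b = 10 AND c > 2" both out parameters become true and TRUE is
returned; for "c > 2" alone only min_max_arg_present is set and FALSE
is returned.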
12869
12870 @todo: When the hack present in check_group_min_max_predicates() is removed,
12871 subqueries need to be inspected.
12872 */
12873
12874 static bool
12875 min_max_inspect_cond_for_fields(Item *cond, Item_field *min_max_arg_item,
12876 bool *min_max_arg_present,
12877 bool *non_min_max_arg_present)
12878 {
12879 DBUG_ENTER("min_max_inspect_cond_for_fields");
12880 DBUG_ASSERT(cond && min_max_arg_item);
12881
12882 cond= cond->real_item();
12883 Item::Type cond_type= cond->type();
12884
12885 switch (cond_type) {
12886 case Item::COND_ITEM: {
12887 DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
12888 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
12889 Item *and_or_arg;
12890 while ((and_or_arg= li++))
12891 {
12892 min_max_inspect_cond_for_fields(and_or_arg, min_max_arg_item,
12893 min_max_arg_present,
12894 non_min_max_arg_present);
12895 if (*min_max_arg_present && *non_min_max_arg_present)
12896 DBUG_RETURN(true);
12897 }
12898
12899 DBUG_RETURN(false);
12900 }
12901 case Item::FUNC_ITEM: {
12902 /* Test if cond references both group-by and non-group fields. */
12903 Item_func *pred= (Item_func*) cond;
12904 Item *cur_arg;
12905 DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
12906 for (uint arg_idx= 0; arg_idx < pred->argument_count(); arg_idx++)
12907 {
12908 Item **arguments= pred->arguments();
12909 cur_arg= arguments[arg_idx]->real_item();
12910 DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
12911
12912 if (cur_arg->type() == Item::FUNC_ITEM)
12913 {
12914 min_max_inspect_cond_for_fields(cur_arg, min_max_arg_item,
12915 min_max_arg_present,
12916 non_min_max_arg_present);
12917 }
12918 else
12919 {
12920 util_min_max_inspect_item(cur_arg,
12921 min_max_arg_item,
12922 min_max_arg_present,
12923 non_min_max_arg_present);
12924 }
12925
12926 if (*min_max_arg_present && *non_min_max_arg_present)
12927 DBUG_RETURN(true);
12928 }
12929
12930 if (pred->functype() == Item_func::MULT_EQUAL_FUNC)
12931 {
12932 /*
12933 Analyze participating fields in a multiequal condition.
12934 */
12935 Item_equal_iterator it(*(Item_equal*)cond);
12936
12937 Item *item_field;
12938 while ((item_field= it++))
12939 {
12940 util_min_max_inspect_item(item_field,
12941 min_max_arg_item,
12942 min_max_arg_present,
12943 non_min_max_arg_present);
12944
12945 if (*min_max_arg_present && *non_min_max_arg_present)
12946 DBUG_RETURN(true);
12947 }
12948 }
12949
12950 break;
12951 }
12952 case Item::FIELD_ITEM: {
12953 util_min_max_inspect_item(cond,
12954 min_max_arg_item,
12955 min_max_arg_present,
12956 non_min_max_arg_present);
12957 DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
12958 DBUG_RETURN(false);
12959 }
12960 default:
12961 break;
12962 }
12963
12964 DBUG_RETURN(false);
12965 }
12966
12967 /*
12968 Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
12969 any. 'tree' must be a unique conjunction to ALL predicates in earlier
12970 keyparts of 'keypart_tree'.
12971
12972 E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
12973 covers 'field', all these conditions satisfy the requirement:
12974
12975 1. "(kp1=2 OR kp1=3) AND kp2=10" => returns "kp2=10"
12976 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)" => returns "kp2=10"
12977 3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12978 => returns "kp2=10 OR kp2=11"
12979
12980 whereas these do not
12981 1. "(kp1=2 AND kp2=10) OR kp1=3"
12982 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
12983 3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12984
12985 This function effectively tests requirement WA2. In combination with
12986 a test that the returned tree has no more than one range it is also
12987 a test of NGA3.
12988
12989 @param[in] field The field we want the SEL_ARG tree for
12990 @param[in] keypart_tree Root node of the SEL_ARG* tree for the index
12991 @param[out] cur_range The SEL_ARG tree, if any, for the keypart
12992 covering 'field'
12993 @retval true 'keypart_tree' contained a predicate for 'field' that
12994 is not conjunction to all predicates on earlier keyparts
12995 @retval false otherwise
12996 */
12997
12998 static bool
12999 get_sel_arg_for_keypart(Field *field,
13000 SEL_ARG *keypart_tree,
13001 SEL_ARG **cur_range)
13002 {
13003 if (keypart_tree == NULL)
13004 return false;
13005 if (keypart_tree->type != SEL_ARG::KEY_RANGE)
13006 {
13007 /*
13008 A range predicate not usable by Loose Index Scan is found.
13009 Predicates for keypart 'keypart_tree->part' and later keyparts
13010 cannot be used.
13011 */
13012 *cur_range= keypart_tree;
13013 return false;
13014 }
13015 if (keypart_tree->field->eq(field))
13016 {
13017 *cur_range= keypart_tree;
13018 return false;
13019 }
13020
13021 SEL_ARG *tree_first_range= NULL;
13022 SEL_ARG *first_kp= keypart_tree->first();
13023
13024 for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
13025 {
13026 SEL_ARG *curr_tree= NULL;
13027 if (cur_kp->next_key_part)
13028 {
13029 if (get_sel_arg_for_keypart(field,
13030 cur_kp->next_key_part,
13031 &curr_tree))
13032 return true;
13033 }
13034 /**
13035 Check if the SEL_ARG tree for 'field' is identical for all ranges in
13036 'keypart_tree'.
13037 */
13038 if (cur_kp == first_kp)
13039 tree_first_range= curr_tree;
13040 else if (!all_same(tree_first_range, curr_tree))
13041 return true;
13042 }
13043 *cur_range= tree_first_range;
13044 return false;
13045 }
13046
13047 /*
13048 Extract a sequence of constants from a conjunction of equality predicates.
13049
13050 SYNOPSIS
13051 get_constant_key_infix()
13052 index_info [in] Descriptor of the chosen index.
13053 index_range_tree [in] Range tree for the chosen index
13054 first_non_group_part [in] First index part after group attribute parts
13055 min_max_arg_part [in] The keypart of the MIN/MAX argument if any
13056 last_part [in] Last keypart of the index
13057 thd [in] Current thread
13058 key_infix [out] Infix of constants to be used for index lookup
13059 key_infix_len [out] Length of the infix
13060 first_non_infix_part [out] The first keypart after the infix (if any)
13061
13062 DESCRIPTION
13063 Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
13064 for each keypart field NGF_i not in GROUP-BY, check that there is a
13065 constant equality predicate among conds with the form (NGF_i = const_ci) or
13066 (const_ci = NGF_i).
13067 Thus all the NGF_i attributes must fill the 'gap' between the last group-by
13068 attribute and the MIN/MAX attribute in the index (if present). Also ensure
13069 that there is only a single range on NGF_i (NGA3). If these
13070 conditions hold, copy each constant from its corresponding predicate into
13071 key_infix, in the order its NG_i attribute appears in the index, and update
13072 key_infix_len with the total length of the key parts in key_infix.
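For example (hypothetical index (a, b, c, d), GROUP BY a, MIN(d)):
given "WHERE b = 1 AND c = 2 AND d > 0", the constants 1 and 2 are
copied into key_infix to fill the gap between the group attribute a
and the MIN/MAX attribute d.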
13073
13074 RETURN
13075 TRUE if the index passes the test
13076 FALSE o/w
13077 */
13078 static bool
13079 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
13080 KEY_PART_INFO *first_non_group_part,
13081 KEY_PART_INFO *min_max_arg_part,
13082 KEY_PART_INFO *last_part, THD *thd,
13083 uchar *key_infix, uint *key_infix_len,
13084 KEY_PART_INFO **first_non_infix_part)
13085 {
13086 SEL_ARG *cur_range;
13087 KEY_PART_INFO *cur_part;
13088 /* End part for the first loop below. */
13089 KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
13090
13091 *key_infix_len= 0;
13092 uchar *key_ptr= key_infix;
13093 for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
13094 {
13095 cur_range= NULL;
13096 /*
13097 Check NGA3:
13098 1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
13099 checks for a unique conjunction of this tree with all the predicates
13100 on the earlier keyparts in the index.
13101 2. Check for multiple ranges on the found keypart tree.
13102
13103 We assume that index_range_tree points to the leftmost keypart in
13104 the index.
13105 */
13106 if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
13107 &cur_range))
13108 return false;
13109
13110 if (cur_range && cur_range->elements > 1)
13111 return false;
13112
13113 if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
13114 {
13115 if (min_max_arg_part)
13116 return false; /* The current keypart has no range predicates at all. */
13117 else
13118 {
13119 *first_non_infix_part= cur_part;
13120 return true;
13121 }
13122 }
13123
13124 if ((cur_range->min_flag & NO_MIN_RANGE) ||
13125 (cur_range->max_flag & NO_MAX_RANGE) ||
13126 (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
13127 return false;
13128
13129 uint field_length= cur_part->store_length;
13130 if (cur_range->maybe_null &&
13131 cur_range->min_value[0] && cur_range->max_value[0])
13132 {
13133 /*
13134 cur_range specifies 'IS NULL'. In this case the argument points
13135 to a "null value" (a copy of is_null_string) that we do not
13136 memcmp() or memcpy() to a field.
13137 */
13138 DBUG_ASSERT (field_length > 0);
13139 *key_ptr= 1;
13140 key_ptr+= field_length;
13141 *key_infix_len+= field_length;
13142 }
13143 else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
13144 { /* cur_range specifies an equality condition. */
13145 memcpy(key_ptr, cur_range->min_value, field_length);
13146 key_ptr+= field_length;
13147 *key_infix_len+= field_length;
13148 }
13149 else
13150 return false;
13151 }
13152
13153 if (!min_max_arg_part && (cur_part == last_part))
13154 *first_non_infix_part= last_part;
13155
13156 return true;
13157 }
13158
13159
13160 /*
13161 Find the key part referenced by a field.
13162
13163 SYNOPSIS
13164 get_field_keypart()
13165 index descriptor of an index
13166 field field that possibly references some key part in index
13167
13168 NOTES
13169 The return value can be used to get a KEY_PART_INFO pointer by
13170 part= index->key_part + get_field_keypart(...) - 1;
13171
13172 RETURN
13173 Positive number which is the consecutive number of the key part, or
13174 0 if field does not reference any index field.
13175 */
13176
13177 static inline uint
13178 get_field_keypart(KEY *index, Field *field)
13179 {
13180 KEY_PART_INFO *part, *end;
13181
13182 for (part= index->key_part, end= part + actual_key_parts(index) ;
13183 part < end; part++)
13184 {
13185 if (field->eq(part->field))
13186 return part - index->key_part + 1;
13187 }
13188 return 0;
13189 }
13190
13191
13192 /*
13193 Find the SEL_ARG sub-tree that corresponds to the chosen index.
13194
13195 SYNOPSIS
13196 get_index_range_tree()
13197 index [in] The ID of the index being looked for
13198 range_tree[in] Tree of ranges being searched
13199 param [in] PARAM from test_quick_select
13200
13201 DESCRIPTION
13202
13203 A SEL_TREE contains range trees for all usable indexes. This procedure
13204 finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
13205 ordered in the same way as the members of PARAM::key, thus we first find
13206 the position of the corresponding index in the array PARAM::key and
13207 return the range tree stored at that position in SEL_TREE::keys.
13209
13210 RETURN
13211 Pointer to the SEL_ARG subtree that corresponds to index.
13212 */
13213
13214 SEL_ARG *get_index_range_tree(uint index, SEL_TREE *range_tree, PARAM *param)
13215 {
13216 uint idx= 0; /* Index nr in param->key_parts */
13217 while (idx < param->keys)
13218 {
13219 if (index == param->real_keynr[idx])
13220 break;
13221 idx++;
13222 }
13223 return(range_tree->keys[idx]);
13224 }
13225
13226
13227 /*
13228 Compute the cost of a quick_group_min_max_select for a particular index.
13229
13230 SYNOPSIS
13231 cost_group_min_max()
13232 table [in] The table being accessed
13233 key [in] The index used to access the table
13234 used_key_parts [in] Number of key parts used to access the index
13235 group_key_parts [in] Number of index key parts in the group prefix
13236 range_tree [in] Tree of ranges for all indexes
13237 index_tree [in] The range tree for the current index
13238 quick_prefix_records [in] Number of records retrieved by the internally
13239 used quick range select if any
13240 have_min [in] True if there is a MIN function
13241 have_max [in] True if there is a MAX function
13242 cost_est [out] The cost to retrieve rows via this quick select
13243 records [out] The number of rows retrieved
13244
13245 DESCRIPTION
13246 This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
13247 the number of rows returned.
13248
13249 NOTES
13250 The cost computation distinguishes several cases:
13251 1) No equality predicates over non-group attributes (thus no key_infix).
13252 If groups are bigger than blocks on the average, then we assume that it
13253 is very unlikely that block ends are aligned with group ends, thus even
13254 if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
13255 keys, except for the first MIN and the last MAX keys, will be in the
13256 same block. If groups are smaller than blocks, then we are going to
13257 read all blocks.
13258 2) There are equality predicates over non-group attributes.
13259 In this case the group prefix is extended by additional constants, and
13260 as a result the min/max values are inside sub-groups of the original
13261 groups. The number of blocks that will be read depends on whether the
13262 ends of these sub-groups will be contained in the same or in different
13263 blocks. We compute the probability for the two ends of a subgroup to be
13264 in two different blocks as the ratio of:
13265 - the number of positions of the left-end of a subgroup inside a group,
13266 such that the right end of the subgroup is past the end of the buffer
13267 containing the left-end, and
13268 - the total number of possible positions for the left-end of the
13269 subgroup, which is the number of keys in the containing group.
13270 We assume it is very unlikely that two ends of subsequent subgroups are
13271 in the same block.
13272 3) There are range predicates over the group attributes.
13273 Then some groups may be filtered by the range predicates. We use the
13274 selectivity of the range predicates to decide how many groups will be
13275 filtered.
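Worked example with illustrative numbers only: if table_records =
1,000,000 and keys_per_block = 100, then num_blocks = 10,001. With
keys_per_group = 50 (groups smaller than blocks, case 1) every block
is read and io_blocks = num_blocks; with keys_per_group = 1,000,
num_groups = 1,001 and, when both MIN and MAX are requested,
io_blocks = num_groups + 1 = 1,002.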
13276
13277 TODO
13278 - Take into account the optional range predicates over the MIN/MAX
13279 argument.
13280 - Check if we have a PK index and we use all cols - then each key is a
13281 group, and it will be better to use an index scan.
13282
13283 RETURN
13284 None
13285 */
13286
13287 void cost_group_min_max(TABLE *table, uint key, uint used_key_parts,
13288 uint group_key_parts, SEL_TREE *range_tree,
13289 SEL_ARG *index_tree, ha_rows quick_prefix_records,
13290 bool have_min, bool have_max,
13291 Cost_estimate *cost_est, ha_rows *records)
13292 {
13293 ha_rows table_records;
13294 uint num_groups;
13295 uint num_blocks;
13296 uint keys_per_block;
13297 rec_per_key_t keys_per_group;
13298 double p_overlap; /* Probability that a sub-group overlaps two blocks. */
13299 double quick_prefix_selectivity;
13300 double io_blocks; // Number of blocks to read from table
13301 DBUG_ENTER("cost_group_min_max");
13302 DBUG_ASSERT(cost_est->is_zero());
13303
13304 const KEY *const index_info= &table->key_info[key];
13305 table_records= table->file->stats.records;
13306 keys_per_block= (table->file->stats.block_size / 2 /
13307 (index_info->key_length + table->file->ref_length)
13308 + 1);
13309 num_blocks= (uint)(table_records / keys_per_block) + 1;
13310
13311 /* Compute the number of keys in a group. */
13312 if (index_info->has_records_per_key(group_key_parts - 1))
13313 // Use index statistics
13314 keys_per_group= index_info->records_per_key(group_key_parts - 1);
13315 else
13316 /* If there is no statistics try to guess */
13317 keys_per_group= guess_rec_per_key(table, index_info, group_key_parts);
13318
13319 num_groups= (uint)(table_records / keys_per_group) + 1;
13320
13321 /* Apply the selectivity of the quick select for group prefixes. */
13322 if (range_tree && (quick_prefix_records != HA_POS_ERROR))
13323 {
13324 quick_prefix_selectivity= (double) quick_prefix_records /
13325 (double) table_records;
13326 num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
13327 set_if_bigger(num_groups, 1);
13328 }
13329
13330 if (used_key_parts > group_key_parts)
13331 {
13332 // Average number of keys in sub-groups formed by a key infix
13333 rec_per_key_t keys_per_subgroup;
13334 if (index_info->has_records_per_key(used_key_parts - 1))
13335 // Use index statistics
13336 keys_per_subgroup= index_info->records_per_key(used_key_parts - 1);
13337 else
13338 {
13339 // If no index statistics then we use a guessed records per key value.
13340 keys_per_subgroup= guess_rec_per_key(table, index_info, used_key_parts);
13341 set_if_smaller(keys_per_subgroup, keys_per_group);
13342 }
13343
13344 /*
13345 Compute the probability that two ends of a subgroup are inside
13346 different blocks.
13347 */
13348 if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
13349 p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
13350 else
13351 {
13352 double blocks_per_group= (double) num_blocks / (double) num_groups;
13353 p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
13354 p_overlap= min(p_overlap, 1.0);
13355 }
13356 io_blocks= min<double>(num_groups * (1 + p_overlap), num_blocks);
13357 }
13358 else
13359 io_blocks= (keys_per_group > keys_per_block) ?
13360 (have_min && have_max) ? (double) (num_groups + 1) :
13361 (double) num_groups :
13362 (double) num_blocks;
13363
13364 /*
13365 Estimate IO cost.
13366 */
13367 const Cost_model_table *const cost_model= table->cost_model();
13368 cost_est->add_io(cost_model->page_read_cost_index(key, io_blocks));
13369
13370 /*
13371 CPU cost must be comparable to that of an index scan as computed
13372 in test_quick_select(). When the groups are small,
13373 e.g. for a unique index, using index scan will be cheaper since it
13374 reads the next record without having to re-position to it on every
13375 group. To make the CPU cost reflect this, we estimate the CPU cost
13376 as the sum of:
13377 1. Cost for evaluating the condition (similarly as for index scan).
13378 2. Cost for navigating the index structure (assuming a b-tree).
13379 Note: We only add the cost for one comparison per block. For a
13380 b-tree the number of comparisons will be larger.
13381 TODO: This cost should be provided by the storage engine.
13382 */
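/*
Illustrative numbers only: with table_records = 1,000,000 and
keys_per_block = 100, tree_height = ceil(log(1e6) / log(100)) = 3.
*/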
13383 const double tree_height= table_records == 0 ?
13384 1.0 :
13385 ceil(log(double(table_records)) /
13386 log(double(keys_per_block)));
13387 const double tree_traversal_cost= cost_model->key_compare_cost(tree_height);
13388
13389 const double cpu_cost= num_groups * (tree_traversal_cost +
13390 cost_model->row_evaluate_cost(1.0));
13391 cost_est->add_cpu(cpu_cost);
13392 *records= num_groups;
13393
13394 DBUG_PRINT("info",
13395 ("table rows: %lu keys/block: %u keys/group: %.1f result rows: %lu blocks: %u",
13396 (ulong)table_records, keys_per_block, keys_per_group,
13397 (ulong) *records, num_blocks));
13398 DBUG_VOID_RETURN;
13399 }
13400
13401
13402 /*
13403 Construct a new quick select object for queries with group by with min/max.
13404
13405 SYNOPSIS
13406 TRP_GROUP_MIN_MAX::make_quick()
13407 param Parameter from test_quick_select
13408 retrieve_full_rows ignored
13409 parent_alloc Memory pool to use, if any.
13410
13411 NOTES
13412 Make_quick ignores the retrieve_full_rows parameter because
13413 QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
13414 The other parameters are ignored as well because all necessary
13415 data to create the QUICK object is computed at this TRP creation
13416 time.
13417
13418 RETURN
13419 New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
13420 NULL otherwise.
13421 */
13422
13423 QUICK_SELECT_I *
13424 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
13425 MEM_ROOT *parent_alloc)
13426 {
13427 QUICK_GROUP_MIN_MAX_SELECT *quick;
13428 DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
13429
13430 quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
13431 param->thd->lex->current_select()->join,
13432 have_min, have_max,
13433 have_agg_distinct, min_max_arg_part,
13434 group_prefix_len, group_key_parts,
13435 used_key_parts, index_info, index,
13436 &cost_est, records, key_infix_len,
13437 key_infix, parent_alloc, is_index_scan);
13438 if (!quick)
13439 DBUG_RETURN(NULL);
13440
13441 if (quick->init())
13442 {
13443 delete quick;
13444 DBUG_RETURN(NULL);
13445 }
13446
13447 if (range_tree)
13448 {
13449 DBUG_ASSERT(quick_prefix_records > 0);
13450 if (quick_prefix_records == HA_POS_ERROR)
13451 quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
13452 else
13453 {
13454 /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
13455 quick->quick_prefix_select= get_quick_select(param, param_idx,
13456 index_tree,
13457 HA_MRR_SORTED,
13458 0,
13459 &quick->alloc);
13460 if (!quick->quick_prefix_select)
13461 {
13462 delete quick;
13463 DBUG_RETURN(NULL);
13464 }
13465 }
13466 /*
13467 Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
13468 attribute, and create an array of QUICK_RANGES to be used by the
13469 new quick select.
13470 */
13471 if (min_max_arg_part)
13472 {
13473 SEL_ARG *min_max_range= index_tree;
13474 while (min_max_range) /* Find the tree for the MIN/MAX key part. */
13475 {
13476 if (min_max_range->field->eq(min_max_arg_part->field))
13477 break;
13478 min_max_range= min_max_range->next_key_part;
13479 }
13480 /* Scroll to the leftmost interval for the MIN/MAX argument. */
13481 while (min_max_range && min_max_range->prev)
13482 min_max_range= min_max_range->prev;
13483 /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
13484 while (min_max_range)
13485 {
13486 if (quick->add_range(min_max_range))
13487 {
13488 delete quick;
13489 quick= NULL;
13490 DBUG_RETURN(NULL);
13491 }
13492 min_max_range= min_max_range->next;
13493 }
13494 }
13495 }
13496 else
13497 quick->quick_prefix_select= NULL;
13498
13499 quick->update_key_stat();
13500 quick->adjust_prefix_ranges();
13501
13502 DBUG_RETURN(quick);
13503 }
13504
13505
13506 /*
13507 Construct new quick select for group queries with min/max.
13508
13509 SYNOPSIS
13510 QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
13511 table The table being accessed
13512 join Descriptor of the current query
13513 have_min TRUE if the query selects a MIN function
13514 have_max TRUE if the query selects a MAX function
13515 min_max_arg_part The only argument field of all MIN/MAX functions
13516 group_prefix_len Length of all key parts in the group prefix
13517 prefix_key_parts All key parts in the group prefix
13518 index_info The index chosen for data access
13519 use_index The id of index_info
13520 read_cost Cost of this access method
13521 records Number of records returned
13522 key_infix_len Length of the key infix appended to the group prefix
13523 key_infix Infix of constants from equality predicates
13524 parent_alloc Memory pool for this and quick_prefix_select data
13525 is_index_scan Get the next different key not by jumping to it via an
13526 index read, but by scanning to the end of the rows
13527 with the same key value.
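
EXAMPLE
A sketch of how the arguments relate, for a hypothetical index
idx(a, b, c, d) and the query
SELECT a, b, MIN(d) FROM t WHERE c = 10 GROUP BY a, b;
here the group prefix covers keyparts (a,b), key_infix holds the
constant 10 for keypart c, and min_max_arg_part points to keypart d.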
13528
13529 RETURN
13530 None
13531 */
13532
13533 QUICK_GROUP_MIN_MAX_SELECT::
13534 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
13535 bool have_max_arg, bool have_agg_distinct_arg,
13536 KEY_PART_INFO *min_max_arg_part_arg,
13537 uint group_prefix_len_arg, uint group_key_parts_arg,
13538 uint used_key_parts_arg, KEY *index_info_arg,
13539 uint use_index, const Cost_estimate *read_cost_arg,
13540 ha_rows records_arg, uint key_infix_len_arg,
13541 uchar *key_infix_arg, MEM_ROOT *parent_alloc,
13542 bool is_index_scan_arg)
13543 :join(join_arg), index_info(index_info_arg),
13544 group_prefix_len(group_prefix_len_arg),
13545 group_key_parts(group_key_parts_arg), have_min(have_min_arg),
13546 have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
13547 seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
13548 key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
13549 min_max_ranges(PSI_INSTRUMENT_ME),
13550 min_functions_it(NULL), max_functions_it(NULL),
13551 is_index_scan(is_index_scan_arg)
13552 {
13553 head= table;
13554 index= use_index;
13555 record= head->record[0];
13556 tmp_record= head->record[1];
13557 cost_est= *read_cost_arg;
13558 records= records_arg;
13559 used_key_parts= used_key_parts_arg;
13560 real_key_parts= used_key_parts_arg;
13561 real_prefix_len= group_prefix_len + key_infix_len;
13562 group_prefix= NULL;
13563 min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
13564
13565 /*
13566 We can't have parent_alloc set as the init function can't handle this case
13567 yet.
13568 */
13569 DBUG_ASSERT(!parent_alloc);
13570 if (!parent_alloc)
13571 {
13572 init_sql_alloc(key_memory_quick_group_min_max_select_root,
13573 &alloc, join->thd->variables.range_alloc_block_size, 0);
13574 join->thd->mem_root= &alloc;
13575 }
13576 else
13577 memset(&alloc, 0, sizeof(MEM_ROOT)); // ensure that it's not used
13578 }
13579
13580
13581 /*
13582 Do post-constructor initialization.
13583
13584 SYNOPSIS
13585 QUICK_GROUP_MIN_MAX_SELECT::init()
13586
13587 DESCRIPTION
13588 The method performs initialization that cannot be done in the constructor,
13589 such as memory allocations that may fail. It allocates memory for the
13590 group prefix and infix buffers, and for the lists of MIN/MAX items to be
13591 updated during execution.
13592
13593 RETURN
13594 0 OK
13595 other Error code
13596 */
13597
13598 int QUICK_GROUP_MIN_MAX_SELECT::init()
13599 {
13600 if (group_prefix) /* Already initialized. */
13601 return 0;
13602
13603 if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len)))
13604 return 1;
13605 /*
13606 We may use group_prefix to store keys with all select fields, so allocate
13607 enough space for it.
13608 */
13609 if (!(group_prefix= (uchar*) alloc_root(&alloc,
13610 real_prefix_len + min_max_arg_len)))
13611 return 1;
13612
13613 if (key_infix_len > 0)
13614 {
13615 /*
13616 The memory location pointed to by key_infix will be deleted soon, so
13617 allocate a new buffer and copy the key_infix into it.
13618 */
13619 uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
13620 if (!tmp_key_infix)
13621 return 1;
13622 memcpy(tmp_key_infix, this->key_infix, key_infix_len);
13623 this->key_infix= tmp_key_infix;
13624 }
13625
13626 if (min_max_arg_part)
13627 {
13628 if (have_min)
13629 {
13630 if (!(min_functions= new List<Item_sum>))
13631 return 1;
13632 }
13633 else
13634 min_functions= NULL;
13635 if (have_max)
13636 {
13637 if (!(max_functions= new List<Item_sum>))
13638 return 1;
13639 }
13640 else
13641 max_functions= NULL;
13642
13643 Item_sum *min_max_item;
13644 Item_sum **func_ptr= join->sum_funcs;
13645 while ((min_max_item= *(func_ptr++)))
13646 {
13647 if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
13648 min_functions->push_back(min_max_item);
13649 else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
13650 max_functions->push_back(min_max_item);
13651 }
13652
13653 if (have_min)
13654 {
13655 if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
13656 return 1;
13657 }
13658
13659 if (have_max)
13660 {
13661 if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
13662 return 1;
13663 }
13664 }
13665
13666 return 0;
13667 }
13668
13669
13670 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
13671 {
13672 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
13673 if (head->file->inited)
13674 /*
13675 We may have used this object for index access during
13676 create_sort_index() and then switched to rnd access for the rest
13677 of execution. Since we don't do cleanup until now, we must call
13678 ha_*_end() for whatever is the current access method.
13679 */
13680 head->file->ha_index_or_rnd_end();
13681
13682 free_root(&alloc,MYF(0));
13683 delete min_functions_it;
13684 delete max_functions_it;
13685 delete quick_prefix_select;
13686 DBUG_VOID_RETURN;
13687 }
13688
13689
13690 /*
13691 Eventually create and add a new quick range object.
13692
13693 SYNOPSIS
13694 QUICK_GROUP_MIN_MAX_SELECT::add_range()
13695 sel_range Range object from which a new QUICK_RANGE object is created
13696
13697 NOTES
13698 Construct a new QUICK_RANGE object from a SEL_ARG object, and
13699 add it to the array min_max_ranges. If sel_range is an infinite
13700 range, e.g. (x < 5 OR x > 4), then skip it and do not construct
13701 a quick range.
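
For illustration (hypothetical single-keypart conditions): x = 5
produces identical, non-NULL min and max values and is tagged
EQ_RANGE; x IS NULL sets the NULL indicator byte in both boundaries
and is tagged NULL_RANGE.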
13702
13703 RETURN
13704 FALSE on success
13705 TRUE otherwise
13706 */
13707
13708 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
13709 {
13710 QUICK_RANGE *range;
13711 uint range_flag= sel_range->min_flag | sel_range->max_flag;
13712
13713 /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
13714 if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
13715 return FALSE;
13716
13717 if (!(sel_range->min_flag & NO_MIN_RANGE) &&
13718 !(sel_range->max_flag & NO_MAX_RANGE))
13719 {
13720 if (sel_range->maybe_null &&
13721 sel_range->min_value[0] && sel_range->max_value[0])
13722 range_flag|= NULL_RANGE; /* IS NULL condition */
13723 /*
13724 Do not perform the comparison if one of the arguments is a NULL value.
13725 */
13726 else if (!sel_range->min_value[0] &&
13727 !sel_range->max_value[0] &&
13728 memcmp(sel_range->min_value, sel_range->max_value,
13729 min_max_arg_len) == 0)
13730 range_flag|= EQ_RANGE; /* equality condition */
13731 }
13732 range= new QUICK_RANGE(sel_range->min_value, min_max_arg_len,
13733 make_keypart_map(sel_range->part),
13734 sel_range->max_value, min_max_arg_len,
13735 make_keypart_map(sel_range->part),
13736 range_flag, HA_READ_INVALID);
13737 if (!range)
13738 return TRUE;
13739 if (min_max_ranges.push_back(range))
13740 return TRUE;
13741 return FALSE;
13742 }
13743
13744
13745 /*
13746 Make the range boundaries inclusive if there are more conditions in
13747 quick_prefix_select than the ones used for jumping through the prefixes.
13748
13749 SYNOPSIS
13750 QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
13751
13752 NOTES
13753 quick_prefix_select is made over the conditions on the whole key.
13754 It defines a number of ranges of length x.
13755 However, when jumping through the prefixes we use only the first
13756 few most significant keyparts in the range key. If there
13757 are more keyparts to follow the ones we are using, we must make the
13758 condition on the key inclusive (because x < "ab" means
13759 x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b')).
13760 To achieve the above we must turn off the NEAR_MIN/NEAR_MAX flags.
13761 */
13762 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
13763 {
13764 if (quick_prefix_select &&
13765 group_prefix_len < quick_prefix_select->max_used_key_length)
13766 {
13767 for (size_t ix= 0; ix < quick_prefix_select->ranges.size(); ++ix)
13768 {
13769 QUICK_RANGE *range= quick_prefix_select->ranges[ix];
13770 range->flag&= ~(NEAR_MIN | NEAR_MAX);
13771 }
13772 }
13773 }
13774
13775
13776 /*
13777 Determine the total number and length of the keys that will be used for
13778 index lookup.
13779
13780 SYNOPSIS
13781 QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13782
13783 DESCRIPTION
13784 The total length of the keys used for index lookup depends on whether
13785 there are any predicates referencing the min/max argument, and/or if
13786 the min/max argument field can be NULL.
13787 This function does an optimistic analysis whether the search key might
13788 be extended by a constant for the min/max keypart. It is 'optimistic'
13789 because during actual execution it may happen that a particular range
13790 is skipped, and then a shorter key will be used. However this is data
13791 dependent and can't be easily estimated here.
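
For illustration: for the hypothetical query
SELECT a, MIN(d) FROM t WHERE d >= 10 GROUP BY a
the right-most (and only) MIN/MAX range has a lower boundary, so the
lookup key may be extended by the store length of d.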
13792
13793 RETURN
13794 None
13795 */
13796
13797 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13798 {
13799 max_used_key_length= real_prefix_len;
13800 if (min_max_ranges.size() > 0)
13801 {
13802 if (have_min)
13803 { /* Check if the right-most range has a lower boundary. */
13804 QUICK_RANGE *rightmost_range= min_max_ranges[min_max_ranges.size() - 1];
13805 if (!(rightmost_range->flag & NO_MIN_RANGE))
13806 {
13807 max_used_key_length+= min_max_arg_len;
13808 used_key_parts++;
13809 return;
13810 }
13811 }
13812 if (have_max)
13813 { /* Check if the left-most range has an upper boundary. */
13814 QUICK_RANGE *leftmost_range= min_max_ranges[0];
13815 if (!(leftmost_range->flag & NO_MAX_RANGE))
13816 {
13817 max_used_key_length+= min_max_arg_len;
13818 used_key_parts++;
13819 return;
13820 }
13821 }
13822 }
13823 else if (have_min && min_max_arg_part &&
13824 min_max_arg_part->field->real_maybe_null())
13825 {
13826 /*
13827 If a MIN/MAX argument value is NULL, we can quickly determine
13828 that we're in the beginning of the next group, because NULLs
13829 are always < any other value. This allows us to quickly
13830 determine the end of the current group and jump to the next
13831 group (see next_min()) and thus effectively increases the
13832 usable key length.
13833 */
13834 max_used_key_length+= min_max_arg_len;
13835 used_key_parts++;
13836 }
13837 }
13838
13839
13840 /*
13841 Initialize a quick group min/max select for key retrieval.
13842
13843 SYNOPSIS
13844 QUICK_GROUP_MIN_MAX_SELECT::reset()
13845
13846 DESCRIPTION
13847 Initialize the index chosen for access and find and store the prefix
13848 of the last group. The method is expensive since it performs disk access.
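
For illustration: the prefix of the very last index entry is saved in
last_prefix here; get_next() later compares each group prefix against
it to detect when the final group has been processed.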
13849
13850 RETURN
13851 0 OK
13852 other Error code
13853 */
13854
13855 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
13856 {
13857 int result;
13858 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
13859
13860 seen_first_key= false;
13861 head->set_keyread(TRUE); /* We need only the key attributes */
13862 /*
13863 Request ordered index access as usage of ::index_last(),
13864 ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
13865 */
13866 if ((result= head->file->ha_index_init(index, true)))
13867 {
13868 head->file->print_error(result, MYF(0));
13869 DBUG_RETURN(result);
13870 }
13871 if (quick_prefix_select && quick_prefix_select->reset())
13872 DBUG_RETURN(1);
13873
13874 result= head->file->ha_index_last(record);
13875 if (result != 0)
13876 {
13877 if (result == HA_ERR_END_OF_FILE)
13878 DBUG_RETURN(0);
13879 else
13880 DBUG_RETURN(result);
13881 }
13882
13883 /* Save the prefix of the last group. */
13884 key_copy(last_prefix, record, index_info, group_prefix_len);
13885
13886 DBUG_RETURN(0);
13887 }
13888
13889
13890
13891 /*
13892 Get the next key containing the MIN and/or MAX key for the next group.
13893
13894 SYNOPSIS
13895 QUICK_GROUP_MIN_MAX_SELECT::get_next()
13896
13897 DESCRIPTION
13898 The method finds the next subsequent group of records that satisfies the
13899 query conditions and finds the keys that contain the MIN/MAX values for
13900 the key part referenced by the MIN/MAX function(s). Once a group and its
13901 MIN/MAX values are found, store these values in the Item_sum objects for
13902 the MIN/MAX functions. The rest of the values in the result row are stored
13903 in the Item_field::result_field of each select field. If the query does
13904 not contain MIN and/or MAX functions, then the function only finds the
13905 group prefix, which is itself an answer row for the query.
13906
13907 NOTES
13908 If both MIN and MAX are computed, then we use the fact that if there is
13909 no MIN key, there can't be a MAX key as well, so we can skip looking
13910 for a MAX key in this case.
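
EXAMPLE
A sketch of one iteration, assuming the hypothetical query
SELECT a, MIN(b) FROM t GROUP BY a;
next_prefix() positions on the first key of the next group of 'a'
values, next_min() locates the smallest 'b' within that group, and
update_min_result() stores it in the MIN() item. The loop repeats
until the prefix saved by reset() in last_prefix is reached.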
13911
13912 RETURN
13913 0 on success
13914 HA_ERR_END_OF_FILE if returned all keys
13915 other if some error occurred
13916 */
13917
13918 int QUICK_GROUP_MIN_MAX_SELECT::get_next()
13919 {
13920 int min_res= 0;
13921 int max_res= 0;
13922 int result;
13923 int is_last_prefix= 0;
13924
13925 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");
13926
13927 /*
13928 Loop until a group is found that satisfies all query conditions or the last
13929 group is reached.
13930 */
13931 do
13932 {
13933 result= next_prefix();
13934 /*
13935 Check if this is the last group prefix. Notice that at this point
13936 this->record contains the current prefix in record format.
13937 */
13938 if (!result)
13939 {
13940 is_last_prefix= key_cmp(index_info->key_part, last_prefix,
13941 group_prefix_len);
13942 DBUG_ASSERT(is_last_prefix <= 0);
13943 }
13944 else
13945 {
13946 if (result == HA_ERR_KEY_NOT_FOUND)
13947 continue;
13948 break;
13949 }
13950
13951 if (have_min)
13952 {
13953 min_res= next_min();
13954 if (min_res == 0)
13955 update_min_result();
13956 }
13957 /* If there is no MIN in the group, there is no MAX either. */
13958 if ((have_max && !have_min) ||
13959 (have_max && have_min && (min_res == 0)))
13960 {
13961 max_res= next_max();
13962 if (max_res == 0)
13963 update_max_result();
13964 /* If a MIN was found, a MAX must have been found as well. */
13965 DBUG_ASSERT((have_max && !have_min) ||
13966 (have_max && have_min && (max_res == 0)));
13967 }
13968 /*
13969 If this is just a GROUP BY or DISTINCT without MIN or MAX and there
13970 are equality predicates for the key parts after the group, find the
13971 first sub-group with the extended prefix.
13972 */
13973 if (!have_min && !have_max && key_infix_len > 0)
13974 result= head->file->ha_index_read_map(record, group_prefix,
13975 make_prev_keypart_map(real_key_parts),
13976 HA_READ_KEY_EXACT);
13977
13978 result= have_min ? min_res : have_max ? max_res : result;
13979 } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13980 is_last_prefix != 0);
13981
13982 if (result == HA_ERR_KEY_NOT_FOUND)
13983 result= HA_ERR_END_OF_FILE;
13984
13985 DBUG_RETURN(result);
13986 }
13987
13988
13989 /*
13990 Retrieve the minimal key in the next group.
13991
13992 SYNOPSIS
13993 QUICK_GROUP_MIN_MAX_SELECT::next_min()
13994
13995 DESCRIPTION
13996 Find the minimal key within this group such that the key satisfies the query
13997 conditions and NULL semantics. The found key is loaded into this->record.
13998
13999 IMPLEMENTATION
14000 Depending on the size of min_max_ranges, key_infix_len, and
14001 whether there is a NULL in the MIN field, this function may directly
14002 return without any data access. In this case we use the key loaded into
14003 this->record by the call to this->next_prefix() just before this call.
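
For illustration: if the first row of a group has a NULL MIN/MAX
argument, the code searches with HA_READ_AFTER_KEY for the first
non-NULL row; if that row already belongs to the next group, the
whole group is NULL in that column and the saved first row is
restored as the result.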
14004
14005 RETURN
14006 0 on success
14007 HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
14008 HA_ERR_END_OF_FILE - "" -
14009 other if some error occurred
14010 */
14011
14012 int QUICK_GROUP_MIN_MAX_SELECT::next_min()
14013 {
14014 int result= 0;
14015 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");
14016
14017 /* Find the MIN key using the eventually extended group prefix. */
14018 if (min_max_ranges.size() > 0)
14019 {
14020 if ((result= next_min_in_range()))
14021 DBUG_RETURN(result);
14022 }
14023 else
14024 {
14025 /* Apply the constant equality conditions to the non-group select fields */
14026 if (key_infix_len > 0)
14027 {
14028 if ((result= head->file->ha_index_read_map(record, group_prefix,
14029 make_prev_keypart_map(real_key_parts),
14030 HA_READ_KEY_EXACT)))
14031 DBUG_RETURN(result);
14032 }
14033
14034 /*
14035 If the min/max argument field is NULL, skip subsequent rows in the same
14036 group with NULL in it. Notice that:
14037 - if the first row in a group doesn't have a NULL in the field, no row
14038 in the same group has (because NULL < any other value),
14039 - min_max_arg_part->field->ptr points to some place in 'record'.
14040 */
14041 if (min_max_arg_part && min_max_arg_part->field->is_null())
14042 {
14043 uchar key_buf[MAX_KEY_LENGTH];
14044
14045 /* Find the first subsequent record without NULL in the MIN/MAX field. */
14046 key_copy(key_buf, record, index_info, max_used_key_length);
14047 result= head->file->ha_index_read_map(record, key_buf,
14048 make_keypart_map(real_key_parts),
14049 HA_READ_AFTER_KEY);
14050 /*
14051 Check if the new record belongs to the current group by comparing its
14052 prefix with the group's prefix. If it is from the next group, then the
14053 whole group has NULLs in the MIN/MAX field, so use the first record in
14054 the group as a result.
14055 TODO:
14056 It is possible to reuse this new record as the result candidate for the
14057 next call to next_min(), and to save one lookup in the next call. For
14058 this add a new member 'this->next_group_prefix'.
14059 */
14060 if (!result)
14061 {
14062 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
14063 key_restore(record, key_buf, index_info, 0);
14064 }
14065 else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
14066 result= 0; /* There is a result in any case. */
14067 }
14068 }
14069
14070 /*
14071 If the MIN attribute is non-nullable, this->record already contains the
14072 MIN key in the group, so just return.
14073 */
14074 DBUG_RETURN(result);
14075 }
14076
14077
14078 /*
14079 Retrieve the maximal key in the next group.
14080
14081 SYNOPSIS
14082 QUICK_GROUP_MIN_MAX_SELECT::next_max()
14083
14084 DESCRIPTION
14085 Lookup the maximal key of the group, and store it into this->record.
14086
14087 RETURN
14088 0 on success
14089 HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
14090 HA_ERR_END_OF_FILE - "" -
14091 other if some error occurred
14092 */
14093
14094 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
14095 {
14096 int result;
14097
14098 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
14099
14100 /* Get the last key in the (possibly extended) group. */
14101 if (min_max_ranges.size() > 0)
14102 result= next_max_in_range();
14103 else
14104 result= head->file->ha_index_read_map(record, group_prefix,
14105 make_prev_keypart_map(real_key_parts),
14106 HA_READ_PREFIX_LAST);
14107 DBUG_RETURN(result);
14108 }
14109
14110
14111 /**
14112 Find the next different key value by skipping all the rows with the same key
14113 value.
14114
14115 Implements a specialized loose index access method for queries
14116 containing aggregate functions with distinct of the form:
14117 SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
14118 This method replaces the index scan + Unique class (distinct
14119 selection), which visits all the rows of a covering index
14120 instead of jumping to the beginning of each group.
14121 TODO: Placeholder function. To be replaced by a handler API call
14122
14123 @param is_index_scan hint to use index scan instead of random index read
14124 to find the next different value.
14125 @param file table handler
14126 @param key_part group key to compare
14127 @param record row data
14128 @param group_prefix current key prefix data
14129 @param group_prefix_len length of the current key prefix data
14130 @param group_key_parts number of the current key prefix columns
14131 @return status
14132 @retval 0 success
14133 @retval !0 failure
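
For illustration: with the is_index_scan hint set, the loop below calls
ha_index_next() until key_cmp() reports a key that no longer matches
the group prefix; otherwise a single call to
ha_index_read_map(..., HA_READ_AFTER_KEY) jumps past the group.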
14134 */
14135
14136 static int index_next_different(bool is_index_scan, handler *file,
14137 KEY_PART_INFO *key_part, uchar * record,
14138 const uchar * group_prefix,
14139 uint group_prefix_len,
14140 uint group_key_parts)
14141 {
14142 if (is_index_scan)
14143 {
14144 int result= 0;
14145
14146 while (!key_cmp (key_part, group_prefix, group_prefix_len))
14147 {
14148 result= file->ha_index_next(record);
14149 if (result)
14150 return(result);
14151 }
14152 return result;
14153 }
14154 else
14155 return file->ha_index_read_map(record, group_prefix,
14156 make_prev_keypart_map(group_key_parts),
14157 HA_READ_AFTER_KEY);
14158 }
14159
14160
14161 /*
14162 Determine the prefix of the next group.
14163
14164 SYNOPSIS
14165 QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
14166
14167 DESCRIPTION
14168 Determine the prefix of the next group that satisfies the query conditions.
14169 If there is a range condition referencing the group attributes, use a
14170 QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
14171 condition. If there is a key infix of constants, append this infix
14172 immediately after the group attributes. The possibly extended prefix is
14173 stored in this->group_prefix. The first key of the found group is stored in
14174 this->record, on which relies this->next_min().
14175
14176 RETURN
14177 0 on success
14178 HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
14179 HA_ERR_END_OF_FILE if there are no more keys
14180 other if some error occurred
14181 */
14182 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
14183 {
14184 int result;
14185 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
14186
14187 if (quick_prefix_select)
14188 {
14189 uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
14190 if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
14191 group_key_parts,
14192 cur_prefix)))
14193 DBUG_RETURN(result);
14194 seen_first_key= TRUE;
14195 }
14196 else
14197 {
14198 if (!seen_first_key)
14199 {
14200 result= head->file->ha_index_first(record);
14201 if (result)
14202 DBUG_RETURN(result);
14203 seen_first_key= TRUE;
14204 }
14205 else
14206 {
14207 /* Load the first key in this group into record. */
14208 result= index_next_different (is_index_scan, head->file,
14209 index_info->key_part,
14210 record, group_prefix, group_prefix_len,
14211 group_key_parts);
14212 if (result)
14213 DBUG_RETURN(result);
14214 }
14215 }
14216
14217 /* Save the prefix of this group for subsequent calls. */
14218 key_copy(group_prefix, record, index_info, group_prefix_len);
14219 /* Append key_infix to group_prefix. */
14220 if (key_infix_len > 0)
14221 memcpy(group_prefix + group_prefix_len,
14222 key_infix, key_infix_len);
14223
14224 DBUG_RETURN(0);
14225 }
14226
14227
14228 /*
14229 Find the minimal key in a group that satisfies some range conditions for the
14230 min/max argument field.
14231
14232 SYNOPSIS
14233 QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
14234
14235 DESCRIPTION
14236 Given the sequence of ranges min_max_ranges, find the minimal key that is
14237 in the left-most possible range. If there is no such key, then the current
14238 group does not have a MIN key that satisfies the WHERE clause. If a key is
14239 found, its value is stored in this->record.
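
A sketch, assuming the hypothetical conditions MIN(d) with
WHERE d = 3 OR d BETWEEN 5 AND 7: min_max_ranges holds [3,3] and
[5,7]; the loop below first probes [3,3] with HA_READ_KEY_EXACT and,
failing that, probes [5,7] with HA_READ_KEY_OR_NEXT, verifying that
the found key still belongs to the current group.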
14240
14241 RETURN
14242 0 on success
14243 HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
14244 the ranges
14245 HA_ERR_END_OF_FILE - "" -
14246 other if some error
14247 */
14248
14249 int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
14250 {
14251 ha_rkey_function find_flag;
14252 key_part_map keypart_map;
14253 bool found_null= FALSE;
14254 int result= HA_ERR_KEY_NOT_FOUND;
14255
14256 DBUG_ASSERT(min_max_ranges.size() > 0);
14257
14258 /* Search from the left-most range to the right. */
14259 for (Quick_ranges::const_iterator it= min_max_ranges.begin();
14260 it != min_max_ranges.end(); ++it)
14261 {
14262 QUICK_RANGE *cur_range= *it;
14263 /*
14264 If the current value for the min/max argument is bigger than the right
14265 boundary of cur_range, there is no need to check this range.
14266 */
14267 if (it != min_max_ranges.begin() && !(cur_range->flag & NO_MAX_RANGE) &&
14268 (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
14269 min_max_arg_len) == 1))
14270 continue;
14271
14272 if (cur_range->flag & NO_MIN_RANGE)
14273 {
14274 keypart_map= make_prev_keypart_map(real_key_parts);
14275 find_flag= HA_READ_KEY_EXACT;
14276 }
14277 else
14278 {
14279 /* Extend the search key with the lower boundary for this range. */
14280 memcpy(group_prefix + real_prefix_len, cur_range->min_key,
14281 cur_range->min_length);
14282 keypart_map= make_keypart_map(real_key_parts);
14283 find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
14284 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
14285 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
14286 }
14287
14288 result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
14289 find_flag);
14290 if (result)
14291 {
14292 if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
14293 (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
14294 continue; /* Check the next range. */
14295
14296 /*
14297 In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
14298 HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
14299 range, it can't succeed for any other subsequent range.
14300 */
14301 break;
14302 }
14303
14304 /* A key was found. */
14305 if (cur_range->flag & EQ_RANGE)
14306 break; /* No need to perform the checks below for equal keys. */
14307
14308 if (cur_range->flag & NULL_RANGE)
14309 {
14310 /*
14311 Remember this key, and continue looking for a non-NULL key that
14312 satisfies some other condition.
14313 */
14314 memcpy(tmp_record, record, head->s->rec_buff_length);
14315 found_null= TRUE;
14316 continue;
14317 }
14318
14319 /* Check if record belongs to the current group. */
14320 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
14321 {
14322 result= HA_ERR_KEY_NOT_FOUND;
14323 continue;
14324 }
14325
14326 /* If there is an upper limit, check if the found key is in the range. */
14327 if ( !(cur_range->flag & NO_MAX_RANGE) )
14328 {
14329 /* Compose the MAX key for the range. */
14330 uchar *max_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
14331 memcpy(max_key, group_prefix, real_prefix_len);
14332 memcpy(max_key + real_prefix_len, cur_range->max_key,
14333 cur_range->max_length);
14334 /* Compare the found key with max_key. */
14335 int cmp_res= key_cmp(index_info->key_part, max_key,
14336 real_prefix_len + min_max_arg_len);
14337 /*
14338 The key is outside of the range if:
14339 the interval is open and the key is equal to the maximum boundary
14340 or
14341 the key is greater than the maximum
14342 */
14343 if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
14344 cmp_res > 0)
14345 {
14346 result= HA_ERR_KEY_NOT_FOUND;
14347 continue;
14348 }
14349 }
14350 /* If we got to this point, the current key qualifies as MIN. */
14351 DBUG_ASSERT(result == 0);
14352 break;
14353 }
14354 /*
14355 If there was a key with NULL in the MIN/MAX field, and there was no other
14356 key without NULL from the same group that satisfies some other condition,
14357 then use the key with the NULL.
14358 */
14359 if (found_null && result)
14360 {
14361 memcpy(record, tmp_record, head->s->rec_buff_length);
14362 result= 0;
14363 }
14364 return result;
14365 }
14366
14367
14368 /*
14369 Find the maximal key in a group that satisfies some range conditions for the
14370 min/max argument field.
14371
14372 SYNOPSIS
14373 QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
14374
14375 DESCRIPTION
14376 Given the sequence of ranges min_max_ranges, find the maximal key that is
14377 in the right-most possible range. If there is no such key, then the current
14378 group does not have a MAX key that satisfies the WHERE clause. If a key is
14379 found, its value is stored in this->record.
14380
14381 RETURN
14382 0 on success
14383 HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
14384 the ranges
14385 HA_ERR_END_OF_FILE - "" -
14386 other if some error
14387 */
14388
14389 int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
14390 {
14391 ha_rkey_function find_flag;
14392 key_part_map keypart_map;
14393 int result;
14394
14395 DBUG_ASSERT(min_max_ranges.size() > 0);
14396
14397 /* Search from the right-most range to the left. */
14398 for (Quick_ranges::const_iterator it= min_max_ranges.end();
14399 it != min_max_ranges.begin(); --it)
14400 {
14401 QUICK_RANGE *cur_range = *(it - 1);
14402 /*
14403 If the current value for the min/max argument is smaller than the left
14404 boundary of cur_range, there is no need to check this range.
14405 */
14406 if (it != min_max_ranges.end() &&
14407 !(cur_range->flag & NO_MIN_RANGE) &&
14408 (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
14409 min_max_arg_len) == -1))
14410 continue;
14411
14412 if (cur_range->flag & NO_MAX_RANGE)
14413 {
14414 keypart_map= make_prev_keypart_map(real_key_parts);
14415 find_flag= HA_READ_PREFIX_LAST;
14416 }
14417 else
14418 {
14419 /* Extend the search key with the upper boundary for this range. */
14420 memcpy(group_prefix + real_prefix_len, cur_range->max_key,
14421 cur_range->max_length);
14422 keypart_map= make_keypart_map(real_key_parts);
14423 find_flag= (cur_range->flag & EQ_RANGE) ?
14424 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
14425 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
14426 }
14427
14428 result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
14429 find_flag);
14430
14431 if (result)
14432 {
14433 if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
14434 (cur_range->flag & EQ_RANGE))
14435 continue; /* Check the next range. */
14436
14437 /*
14438 If no key was found with this upper bound, there certainly are no keys
14439 in the ranges to the left.
14440 */
14441 return result;
14442 }
14443 /* A key was found. */
14444 if (cur_range->flag & EQ_RANGE)
14445 return 0; /* No need to perform the checks below for equal keys. */
14446
14447 /* Check if record belongs to the current group. */
14448 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
14449 continue; // Row not found
14450
14451 /* If there is a lower limit, check if the found key is in the range. */
14452 if ( !(cur_range->flag & NO_MIN_RANGE) )
14453 {
14454 /* Compose the MIN key for the range. */
14455 uchar *min_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
14456 memcpy(min_key, group_prefix, real_prefix_len);
14457 memcpy(min_key + real_prefix_len, cur_range->min_key,
14458 cur_range->min_length);
14459 /* Compare the found key with min_key. */
14460 int cmp_res= key_cmp(index_info->key_part, min_key,
14461 real_prefix_len + min_max_arg_len);
14462 /*
14463 The key is outside of the range if:
14464 the interval is open and the key is equal to the minimum boundary
14465 or
14466 the key is less than the minimum
14467 */
14468 if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
14469 cmp_res < 0)
14470 continue;
14471 }
14472 /* If we got to this point, the current key qualifies as MAX. */
14473 return result;
14474 }
14475 return HA_ERR_KEY_NOT_FOUND;
14476 }
14477
14478
14479 /*
14480 Update all MIN function results with the newly found value.
14481
14482 SYNOPSIS
14483 QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
14484
14485 DESCRIPTION
14486 The method iterates through all MIN functions and updates the result value
14487 of each function by calling Item_sum::reset(), which in turn picks the new
14488 result value from this->head->record[0], previously updated by
14489 next_min(). The updated value is stored in a member variable of each of the
14490 Item_sum objects, depending on the value type.
14491
14492 IMPLEMENTATION
14493 The update must be done separately for MIN and MAX, immediately after
14494 next_min() was called and before next_max() is called, because both MIN and
14495 MAX take their result value from the same buffer this->head->record[0]
14496 (i.e. this->record).
14497
14498 RETURN
14499 None
14500 */
14501
14502 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
14503 {
14504 Item_sum *min_func;
14505
14506 min_functions_it->rewind();
14507 while ((min_func= (*min_functions_it)++))
14508 min_func->reset_and_add();
14509 }
14510
14511
14512 /*
14513 Update all MAX function results with the newly found value.
14514
14515 SYNOPSIS
14516 QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
14517
14518 DESCRIPTION
14519 The method iterates through all MAX functions and updates the result value
14520 of each function by calling Item_sum::reset(), which in turn picks the new
14521 result value from this->head->record[0], previously updated by
14522 next_max(). The updated value is stored in a member variable of each of the
14523 Item_sum objects, depending on the value type.
14524
14525 IMPLEMENTATION
14526 The update must be done separately for MIN and MAX, immediately after
14527 next_max() was called, because both MIN and MAX take their result value
14528 from the same buffer this->head->record[0] (i.e. this->record).
14529
14530 RETURN
14531 None
14532 */
14533
14534 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
14535 {
14536 Item_sum *max_func;
14537
14538 max_functions_it->rewind();
14539 while ((max_func= (*max_functions_it)++))
14540 max_func->reset_and_add();
14541 }
14542
14543
14544 /*
14545 Append comma-separated list of keys this quick select uses to key_names;
14546 append comma-separated list of corresponding used lengths to used_lengths.
14547
14548 SYNOPSIS
14549 QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
14550 key_names [out] Names of used indexes
14551 used_lengths [out] Corresponding lengths of the used keys
14552
14553 DESCRIPTION
14554 This method is used by select_describe to extract the names of the
14555 indexes used by a quick select.
14556
14557 */
14558
14559 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
14560 String *used_lengths)
14561 {
14562 char buf[64];
14563 size_t length;
14564 key_names->append(index_info->name);
14565 length= longlong2str(max_used_key_length, buf, 10) - buf;
14566 used_lengths->append(buf, length);
14567 }
14568
14569
14570
14571 /**
14572 Traverse the R-B range tree for this and later keyparts to see if
14573 there are at least as many equality ranges as defined by the limit.
14574
14575 @param keypart_root The root of a R-B tree of ranges for a given keypart.
14576 @param[in,out] count The number of equality ranges found so far
14577 @param limit The limit on the number of equality ranges
14578
14579 @retval true if limit > 0 and 'limit' or more equality ranges have been
14580 found in the range R-B trees
14581 @retval false otherwise
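
For illustration (hypothetical predicates): with an index on (a,b),
the condition a IN (1,2,3) AND b IN (10,20) forms three equality
ranges on keypart a, each linked to two on keypart b, so the
recursion below counts 3 * 2 = 6 equality ranges against the limit.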
14582
14583 */
14584 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint *count, uint limit)
14585 {
14586 // "Statistics instead of index dives" feature is turned off
14587 if (limit == 0)
14588 return false;
14589
14590 /*
14591 Optimization: if there is at least one equality range, index
14592 statistics will be used when limit is 1. It's safe to return true
14593 even without checking that there is an equality range because if
14594 there are none, index statistics will not be used anyway.
14595 */
14596 if (limit == 1)
14597 return true;
14598
14599 for (SEL_ARG *keypart_range= keypart_root->first();
14600 keypart_range; keypart_range= keypart_range->next)
14601 {
14602 /*
14603 This is an equality range predicate and should be counted if:
14604 1) the range for this keypart does not have a min/max flag
14605 (which indicates <, <= etc), and
14606 2) the lower and upper range boundaries have the same value
14607 (it's not a "x BETWEEN a AND b")
14608
14609 Note, however, that if this is an "x IS NULL" condition we don't
14610 count it because the number of NULL-values is likely to be off
14611 the index statistics we plan to use.
14612 */
14613 if (!keypart_range->min_flag && !keypart_range->max_flag && // 1)
14614 !keypart_range->cmp_max_to_min(keypart_range) && // 2)
14615 !keypart_range->is_null_interval()) // "x IS NULL"
14616 {
14617 /*
14618 Count predicates in the next keypart, but only if that keypart
14619 is the next in the index.
14620 */
14621 if (keypart_range->next_key_part &&
14622 keypart_range->next_key_part->part == keypart_range->part + 1)
14623 eq_ranges_exceeds_limit(keypart_range->next_key_part, count, limit);
14624 else
14625 // We've found a path of equality predicates down to a keypart leaf
14626 (*count)++;
14627
14628 if (*count >= limit)
14629 return true;
14630 }
14631 }
14632 return false;
14633 }
14634
14635 #ifndef DBUG_OFF
14636
14637 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
14638 const char *msg)
14639 {
14640 char buff[1024];
14641 DBUG_ENTER("print_sel_tree");
14642
14643 String tmp(buff,sizeof(buff),&my_charset_bin);
14644 tmp.length(0);
14645 for (uint idx= 0; idx < param->keys; idx++)
14646 {
14647 if (tree_map->is_set(idx))
14648 {
14649 uint keynr= param->real_keynr[idx];
14650 if (tmp.length())
14651 tmp.append(',');
14652 tmp.append(param->table->key_info[keynr].name);
14653 }
14654 }
14655 if (!tmp.length())
14656 tmp.append(STRING_WITH_LEN("(empty)"));
14657
14658 DBUG_PRINT("info", ("SEL_TREE: %p (%s) scans: %s", tree, msg, tmp.ptr()));
14659 DBUG_VOID_RETURN;
14660 }
14661
14662
14663 static void print_ror_scans_arr(TABLE *table, const char *msg,
14664 struct st_ror_scan_info **start,
14665 struct st_ror_scan_info **end)
14666 {
14667 DBUG_ENTER("print_ror_scans_arr");
14668
14669 char buff[1024];
14670 String tmp(buff,sizeof(buff),&my_charset_bin);
14671 tmp.length(0);
14672 for (;start != end; start++)
14673 {
14674 if (tmp.length())
14675 tmp.append(',');
14676 tmp.append(table->key_info[(*start)->keynr].name);
14677 }
14678 if (!tmp.length())
14679 tmp.append(STRING_WITH_LEN("(empty)"));
14680 DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
14681 fprintf(DBUG_FILE,"ROR key scans (%s): %s", msg, tmp.ptr());
14682
14683 DBUG_VOID_RETURN;
14684 }
14685
14686
14687 #endif /* !DBUG_OFF */
14688
14689 /**
14690 Print a key to a string
14691
14692 @param[out] out String the key is appended to
14693 @param[in] key_part Index components description
14694 @param[in] key Key tuple
14696 */
14697 static void
14698 print_key_value(String *out, const KEY_PART_INFO *key_part, const uchar *key)
14699 {
14700 Field *field= key_part->field;
14701
14702 if (field->flags & BLOB_FLAG)
14703 {
14704 // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
14705 if (field->real_maybe_null() && *key)
14706 out->append(STRING_WITH_LEN("NULL"));
14707 else
14708 (field->type() == MYSQL_TYPE_GEOMETRY) ?
14709 out->append(STRING_WITH_LEN("unprintable_geometry_value")) :
14710 out->append(STRING_WITH_LEN("unprintable_blob_value"));
14711 return;
14712 }
14713
14714 uint store_length= key_part->store_length;
14715
14716 if (field->real_maybe_null())
14717 {
14718 /*
14719 Byte 0 of key is the null-byte. If set, key is NULL.
14720 Otherwise, print the key value starting immediately after the
14721 null-byte
14722 */
14723 if (*key)
14724 {
14725 out->append(STRING_WITH_LEN("NULL"));
14726 return;
14727 }
14728 key++; // Skip null byte
14729 store_length--;
14730 }
14731
14732 /*
14733 Binary data cannot be converted to UTF8 which is what the
14734 optimizer trace expects. If the column is binary, the hex
14735 representation is printed to the trace instead.
14736 */
14737 if (field->flags & BINARY_FLAG)
14738 {
14739 out->append("0x");
14740 for (uint i= 0; i < store_length; i++)
14741 {
14742 out->append(_dig_vec_lower[*(key+i) >> 4]);
14743 out->append(_dig_vec_lower[*(key+i) & 0x0F]);
14744 }
14745 return;
14746 }
14747
14748 char buff[128];
14749 String tmp(buff, sizeof(buff), system_charset_info);
14750 tmp.length(0);
14751
14752 TABLE *table= field->table;
14753 my_bitmap_map *old_sets[2];
14754
14755 dbug_tmp_use_all_columns(table, old_sets, table->read_set,
14756 table->write_set);
14757
14758 field->set_key_image(key, key_part->length);
14759 if (field->type() == MYSQL_TYPE_BIT)
14760 (void) field->val_int_as_str(&tmp, 1); // may change tmp's charset
14761 else
14762 field->val_str(&tmp); // may change tmp's charset
14763 out->append(tmp.ptr(), tmp.length(), tmp.charset());
14764
14765 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14766 }
14767
14768 /**
14769 Append range info for a key part to a string
14770
14771 @param[in,out] out String the range info is appended to
14772 @param[in] key_part Indexed column used in a range select
14773 @param[in] min_key Key tuple describing lower bound of range
14774 @param[in] max_key Key tuple describing upper bound of range
14775 @param[in] flag Key range flags defining what min_key
14776 and max_key represent @see my_base.h
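
For illustration: a range on a hypothetical column col with boundaries
10 and 20, NEAR_MIN cleared and NEAR_MAX set, is printed as
"10 <= col < 20"; with NO_MIN_RANGE set, only "col < 20" is printed.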
14777 */
14778 void append_range(String *out,
14779 const KEY_PART_INFO *key_part,
14780 const uchar *min_key, const uchar *max_key,
14781 const uint flag)
14782 {
14783 if (out->length() > 0)
14784 out->append(STRING_WITH_LEN(" AND "));
14785
14786 if (flag & GEOM_FLAG)
14787 {
14788 /*
14789 The flags of GEOM ranges do not work the same way as for other
14790 range types, so printing "col < some_geom" doesn't make sense.
14791 Just print the column name, not operator.
14792 */
14793 out->append(key_part->field->field_name);
14794 out->append(STRING_WITH_LEN(" "));
14795 print_key_value(out, key_part, min_key);
14796 return;
14797 }
14798
14799 if (!(flag & NO_MIN_RANGE))
14800 {
14801 print_key_value(out, key_part, min_key);
14802 if (flag & NEAR_MIN)
14803 out->append(STRING_WITH_LEN(" < "));
14804 else
14805 out->append(STRING_WITH_LEN(" <= "));
14806 }
14807
14808 out->append(key_part->field->field_name);
14809
14810 if (!(flag & NO_MAX_RANGE))
14811 {
14812 if (flag & NEAR_MAX)
14813 out->append(STRING_WITH_LEN(" < "));
14814 else
14815 out->append(STRING_WITH_LEN(" <= "));
14816 print_key_value(out, key_part, max_key);
14817 }
14818 }
14819
14820 /**
14821 Traverse an R-B tree of range conditions and append all ranges for
14822 this keypart and consecutive keyparts to range_trace (if non-NULL)
14823 or to range_string (if range_trace is NULL). See description of R-B
14824 trees/SEL_ARG for details on how ranges are linked.
14825
14826 @param[in,out] range_trace Optimizer trace array ranges are appended to
14827 @param[in,out] range_string The string where range predicates are
14828 appended when the last keypart has
14829 been reached.
14830 @param range_so_far String containing ranges for keyparts prior
14831 to this keypart.
14832 @param keypart_root The root of the R-B tree containing intervals
14833 for this keypart.
14834 @param key_parts Index components description, used when adding
14835 information to the optimizer trace
14836 @param print_full Whether or not ranges on unusable keyparts
14837 should be printed. Useful for debugging.
14838
14839 @note This function mimics the behavior of sel_arg_range_seq_next()
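
For illustration, with hypothetical ranges a = 1 (with a nested range
b < 5) and a = 2 (with no b-range), the recursion yields the string
"(1 <= a <= 1 AND b < 5) OR (2 <= a <= 2)".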
14840 */
14841 static void append_range_all_keyparts(Opt_trace_array *range_trace,
14842 String *range_string,
14843 String *range_so_far,
14844 SEL_ARG *keypart_root,
14845 const KEY_PART_INFO *key_parts,
14846 const bool print_full)
14847 {
14848 DBUG_ASSERT(keypart_root && keypart_root != &null_element);
14849
14850 const bool append_to_trace= (range_trace != NULL);
14851
14852 // Either add info to range_string or to range_trace
14853 DBUG_ASSERT(append_to_trace ? !range_string : (range_string != NULL));
14854
14855 // Navigate to first interval in red-black tree
14856 const KEY_PART_INFO *cur_key_part= key_parts + keypart_root->part;
14857 const SEL_ARG *keypart_range= keypart_root->first();
14858
14859 const size_t save_range_so_far_length= range_so_far->length();
14860
14861 while (keypart_range)
14862 {
14863 /*
14864 Skip the rest of condition printing to avoid OOM if appending to
14865 range_string and the string becomes too long. Printing very long
14866 range conditions normally doesn't make sense either.
14867 */
14868 if (!append_to_trace && range_string->length() > 500)
14869 {
14870 range_string->append(STRING_WITH_LEN("..."));
14871 break;
14872 }
14873
14874 // Append the current range predicate to the range String
14875 append_range(range_so_far, cur_key_part,
14876 keypart_range->min_value, keypart_range->max_value,
14877 keypart_range->min_flag | keypart_range->max_flag);
14878
14879 /*
14880 Print range predicates for consecutive keyparts if
14881 1) There are predicates for later keyparts, and
14882 2) We explicitly requested to print even the ranges that will
14883 not be usable by range access, or
14884 3) There are no "holes" in the used keyparts (keypartX can only
14885 be used if there is a range predicate on keypartX-1), and
14886 4) The current range is an equality range
14887 */
14888 if (keypart_range->next_key_part && // 1
14889 (print_full || // 2
14890 (keypart_range->next_key_part->part == keypart_range->part + 1 && // 3
14891 keypart_range->is_singlepoint()))) // 4
14892 {
14893 append_range_all_keyparts(range_trace, range_string, range_so_far,
14894 keypart_range->next_key_part, key_parts,
14895 print_full);
14896 }
14897 else
14898 {
14899 /*
14900 This is the last keypart with a usable range predicate. Print
14901 full range info to the optimizer trace or to the string
14902 */
14903 if (append_to_trace)
14904 range_trace->add_utf8(range_so_far->ptr(),
14905 range_so_far->length());
14906 else
14907 {
14908 if (range_string->length() == 0)
14909 range_string->append(STRING_WITH_LEN("("));
14910 else
14911 range_string->append(STRING_WITH_LEN(" OR ("));
14912
14913 range_string->append(range_so_far->ptr(), range_so_far->length());
14914 range_string->append(STRING_WITH_LEN(")"));
14915 }
14916 }
14917 keypart_range= keypart_range->next;
14918 /*
14919 Now moving to next range for this keypart, so "reset"
14920 range_so_far to include only range description of earlier
14921 keyparts
14922 */
14923 range_so_far->length(save_range_so_far_length);
14924 }
14925 }
14926
14927 /**
14928 Print the ranges in a SEL_TREE to debug log.
14929
14930 @param tree_name Descriptive name of the tree
14931 @param tree The SEL_TREE that will be printed to debug log
14932 @param param PARAM from test_quick_select
14933 */
14934 static inline void dbug_print_tree(const char *tree_name,
14935 SEL_TREE *tree,
14936 const RANGE_OPT_PARAM *param)
14937 {
14938 #ifndef DBUG_OFF
14939 print_tree(NULL, tree_name, tree, param, true);
14940 #endif
14941 }
14942
14943
14944 static inline void print_tree(String *out,
14945 const char *tree_name,
14946 SEL_TREE *tree,
14947 const RANGE_OPT_PARAM *param,
14948 const bool print_full)
14949 {
14950 if (!param->using_real_indexes)
14951 {
14952 if (out)
14953 {
14954 out->append(tree_name);
14955 out->append(" uses a partitioned index and cannot be printed");
14956 }
14957 else
14958 DBUG_PRINT("info",
14959 ("sel_tree: "
14960 "%s uses a partitioned index and cannot be printed",
14961 tree_name));
14962 return;
14963 }
14964
14965 if (!tree)
14966 {
14967 if (out)
14968 {
14969 out->append(tree_name);
14970 out->append(" is NULL");
14971 }
14972 else
14973 DBUG_PRINT("info", ("sel_tree: %s is NULL", tree_name));
14974 return;
14975 }
14976
14977 if (tree->type == SEL_TREE::IMPOSSIBLE)
14978 {
14979 if (out)
14980 {
14981 out->append(tree_name);
14982 out->append(" is IMPOSSIBLE");
14983 }
14984 else
14985 DBUG_PRINT("info", ("sel_tree: %s is IMPOSSIBLE", tree_name));
14986 return;
14987 }
14988
14989 if (tree->type == SEL_TREE::ALWAYS)
14990 {
14991 if (out)
14992 {
14993 out->append(tree_name);
14994 out->append(" is ALWAYS");
14995 }
14996 else
14997 DBUG_PRINT("info", ("sel_tree: %s is ALWAYS", tree_name));
14998 return;
14999 }
15000
15001 if (tree->type == SEL_TREE::MAYBE)
15002 {
15003 if (out)
15004 {
15005 out->append(tree_name);
15006 out->append(" is MAYBE");
15007 }
15008 else
15009 DBUG_PRINT("info", ("sel_tree: %s is MAYBE", tree_name));
15010 return;
15011 }
15012
15013 if (!tree->merges.is_empty())
15014 {
15015 if (out)
15016 {
15017 out->append(tree_name);
15018 out->append(" contains the following merges");
15019 }
15020 else
15021 DBUG_PRINT("info",
15022 ("sel_tree: "
15023 "%s contains the following merges", tree_name));
15024
15025 List_iterator<SEL_IMERGE> it(tree->merges);
15026 int i= 1;
15027 for (SEL_IMERGE *el= it++; el; el= it++, i++)
15028 {
15029 if (out)
15030 {
15031 out->append("\n--- alternative ");
15032 char istr[22];
15033 out->append(llstr(i, istr));
15034 out->append(" ---\n");
15035 }
15036 else
15037 DBUG_PRINT("info", ("sel_tree: --- alternative %d ---",i));
15038 for (SEL_TREE** current= el->trees;
15039 current != el->trees_next;
15040 current++)
15041 print_tree(out, " merge_tree", *current, param, print_full);
15042 }
15043 }
15044
15045 for (uint i= 0; i< param->keys; i++)
15046 {
15047 if (tree->keys[i] == NULL || tree->keys[i] == &null_element)
15048 continue;
15049
15050 uint real_key_nr= param->real_keynr[i];
15051
15052 const KEY &cur_key= param->table->key_info[real_key_nr];
15053 const KEY_PART_INFO *key_part= cur_key.key_part;
15054
15055 /*
15056 String holding the final range description from
15057 append_range_all_keyparts()
15058 */
15059 char buff1[512];
15060 String range_result(buff1, sizeof(buff1), system_charset_info);
15061 range_result.length(0);
15062
15063 /*
15064 Range description up to a certain keypart - used internally in
15065 append_range_all_keyparts()
15066 */
15067 char buff2[128];
15068 String range_so_far(buff2, sizeof(buff2), system_charset_info);
15069 range_so_far.length(0);
15070
15071 append_range_all_keyparts(NULL, &range_result, &range_so_far,
15072 tree->keys[i], key_part, print_full);
15073
15074 if (out)
15075 {
15076 char istr[22];
15077
15078 out->append(tree_name);
15079 out->append(" keys[");
15080 out->append(llstr(i, istr));
15081 out->append("]: ");
15082 out->append(range_result.ptr());
15083 out->append("\n");
15084 }
15085 else
15086 DBUG_PRINT("info",
15087 ("sel_tree: %p, type=%d, %s->keys[%u(%u)]: %s",
15088 tree->keys[i], tree->keys[i]->type, tree_name, i,
15089 real_key_nr, range_result.ptr()));
15090 }
15091 }
15092
15093
15094 /*****************************************************************************
15095 ** Print a quick range for debugging
15096 ** TODO:
15097 ** This should be changed to use a String to store each row instead
15098 ** of locking the DEBUG stream!
15099 *****************************************************************************/
15100
15101 #ifndef DBUG_OFF
15102
15103 static void
15104 print_multiple_key_values(KEY_PART *key_part, const uchar *key,
15105 uint used_length)
15106 {
15107 char buff[1024];
15108 const uchar *key_end= key+used_length;
15109 String tmp(buff,sizeof(buff),&my_charset_bin);
15110 uint store_length;
15111 TABLE *table= key_part->field->table;
15112 my_bitmap_map *old_sets[2];
15113
15114 dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
15115
15116 for (; key < key_end; key+=store_length, key_part++)
15117 {
15118 Field *field= key_part->field;
15119 store_length= key_part->store_length;
15120
15121 if (field->real_maybe_null())
15122 {
15123 if (*key)
15124 {
15125 if (fwrite("NULL",sizeof(char),4,DBUG_FILE) != 4) {
15126 goto restore_col_map;
15127 }
15128 continue;
15129 }
15130 key++; // Skip null byte
15131 store_length--;
15132 }
15133 field->set_key_image(key, key_part->length);
15134 if (field->type() == MYSQL_TYPE_BIT)
15135 (void) field->val_int_as_str(&tmp, 1);
15136 else
15137 field->val_str(&tmp);
15138 if (fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE) != tmp.length()) {
15139 goto restore_col_map;
15140 }
15141 if (key+store_length < key_end)
15142 fputc('/',DBUG_FILE);
15143 }
15144 restore_col_map:
15145 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
15146 }
15147
15148 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
15149 {
15150 char buf[MAX_KEY/8+1];
15151 TABLE *table;
15152 my_bitmap_map *old_sets[2];
15153 DBUG_ENTER("print_quick");
15154 if (!quick)
15155 DBUG_VOID_RETURN;
15156 DBUG_LOCK_FILE;
15157
15158 table= quick->head;
15159 dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
15160 quick->dbug_dump(0, TRUE);
15161 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
15162
15163 fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
15164
15165 DBUG_UNLOCK_FILE;
15166 DBUG_VOID_RETURN;
15167 }
15168
15169 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
15170 {
15171 /* purecov: begin inspected */
15172 fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
15173 indent, "", head->key_info[index].name, max_used_key_length);
15174
15175 if (verbose)
15176 {
15177 for (size_t ix= 0; ix < ranges.size(); ++ix)
15178 {
15179 fprintf(DBUG_FILE, "%*s", indent + 2, "");
15180 QUICK_RANGE *range= ranges[ix];
15181 if (!(range->flag & NO_MIN_RANGE))
15182 {
15183 print_multiple_key_values(key_parts, range->min_key,
15184 range->min_length);
15185 if (range->flag & NEAR_MIN)
15186 fputs(" < ",DBUG_FILE);
15187 else
15188 fputs(" <= ",DBUG_FILE);
15189 }
15190 fputs("X",DBUG_FILE);
15191
15192 if (!(range->flag & NO_MAX_RANGE))
15193 {
15194 if (range->flag & NEAR_MAX)
15195 fputs(" < ",DBUG_FILE);
15196 else
15197 fputs(" <= ",DBUG_FILE);
15198 print_multiple_key_values(key_parts, range->max_key,
15199 range->max_length);
15200 }
15201 fputs("\n",DBUG_FILE);
15202 }
15203 }
15204 /* purecov: end */
15205 }
15206
15207 void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
15208 {
15209 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
15210 QUICK_RANGE_SELECT *quick;
15211 fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
15212 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15213 while ((quick= it++))
15214 quick->dbug_dump(indent+2, verbose);
15215 if (pk_quick_select)
15216 {
15217 fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
15218 pk_quick_select->dbug_dump(indent+2, verbose);
15219 }
15220 fprintf(DBUG_FILE, "%*s}\n", indent, "");
15221 }
15222
15223 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
15224 {
15225 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
15226 QUICK_RANGE_SELECT *quick;
15227 fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
15228 indent, "", need_to_fetch_row? "":"non-");
15229 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15230 while ((quick= it++))
15231 quick->dbug_dump(indent+2, verbose);
15232 if (cpk_quick)
15233 {
15234 fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
15235 cpk_quick->dbug_dump(indent+2, verbose);
15236 }
15237 fprintf(DBUG_FILE, "%*s}\n", indent, "");
15238 }
15239
15240 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
15241 {
15242 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
15243 QUICK_SELECT_I *quick;
15244 fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
15245 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15246 while ((quick= it++))
15247 quick->dbug_dump(indent+2, verbose);
15248 fprintf(DBUG_FILE, "%*s}\n", indent, "");
15249 }
15250
15251 /*
15252 Print quick select information to DBUG_FILE.
15253
15254 SYNOPSIS
15255 QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
15256 indent Indentation offset
15257 verbose If TRUE show more detailed output.
15258
15259 DESCRIPTION
15260 Print the contents of this quick select to DBUG_FILE. The method also
15261 calls dbug_dump() for the used quick select if any.
15262
15263 IMPLEMENTATION
15264 Caller is responsible for locking DBUG_FILE before this call and unlocking
15265 it afterwards.
15266
15267 RETURN
15268 None
15269 */
15270
15271 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
15272 {
15273 fprintf(DBUG_FILE,
15274 "%*squick_group_min_max_select: index %s (%d), length: %d\n",
15275 indent, "", index_info->name, index, max_used_key_length);
15276 if (key_infix_len > 0)
15277 {
15278 fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
15279 indent, "", key_infix_len);
15280 }
15281 if (quick_prefix_select)
15282 {
15283 fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
15284 quick_prefix_select->dbug_dump(indent + 2, verbose);
15285 }
15286 if (min_max_ranges.size() > 0)
15287 {
15288 fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
15289 indent, "", static_cast<int>(min_max_ranges.size()));
15290 }
15291 }
15292
15293
15294 #endif /* !DBUG_OFF */
15295 #endif /* OPT_RANGE_CC_INCLUDED */
15296