1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights
2 * reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24 /*
25 TODO:
26 Fix that MAYBE_KEY are stored in the tree so that we can detect use
27 of full hash keys for queries like:
28
29 select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
30
31 */
32
33 /*
34 This file contains:
35
36 RangeAnalysisModule
37 A module that accepts a condition, index (or partitioning) description,
38 and builds lists of intervals (in index/partitioning space), such that
39 all possible records that match the condition are contained within the
40 intervals.
41 The entry point for the range analysis module is get_mm_tree()
42 (mm=min_max) function.
43
44 The lists are returned in form of complicated structure of interlinked
45 SEL_TREE/SEL_IMERGE/SEL_ARG objects.
46 See quick_range_seq_next, find_used_partitions for examples of how to walk
47 this structure.
48 All direct "users" of this module are located within this file, too.
49
50
51 PartitionPruningModule
52 A module that accepts a partitioned table, condition, and finds which
53 partitions we will need to use in query execution. Search down for
54 "PartitionPruningModule" for description.
55 The module has single entry point - prune_partitions() function.
56
57
58 Range/index_merge/groupby-minmax optimizer module
59 A module that accepts a table, condition, and returns
60 - a QUICK_*_SELECT object that can be used to retrieve rows that match
61 the specified condition, or a "no records will match the condition"
62 statement.
63
64 The module entry points are
65 test_quick_select()
66 get_quick_select_for_ref()
67
68
69 Record retrieval code for range/index_merge/groupby-min-max.
70 Implementations of QUICK_*_SELECT classes.
71
72 KeyTupleFormat
73 ~~~~~~~~~~~~~~
74 The code in this file (and elsewhere) makes operations on key value tuples.
75 Those tuples are stored in the following format:
76
77 The tuple is a sequence of key part values. The length of key part value
78 depends only on its type (and not depends on the what value is stored)
79
80 KeyTuple: keypart1-data, keypart2-data, ...
81
82 The value of each keypart is stored in the following format:
83
84 keypart_data: [isnull_byte] keypart-value-bytes
85
86 If a keypart may have a NULL value (key_part->field->real_maybe_null() can
87 be used to check this), then the first byte is a NULL indicator with the
88 following valid values:
89 1 - keypart has NULL value.
90 0 - keypart has non-NULL value.
91
92 <questionable-statement> If isnull_byte==1 (NULL value), then the following
93 keypart->length bytes must be 0.
94 </questionable-statement>
95
96 keypart-value-bytes holds the value. Its format depends on the field type.
97 The length of keypart-value-bytes may or may not depend on the value being
98 stored. The default is that length is static and equal to
99 KEY_PART_INFO::length.
100
101 Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the
102 value:
103
104 keypart-value-bytes: value_length value_bytes
105
106 The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
107
108 See key_copy() and key_restore() for code to move data between index tuple
109 and table record
110
111 CAUTION: the above description is only sergefp's understanding of the
112 subject and may omit some details.
113 */
114
115 #include "sql_priv.h"
116 #include "key.h" // is_key_used, key_copy, key_cmp, key_restore
117 #include "sql_parse.h" // check_stack_overrun
118 #include "sql_partition.h" // get_part_id_func, PARTITION_ITERATOR,
119 // struct partition_info, NOT_A_PARTITION_ID
120 #include "sql_base.h" // free_io_cache
121 #include "records.h" // init_read_record, end_read_record
122 #include <m_ctype.h>
123 #include "sql_select.h"
124 #include "opt_trace.h"
125 #include "filesort.h" // filesort_free_buffers
126 #include "sql_optimizer.h" // is_indexed_agg_distinct,field_time_cmp_date
127
128 using std::min;
129 using std::max;
130
131 /*
132 Convert double value to #rows. Currently this does floor(), and we
133 might consider using round() instead.
134 */
135 #define double2rows(x) ((ha_rows)(x))
136
137 static int sel_cmp(Field *f,uchar *a,uchar *b,uint8 a_flag,uint8 b_flag);
138
139 static uchar is_null_string[2]= {1,0};
140
141 class RANGE_OPT_PARAM;
142 /*
143 A construction block of the SEL_ARG-graph.
144
145 The following description only covers graphs of SEL_ARG objects with
146 sel_arg->type==KEY_RANGE:
147
148 One SEL_ARG object represents an "elementary interval" in form
149
150 min_value <=? table.keypartX <=? max_value
151
152 The interval is a non-empty interval of any kind: with[out] minimum/maximum
153 bound, [half]open/closed, single-point interval, etc.
154
155 1. SEL_ARG GRAPH STRUCTURE
156
157 SEL_ARG objects are linked together in a graph. The meaning of the graph
158 is better demostrated by an example:
159
160 tree->keys[i]
161 |
162 | $ $
163 | part=1 $ part=2 $ part=3
164 | $ $
165 | +-------+ $ +-------+ $ +--------+
166 | | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
167 | +-------+ $ +-------+ $ +--------+
168 | | $ $ |
169 | | $ $ +--------+
170 | | $ $ | kp3=12 |
171 | | $ $ +--------+
172 | +-------+ $ $
173 \->| kp1=2 |--$--------------$-+
174 +-------+ $ $ | +--------+
175 | $ $ ==>| kp3=11 |
176 +-------+ $ $ | +--------+
177 | kp1=3 |--$--------------$-+ |
178 +-------+ $ $ +--------+
179 | $ $ | kp3=14 |
180 ... $ $ +--------+
181
182 The entire graph is partitioned into "interval lists".
183
184 An interval list is a sequence of ordered disjoint intervals over
185 the same key part. SEL_ARG are linked via "next" and "prev" pointers
186 with NULL as sentinel.
187
188 In the example pic, there are 4 interval lists:
189 "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13".
190 The vertical lines represent SEL_ARG::next/prev pointers.
191
192 Additionally, all intervals in the list form a red-black (RB) tree,
193 linked via left/right/parent pointers with null_element as sentinel. The
194 red-black tree root SEL_ARG object will be further called "root of the
195 interval list".
196
197 A red-black tree with 7 SEL_ARGs will look similar to what is shown
198 below. Left/right/parent pointers are shown while next pointers go from a
199 node with number X to the node with number X+1 (and prev in the
200 opposite direction):
201
202 Root
203 +---+
204 | 4 |
205 +---+
206 left/ \ right
207 __/ \__
208 / \
209 +---+ +---+
210 | 2 | | 6 |
211 +---+ +---+
212 left / \ right left / \ right
213 | | | |
214 +---+ +---+ +---+ +---+
215 | 1 | | 3 | | 5 | | 7 |
216 +---+ +---+ +---+ +---+
217
218 In this tree,
219 * node1->prev == node7->next == NULL
220 * node1->left == node1->right ==
221 node3->left == ... node7->right == &null_element
222
223 In an interval list, each member X may have SEL_ARG::next_key_part pointer
224 pointing to the root of another interval list Y. The pointed interval list
225 must cover a key part with greater number (i.e. Y->part > X->part).
226
227 In the example pic, the next_key_part pointers are represented by
228 horisontal lines.
229
230 2. SEL_ARG GRAPH SEMANTICS
231
232 It represents a condition in a special form (we don't have a name for it ATM)
233 The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
234
235 For example, the picture represents the condition in form:
236 (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
237 (kp1=2 AND (kp3=11 OR kp3=14)) OR
238 (kp1=3 AND (kp3=11 OR kp3=14))
239
240 In red-black tree form:
241
242 +-------+ +--------+
243 | kp1=2 |.................| kp3=14 |
244 +-------+ +--------+
245 / \ /
246 +---------+ +-------+ +--------+
247 | kp1 < 1 | | kp1=3 | | kp3=11 |
248 +---------+ +-------+ +--------+
249 . .
250 ...... .......
251 . .
252 +-------+ +--------+
253 | kp2=5 | | kp3=14 |
254 +-------+ +--------+
255 . /
256 . +--------+
257 (root of R-B tree | kp3=11 |
258 for "kp3={10|12}") +--------+
259
260
261 Where / and \ denote left and right pointers and ... denotes
262 next_key_part pointers to the root of the R-B tree of intervals for
263 consecutive key parts.
264
265 3. SEL_ARG GRAPH USE
266
267 Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
268 Then walk the SEL_ARG graph and get a list of dijsoint ordered key
269 intervals (i.e. intervals in form
270
271 (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
272
273 Those intervals can be used to access the index. The uses are in:
274 - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
275 how many table records are contained within all
276 intervals.
277 - get_quick_select() - Walk the SEL_ARG, materialize the key intervals,
278 and create QUICK_RANGE_SELECT object that will
279 read records within these intervals.
280
281 4. SPACE COMPLEXITY NOTES
282
283 SEL_ARG graph is a representation of an ordered disjoint sequence of
284 intervals over the ordered set of index tuple values.
285
286 For multi-part keys, one can construct a WHERE expression such that its
287 list of intervals will be of combinatorial size. Here is an example:
288
289 (keypart1 IN (1,2, ..., n1)) AND
290 (keypart2 IN (1,2, ..., n2)) AND
291 (keypart3 IN (1,2, ..., n3))
292
293 For this WHERE clause the list of intervals will have n1*n2*n3 intervals
294 of form
295
296 (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
297
298 SEL_ARG graph structure aims to reduce the amount of required space by
299 "sharing" the elementary intervals when possible (the pic at the
300 beginning of this comment has examples of such sharing). The sharing may
301 prevent combinatorial blowup:
302
303 There are WHERE clauses that have combinatorial-size interval lists but
304 will be represented by a compact SEL_ARG graph.
305 Example:
306 (keypartN IN (1,2, ..., n1)) AND
307 ...
308 (keypart2 IN (1,2, ..., n2)) AND
309 (keypart1 IN (1,2, ..., n3))
310
311 but not in all cases:
312
313 - There are WHERE clauses that do have a compact SEL_ARG-graph
314 representation but get_mm_tree() and its callees will construct a
315 graph of combinatorial size.
316 Example:
317 (keypart1 IN (1,2, ..., n1)) AND
318 (keypart2 IN (1,2, ..., n2)) AND
319 ...
320 (keypartN IN (1,2, ..., n3))
321
322 - There are WHERE clauses for which the minimal possible SEL_ARG graph
323 representation will have combinatorial size.
324 Example:
325 By induction: Let's take any interval on some keypart in the middle:
326
327 kp15=c0
328
329 Then let's AND it with this interval 'structure' from preceding and
330 following keyparts:
331
332 (kp14=c1 AND kp16=c3) OR keypart14=c2) (*)
333
334 We will obtain this SEL_ARG graph:
335
336 kp14 $ kp15 $ kp16
337 $ $
338 +---------+ $ +---------+ $ +---------+
339 | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
340 +---------+ $ +---------+ $ +---------+
341 | $ $
342 +---------+ $ +---------+ $
343 | kp14=c2 |--$-->| kp15=c0 | $
344 +---------+ $ +---------+ $
345 $ $
346
347 Note that we had to duplicate "kp15=c0" and there was no way to avoid
348 that.
349 The induction step: AND the obtained expression with another "wrapping"
350 expression like (*).
351 When the process ends because of the limit on max. number of keyparts
352 we'll have:
353
354 WHERE clause length is O(3*#max_keyparts)
355 SEL_ARG graph size is O(2^(#max_keyparts/2))
356
357 (it is also possible to construct a case where instead of 2 in 2^n we
358 have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
359 nodes)
360
361 We avoid consuming too much memory by setting a limit on the number of
362 SEL_ARG object we can construct during one range analysis invocation.
363 */
364
365 class SEL_ARG :public Sql_alloc
366 {
367 public:
368 uint8 min_flag,max_flag,maybe_flag;
369 uint8 part; // Which key part
370 uint8 maybe_null;
371 /*
372 Number of children of this element in the RB-tree, plus 1 for this
373 element itself.
374 */
375 uint16 elements;
376 /*
377 Valid only for elements which are RB-tree roots: Number of times this
378 RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
379 SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
380 */
381 ulong use_count;
382
383 Field *field;
384 uchar *min_value,*max_value; // Pointer to range
385
386 /*
387 eq_tree(), first(), last() etc require that left == right == NULL
388 if the type is MAYBE_KEY. Todo: fix this so SEL_ARGs without R-B
389 children are handled consistently. See related WL#5894.
390 */
391 SEL_ARG *left,*right; /* R-B tree children */
392 SEL_ARG *next,*prev; /* Links for bi-directional interval list */
393 SEL_ARG *parent; /* R-B tree parent */
394 /*
395 R-B tree root of intervals covering keyparts consecutive to this
396 SEL_ARG. See documentation of SEL_ARG GRAPH semantics for details.
397 */
398 SEL_ARG *next_key_part;
399 enum leaf_color { BLACK,RED } color;
400
401 /**
402 Starting an effort to document this field:
403
404 IMPOSSIBLE: if the range predicate for this index is always false.
405
406 ALWAYS: if the range predicate for this index is always true.
407
408 KEY_RANGE: if there is a range predicate that can be used on this index.
409 */
410 enum Type { IMPOSSIBLE, ALWAYS, MAYBE, MAYBE_KEY, KEY_RANGE } type;
411
412 enum { MAX_SEL_ARGS = 16000 };
413
SEL_ARG()414 SEL_ARG() {}
415 SEL_ARG(SEL_ARG &);
416 SEL_ARG(Field *,const uchar *, const uchar *);
417 SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
418 uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
419 /*
420 Used to construct MAYBE_KEY and IMPOSSIBLE SEL_ARGs. left and
421 right is NULL, so this ctor must not be used to create other
422 SEL_ARG types. See todo for left/right pointers.
423 */
SEL_ARG(enum Type type_arg)424 SEL_ARG(enum Type type_arg)
425 :min_flag(0),elements(1),use_count(1),left(NULL),right(NULL),
426 next_key_part(0), color(BLACK), type(type_arg)
427 {
428 DBUG_ASSERT(type_arg == MAYBE_KEY || type_arg == IMPOSSIBLE);
429 }
430 /**
431 returns true if a range predicate is equal. Use all_same()
432 to check for equality of all the predicates on this keypart.
433 */
is_same(const SEL_ARG * arg) const434 inline bool is_same(const SEL_ARG *arg) const
435 {
436 if (type != arg->type || part != arg->part)
437 return false;
438 if (type != KEY_RANGE)
439 return true;
440 return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
441 }
442 /**
443 returns true if all the predicates in the keypart tree are equal
444 */
all_same(const SEL_ARG * arg) const445 bool all_same(const SEL_ARG *arg) const
446 {
447 if (type != arg->type || part != arg->part)
448 return false;
449 if (type != KEY_RANGE)
450 return true;
451 if (arg == this)
452 return true;
453 const SEL_ARG *cmp_arg= arg->first();
454 const SEL_ARG *cur_arg= first();
455 for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
456 cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
457 if (cur_arg || cmp_arg)
458 return false;
459 return true;
460 }
merge_flags(SEL_ARG * arg)461 inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
maybe_smaller()462 inline void maybe_smaller() { maybe_flag=1; }
463 /* Return true iff it's a single-point null interval */
is_null_interval()464 inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
cmp_min_to_min(const SEL_ARG * arg) const465 inline int cmp_min_to_min(const SEL_ARG* arg) const
466 {
467 return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
468 }
cmp_min_to_max(const SEL_ARG * arg) const469 inline int cmp_min_to_max(const SEL_ARG* arg) const
470 {
471 return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
472 }
cmp_max_to_max(const SEL_ARG * arg) const473 inline int cmp_max_to_max(const SEL_ARG* arg) const
474 {
475 return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
476 }
cmp_max_to_min(const SEL_ARG * arg) const477 inline int cmp_max_to_min(const SEL_ARG* arg) const
478 {
479 return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
480 }
clone_and(SEL_ARG * arg)481 SEL_ARG *clone_and(SEL_ARG* arg)
482 { // Get overlapping range
483 uchar *new_min,*new_max;
484 uint8 flag_min,flag_max;
485 if (cmp_min_to_min(arg) >= 0)
486 {
487 new_min=min_value; flag_min=min_flag;
488 }
489 else
490 {
491 new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
492 }
493 if (cmp_max_to_max(arg) <= 0)
494 {
495 new_max=max_value; flag_max=max_flag;
496 }
497 else
498 {
499 new_max=arg->max_value; flag_max=arg->max_flag;
500 }
501 return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
502 MY_TEST(maybe_flag && arg->maybe_flag));
503 }
clone_first(SEL_ARG * arg)504 SEL_ARG *clone_first(SEL_ARG *arg)
505 { // min <= X < arg->min
506 return new SEL_ARG(field,part, min_value, arg->min_value,
507 min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
508 maybe_flag | arg->maybe_flag);
509 }
clone_last(SEL_ARG * arg)510 SEL_ARG *clone_last(SEL_ARG *arg)
511 { // min <= X <= key_max
512 return new SEL_ARG(field, part, min_value, arg->max_value,
513 min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
514 }
515 SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);
516
copy_min(SEL_ARG * arg)517 bool copy_min(SEL_ARG* arg)
518 { // Get overlapping range
519 if (cmp_min_to_min(arg) > 0)
520 {
521 min_value=arg->min_value; min_flag=arg->min_flag;
522 if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
523 return 1; // Full range
524 }
525 maybe_flag|=arg->maybe_flag;
526 return 0;
527 }
copy_max(SEL_ARG * arg)528 bool copy_max(SEL_ARG* arg)
529 { // Get overlapping range
530 if (cmp_max_to_max(arg) <= 0)
531 {
532 max_value=arg->max_value; max_flag=arg->max_flag;
533 if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
534 return 1; // Full range
535 }
536 maybe_flag|=arg->maybe_flag;
537 return 0;
538 }
539
copy_min_to_min(SEL_ARG * arg)540 void copy_min_to_min(SEL_ARG *arg)
541 {
542 min_value=arg->min_value; min_flag=arg->min_flag;
543 }
copy_min_to_max(SEL_ARG * arg)544 void copy_min_to_max(SEL_ARG *arg)
545 {
546 max_value=arg->min_value;
547 max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
548 }
copy_max_to_min(SEL_ARG * arg)549 void copy_max_to_min(SEL_ARG *arg)
550 {
551 min_value=arg->max_value;
552 min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
553 }
554 /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_min(uint length,uchar ** min_key,uint min_key_flag)555 int store_min(uint length, uchar **min_key,uint min_key_flag)
556 {
557 /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
558 if ((min_flag & GEOM_FLAG) ||
559 (!(min_flag & NO_MIN_RANGE) &&
560 !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
561 {
562 if (maybe_null && *min_value)
563 {
564 **min_key=1;
565 memset(*min_key+1, 0, length-1);
566 }
567 else
568 memcpy(*min_key,min_value,length);
569 (*min_key)+= length;
570 return 1;
571 }
572 return 0;
573 }
574 /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_max(uint length,uchar ** max_key,uint max_key_flag)575 int store_max(uint length, uchar **max_key, uint max_key_flag)
576 {
577 if (!(max_flag & NO_MAX_RANGE) &&
578 !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
579 {
580 if (maybe_null && *max_value)
581 {
582 **max_key=1;
583 memset(*max_key+1, 0, length-1);
584 }
585 else
586 memcpy(*max_key,max_value,length);
587 (*max_key)+= length;
588 return 1;
589 }
590 return 0;
591 }
592
593 /*
594 Returns a number of keypart values appended to the key buffer
595 for min key and max key. This function is used by both Range
596 Analysis and Partition pruning. For partition pruning we have
597 to ensure that we don't store also subpartition fields. Thus
598 we have to stop at the last partition part and not step into
599 the subpartition fields. For Range Analysis we set last_part
600 to MAX_KEY which we should never reach.
601 */
store_min_key(KEY_PART * key,uchar ** range_key,uint * range_key_flag,uint last_part)602 int store_min_key(KEY_PART *key,
603 uchar **range_key,
604 uint *range_key_flag,
605 uint last_part)
606 {
607 SEL_ARG *key_tree= first();
608 uint res= key_tree->store_min(key[key_tree->part].store_length,
609 range_key, *range_key_flag);
610 *range_key_flag|= key_tree->min_flag;
611
612 if (key_tree->next_key_part &&
613 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
614 key_tree->part != last_part &&
615 key_tree->next_key_part->part == key_tree->part+1 &&
616 !(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
617 res+= key_tree->next_key_part->store_min_key(key,
618 range_key,
619 range_key_flag,
620 last_part);
621 return res;
622 }
623
624 /* returns a number of keypart values appended to the key buffer */
store_max_key(KEY_PART * key,uchar ** range_key,uint * range_key_flag,uint last_part)625 int store_max_key(KEY_PART *key,
626 uchar **range_key,
627 uint *range_key_flag,
628 uint last_part)
629 {
630 SEL_ARG *key_tree= last();
631 uint res=key_tree->store_max(key[key_tree->part].store_length,
632 range_key, *range_key_flag);
633 (*range_key_flag)|= key_tree->max_flag;
634 if (key_tree->next_key_part &&
635 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
636 key_tree->part != last_part &&
637 key_tree->next_key_part->part == key_tree->part+1 &&
638 !(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
639 res+= key_tree->next_key_part->store_max_key(key,
640 range_key,
641 range_key_flag,
642 last_part);
643 return res;
644 }
645
646 SEL_ARG *insert(SEL_ARG *key);
647 SEL_ARG *tree_delete(SEL_ARG *key);
648 SEL_ARG *find_range(SEL_ARG *key);
649 SEL_ARG *rb_insert(SEL_ARG *leaf);
650 friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
651 #ifndef DBUG_OFF
652 friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
653 void test_use_count(SEL_ARG *root);
654 #endif
655 SEL_ARG *first();
656 const SEL_ARG *first() const;
657 SEL_ARG *last();
658 void make_root();
simple_key()659 inline bool simple_key()
660 {
661 return !next_key_part && elements == 1;
662 }
increment_use_count(long count)663 void increment_use_count(long count)
664 {
665 if (next_key_part)
666 {
667 next_key_part->use_count+=count;
668 for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
669 if (pos->next_key_part)
670 pos->increment_use_count(count);
671 }
672 }
free_tree()673 void free_tree()
674 {
675 for (SEL_ARG *pos=first(); pos ; pos=pos->next)
676 if (pos->next_key_part)
677 {
678 pos->next_key_part->use_count--;
679 pos->next_key_part->free_tree();
680 }
681 }
682
parent_ptr()683 inline SEL_ARG **parent_ptr()
684 {
685 return parent->left == this ? &parent->left : &parent->right;
686 }
687
688
689 /*
690 Check if this SEL_ARG object represents a single-point interval
691
692 SYNOPSIS
693 is_singlepoint()
694
695 DESCRIPTION
696 Check if this SEL_ARG object (not tree) represents a single-point
697 interval, i.e. if it represents a "keypart = const" or
698 "keypart IS NULL".
699
700 RETURN
701 TRUE This SEL_ARG object represents a singlepoint interval
702 FALSE Otherwise
703 */
704
is_singlepoint() const705 bool is_singlepoint() const
706 {
707 /*
708 Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
709 flags, and the same for right edge.
710 */
711 if (min_flag || max_flag)
712 return FALSE;
713 uchar *min_val= min_value;
714 uchar *max_val= max_value;
715
716 if (maybe_null)
717 {
718 /* First byte is a NULL value indicator */
719 if (*min_val != *max_val)
720 return FALSE;
721
722 if (*min_val)
723 return TRUE; /* This "x IS NULL" */
724 min_val++;
725 max_val++;
726 }
727 return !field->key_cmp(min_val, max_val);
728 }
729 SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
730 };
731
732 /**
733 Helper function to compare two SEL_ARG's.
734 */
all_same(const SEL_ARG * sa1,const SEL_ARG * sa2)735 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
736 {
737 if (sa1 == NULL && sa2 == NULL)
738 return true;
739 if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
740 return false;
741 return sa1->all_same(sa2);
742 }
743
744 class SEL_IMERGE;
745
746
747 class SEL_TREE :public Sql_alloc
748 {
749 public:
750 /**
751 Starting an effort to document this field:
752
753 IMPOSSIBLE: if keys[i]->type == SEL_ARG::IMPOSSIBLE for some i,
754 then type == SEL_TREE::IMPOSSIBLE. Rationale: if the predicate for
755 one of the indexes is always false, then the full predicate is also
756 always false.
757
758 ALWAYS: if either (keys[i]->type == SEL_ARG::ALWAYS) or
759 (keys[i] == NULL) for all i, then type == SEL_TREE::ALWAYS.
760 Rationale: the range access method will not be able to filter
761 out any rows when there are no range predicates that can be used
762 to filter on any index.
763
764 KEY: There are range predicates that can be used on at least one
765 index.
766
767 KEY_SMALLER: There are range predicates that can be used on at
768 least one index. In addition, there are predicates that cannot
769 be directly utilized by range access on key parts in the same
770 index. These unused predicates makes it probable that the row
771 estimate for range access on this index is too pessimistic.
772 */
773 enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
SEL_TREE(enum Type type_arg)774 SEL_TREE(enum Type type_arg) :type(type_arg) {}
SEL_TREE()775 SEL_TREE() :type(KEY)
776 {
777 memset(keys, 0, sizeof(keys));
778 }
779 SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
780 /*
781 Possible ways to read rows using a single index because the
782 conditions of the query consists of single-index conjunctions:
783
784 (ranges_for_idx_1) AND (ranges_for_idx_2) AND ...
785
786 The SEL_ARG graph for each non-NULL element in keys[] may consist
787 of many single-index ranges (disjunctions), so ranges_for_idx_1
788 may e.g. be:
789
790 "idx_field1 = 1 OR (idx_field1 > 5 AND idx_field2 = 10)"
791
792 assuming that index1 is a composite index covering
793 (idx_field1,...,idx_field2,..)
794
795 Index merge intersection intersects ranges on SEL_ARGs from two or
796 more indexes.
797
798 Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
799 keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
800 merit in range analyzer functions (e.g. get_mm_parts) returning a
801 pointer to such SEL_TREE instead of NULL)
802 */
803 SEL_ARG *keys[MAX_KEY];
804 key_map keys_map; /* bitmask of non-NULL elements in keys */
805
806 /*
807 Possible ways to read rows using Index merge (sort) union.
808
809 Each element in 'merges' consists of multi-index disjunctions,
810 which means that Index merge (sort) union must be applied to read
811 rows. The nodes in the 'merges' list forms a conjunction of such
812 multi-index disjunctions.
813
814 The list is non-empty only if type==KEY.
815 */
816 List<SEL_IMERGE> merges;
817
818 /* The members below are filled/used only after get_mm_tree is done */
819 key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */
820 uint n_ror_scans; /* number of set bits in ror_scans_map */
821
822 struct st_ror_scan_info **ror_scans; /* list of ROR key scans */
823 struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
824 /* Note that #records for each key scan is stored in table->quick_rows */
825 };
826
827 class RANGE_OPT_PARAM
828 {
829 public:
830 THD *thd; /* Current thread handle */
831 TABLE *table; /* Table being analyzed */
832 Item *cond; /* Used inside get_mm_tree(). */
833 table_map prev_tables;
834 table_map read_tables;
835 table_map current_table; /* Bit of the table being analyzed */
836
837 /* Array of parts of all keys for which range analysis is performed */
838 KEY_PART *key_parts;
839 KEY_PART *key_parts_end;
840 MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
841 MEM_ROOT *old_root; /* Memory that will last until the query end */
842 /*
843 Number of indexes used in range analysis (In SEL_TREE::keys only first
844 #keys elements are not empty)
845 */
846 uint keys;
847
848 /*
849 If true, the index descriptions describe real indexes (and it is ok to
850 call field->optimize_range(real_keynr[...], ...).
851 Otherwise index description describes fake indexes, like a partitioning
852 expression.
853 */
854 bool using_real_indexes;
855
856 /*
857 Aggressively remove "scans" that do not have conditions on first
858 keyparts. Such scans are usable when doing partition pruning but not
859 regular range optimization.
860 */
861 bool remove_jump_scans;
862
863 /*
864 used_key_no -> table_key_no translation table. Only makes sense if
865 using_real_indexes==TRUE
866 */
867 uint real_keynr[MAX_KEY];
868
869 /*
870 Used to store 'current key tuples', in both range analysis and
871 partitioning (list) analysis
872 */
873 uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
874 max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
875
876 /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
877 uint alloced_sel_args;
878 bool force_default_mrr;
879 /**
880 Whether index statistics or index dives should be used when
881 estimating the number of rows in an equality range. If true, index
882 statistics is used for these indexes.
883 */
884 bool use_index_statistics;
885
statement_should_be_aborted() const886 bool statement_should_be_aborted() const
887 {
888 return
889 thd->is_fatal_error ||
890 thd->is_error() ||
891 alloced_sel_args > SEL_ARG::MAX_SEL_ARGS;
892 }
893
894 };
895
896 class PARAM : public RANGE_OPT_PARAM
897 {
898 public:
899 KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
900 longlong baseflag;
901 uint max_key_part;
902 /* Number of ranges in the last checked tree->key */
903 uint range_count;
904
905 bool quick; // Don't calulate possible keys
906
907 uint fields_bitmap_size;
908 MY_BITMAP needed_fields; /* bitmask of fields needed by the query */
909 MY_BITMAP tmp_covered_fields;
910
911 key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */
912
913 uint *imerge_cost_buff; /* buffer for index_merge cost estimates */
914 uint imerge_cost_buff_size; /* size of the buffer */
915
916 /* TRUE if last checked tree->key can be used for ROR-scan */
917 bool is_ror_scan;
918 /* Number of ranges in the last checked tree->key */
919 uint n_ranges;
920
921 /*
922 The sort order the range access method must be able
923 to provide. Three-value logic: asc/desc/don't care
924 */
925 ORDER::enum_order order_direction;
926 };
927
928 class TABLE_READ_PLAN;
929 class TRP_RANGE;
930 class TRP_ROR_INTERSECT;
931 class TRP_ROR_UNION;
932 class TRP_INDEX_MERGE;
933 class TRP_GROUP_MIN_MAX;
934
935 struct st_ror_scan_info;
936
937 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,
938 Item_func *cond_func,Field *field,
939 Item_func::Functype type,Item *value,
940 Item_result cmp_type);
941 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,Item *cond_func,Field *field,
942 KEY_PART *key_part,
943 Item_func::Functype type,Item *value);
944 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond);
945
946 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts);
947 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
948 SEL_ARG *tree, bool update_tbl_stats,
949 uint *mrr_flags, uint *bufsize,
950 Cost_estimate *cost);
951 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
952 SEL_ARG *key_tree, uint mrr_flags,
953 uint mrr_buf_size, MEM_ROOT *alloc);
954 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
955 bool index_read_must_be_used,
956 bool update_tbl_stats,
957 double read_time);
958 static
959 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
960 double read_time);
961 static
962 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
963 double read_time);
964 static
965 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
966 double read_time);
967 #ifndef DBUG_OFF
968 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
969 const char *msg);
970 static void print_ror_scans_arr(TABLE *table, const char *msg,
971 struct st_ror_scan_info **start,
972 struct st_ror_scan_info **end);
973 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
974 #endif
975
976 static void append_range_all_keyparts(Opt_trace_array *range_trace,
977 String *range_string,
978 String *range_so_far,
979 SEL_ARG *keypart_root,
980 const KEY_PART_INFO *key_parts);
981 static inline void dbug_print_tree(const char *tree_name,
982 SEL_TREE *tree,
983 const RANGE_OPT_PARAM *param);
984
985 void append_range(String *out,
986 const KEY_PART_INFO *key_parts,
987 const uchar *min_key, const uchar *max_key,
988 const uint flag);
989
990 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
991 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
992 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
993 static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
994 static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
995 uint clone_flag);
996 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
997 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
998 SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
999 uchar *max_key,uint max_key_flag);
1000 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
1001 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count,
1002 uint limit);
1003
1004 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
1005 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
1006 uint length);
1007 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
1008
1009
1010 /*
1011 SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
1012 a condition in the following form:
1013 (t_1||t_2||...||t_N) && (next)
1014
1015 where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
1016 (t_i,t_j) contains SEL_ARGS for the same index.
1017
1018 SEL_TREE contained in SEL_IMERGE always has merges=NULL.
1019
1020 This class relies on memory manager to do the cleanup.
1021 */
1022
1023 class SEL_IMERGE : public Sql_alloc
1024 {
1025 enum { PREALLOCED_TREES= 10};
1026 public:
1027 SEL_TREE *trees_prealloced[PREALLOCED_TREES];
1028 SEL_TREE **trees; /* trees used to do index_merge */
1029 SEL_TREE **trees_next; /* last of these trees */
1030 SEL_TREE **trees_end; /* end of allocated space */
1031
1032 SEL_ARG ***best_keys; /* best keys to read in SEL_TREEs */
1033
SEL_IMERGE()1034 SEL_IMERGE() :
1035 trees(&trees_prealloced[0]),
1036 trees_next(trees),
1037 trees_end(trees + PREALLOCED_TREES)
1038 {}
1039 SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
1040 int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
1041 int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
1042 int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
1043 };
1044
1045
1046 /*
1047 Add SEL_TREE to this index_merge without any checks,
1048
1049 NOTES
1050 This function implements the following:
1051 (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
1052
1053 RETURN
1054 0 - OK
1055 -1 - Out of memory.
1056 */
1057
or_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree)1058 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
1059 {
1060 if (trees_next == trees_end)
1061 {
1062 const int realloc_ratio= 2; /* Double size for next round */
1063 uint old_elements= (trees_end - trees);
1064 uint old_size= sizeof(SEL_TREE**) * old_elements;
1065 uint new_size= old_size * realloc_ratio;
1066 SEL_TREE **new_trees;
1067 if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
1068 return -1;
1069 memcpy(new_trees, trees, old_size);
1070 trees= new_trees;
1071 trees_next= trees + old_elements;
1072 trees_end= trees + old_elements * realloc_ratio;
1073 }
1074 *(trees_next++)= tree;
1075 return 0;
1076 }
1077
1078
1079 /*
1080 Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
1081 combining new_tree with one of the trees in this SEL_IMERGE if they both
1082 have SEL_ARGs for the same key.
1083
1084 SYNOPSIS
1085 or_sel_tree_with_checks()
1086 param PARAM from SQL_SELECT::test_quick_select
1087 new_tree SEL_TREE with type KEY or KEY_SMALLER.
1088
1089 NOTES
1090 This does the following:
1091 (t_1||...||t_k)||new_tree =
1092 either
1093 = (t_1||...||t_k||new_tree)
1094 or
1095 = (t_1||....||(t_j|| new_tree)||...||t_k),
1096
1097 where t_i, y are SEL_TREEs.
1098 new_tree is combined with the first t_j it has a SEL_ARG on common
1099 key with. As a consequence of this, choice of keys to do index_merge
1100 read may depend on the order of conditions in WHERE part of the query.
1101
1102 RETURN
1103 0 OK
1104 1 One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
1105 and (*this) should be discarded.
1106 -1 An error occurred.
1107 */
1108
or_sel_tree_with_checks(RANGE_OPT_PARAM * param,SEL_TREE * new_tree)1109 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
1110 {
1111 for (SEL_TREE** tree = trees;
1112 tree != trees_next;
1113 tree++)
1114 {
1115 if (sel_trees_can_be_ored(*tree, new_tree, param))
1116 {
1117 *tree = tree_or(param, *tree, new_tree);
1118 if (!*tree)
1119 return 1;
1120 if (((*tree)->type == SEL_TREE::MAYBE) ||
1121 ((*tree)->type == SEL_TREE::ALWAYS))
1122 return 1;
1123 /* SEL_TREE::IMPOSSIBLE is impossible here */
1124 return 0;
1125 }
1126 }
1127
1128 /* New tree cannot be combined with any of existing trees. */
1129 return or_sel_tree(param, new_tree);
1130 }
1131
1132
1133 /*
1134 Perform OR operation on this index_merge and supplied index_merge list.
1135
1136 RETURN
1137 0 - OK
1138 1 - One of conditions in result is always TRUE and this SEL_IMERGE
1139 should be discarded.
1140 -1 - An error occurred
1141 */
1142
or_sel_imerge_with_checks(RANGE_OPT_PARAM * param,SEL_IMERGE * imerge)1143 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
1144 {
1145 for (SEL_TREE** tree= imerge->trees;
1146 tree != imerge->trees_next;
1147 tree++)
1148 {
1149 if (or_sel_tree_with_checks(param, *tree))
1150 return 1;
1151 }
1152 return 0;
1153 }
1154
1155
SEL_TREE(SEL_TREE * arg,RANGE_OPT_PARAM * param)1156 SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
1157 {
1158 keys_map= arg->keys_map;
1159 type= arg->type;
1160 for (uint idx= 0; idx < MAX_KEY; idx++)
1161 {
1162 if ((keys[idx]= arg->keys[idx]))
1163 {
1164 keys[idx]->use_count++;
1165 keys[idx]->increment_use_count(1);
1166 }
1167 }
1168
1169 List_iterator<SEL_IMERGE> it(arg->merges);
1170 for (SEL_IMERGE *el= it++; el; el= it++)
1171 {
1172 SEL_IMERGE *merge= new SEL_IMERGE(el, param);
1173 if (!merge || merge->trees == merge->trees_next)
1174 {
1175 merges.empty();
1176 return;
1177 }
1178 merges.push_back (merge);
1179 }
1180 }
1181
1182
SEL_IMERGE(SEL_IMERGE * arg,RANGE_OPT_PARAM * param)1183 SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
1184 {
1185 uint elements= (arg->trees_end - arg->trees);
1186 if (elements > PREALLOCED_TREES)
1187 {
1188 uint size= elements * sizeof (SEL_TREE **);
1189 if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
1190 goto mem_err;
1191 }
1192 else
1193 trees= &trees_prealloced[0];
1194
1195 trees_next= trees;
1196 trees_end= trees + elements;
1197
1198 for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end;
1199 tree++, arg_tree++)
1200 {
1201 if (!(*tree= new SEL_TREE(*arg_tree, param)))
1202 goto mem_err;
1203 }
1204
1205 return;
1206
1207 mem_err:
1208 trees= &trees_prealloced[0];
1209 trees_next= trees;
1210 trees_end= trees;
1211 }
1212
1213
1214 /*
1215 Perform AND operation on two index_merge lists and store result in *im1.
1216 */
1217
imerge_list_and_list(List<SEL_IMERGE> * im1,List<SEL_IMERGE> * im2)1218 inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
1219 {
1220 im1->concat(im2);
1221 }
1222
1223
1224 /*
1225 Perform OR operation on 2 index_merge lists, storing result in first list.
1226
1227 NOTES
1228 The following conversion is implemented:
1229 (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
1230 => (a_1||b_1).
1231
1232 i.e. all conjuncts except the first one are currently dropped.
1233 This is done to avoid producing N*K ways to do index_merge.
1234
1235 If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
1236 and index_merge is discarded (while it is actually possible to try
1237 harder).
1238
1239 As a consequence of this, choice of keys to do index_merge read may depend
1240 on the order of conditions in WHERE part of the query.
1241
1242 RETURN
1243 0 OK, result is stored in *im1
1244 other Error, both passed lists are unusable
1245 */
1246
imerge_list_or_list(RANGE_OPT_PARAM * param,List<SEL_IMERGE> * im1,List<SEL_IMERGE> * im2)1247 int imerge_list_or_list(RANGE_OPT_PARAM *param,
1248 List<SEL_IMERGE> *im1,
1249 List<SEL_IMERGE> *im2)
1250 {
1251 SEL_IMERGE *imerge= im1->head();
1252 im1->empty();
1253 im1->push_back(imerge);
1254
1255 return imerge->or_sel_imerge_with_checks(param, im2->head());
1256 }
1257
1258
1259 /*
1260 Perform OR operation on index_merge list and key tree.
1261
1262 RETURN
1263 false OK, result is stored in *im1.
1264 true Error
1265 */
1266
imerge_list_or_tree(RANGE_OPT_PARAM * param,List<SEL_IMERGE> * im1,SEL_TREE * tree)1267 static bool imerge_list_or_tree(RANGE_OPT_PARAM *param,
1268 List<SEL_IMERGE> *im1,
1269 SEL_TREE *tree)
1270 {
1271 DBUG_ENTER("imerge_list_or_tree");
1272 SEL_IMERGE *imerge;
1273 List_iterator<SEL_IMERGE> it(*im1);
1274
1275 uint remaining_trees= im1->elements;
1276 while ((imerge= it++))
1277 {
1278 SEL_TREE *or_tree;
1279 /*
1280 Need to make a copy of 'tree' for all but the last OR operation
1281 because or_sel_tree_with_checks() may change it.
1282 */
1283 if (--remaining_trees == 0)
1284 or_tree= tree;
1285 else
1286 {
1287 or_tree= new SEL_TREE (tree, param);
1288 if (!or_tree)
1289 DBUG_RETURN(true);
1290 if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
1291 DBUG_RETURN(false);
1292 }
1293
1294 int result_or= imerge->or_sel_tree_with_checks(param, or_tree);
1295 if (result_or == 1)
1296 it.remove();
1297 else if (result_or == -1)
1298 DBUG_RETURN(true);
1299 }
1300 DBUG_ASSERT(remaining_trees == 0);
1301 DBUG_RETURN(im1->is_empty());
1302 }
1303
1304
1305 /***************************************************************************
1306 ** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
1307 ***************************************************************************/
1308
1309 /* make a select from mysql info
1310 Error is set as following:
1311 0 = ok
1312 1 = Got some error (out of memory?)
1313 */
1314
make_select(TABLE * head,table_map const_tables,table_map read_tables,Item * conds,bool allow_null_cond,int * error)1315 SQL_SELECT *make_select(TABLE *head, table_map const_tables,
1316 table_map read_tables, Item *conds,
1317 bool allow_null_cond,
1318 int *error)
1319 {
1320 SQL_SELECT *select;
1321 DBUG_ENTER("make_select");
1322
1323 *error=0;
1324
1325 if (!conds && !allow_null_cond)
1326 DBUG_RETURN(0);
1327 if (!(select= new SQL_SELECT))
1328 {
1329 *error= 1; // out of memory
1330 DBUG_RETURN(0); /* purecov: inspected */
1331 }
1332 select->read_tables=read_tables;
1333 select->const_tables=const_tables;
1334 select->head=head;
1335 select->cond=conds;
1336
1337 if (head->sort.io_cache)
1338 {
1339 select->file= *head->sort.io_cache;
1340 select->records=(ha_rows) (select->file.end_of_file/
1341 head->file->ref_length);
1342 my_free(head->sort.io_cache);
1343 head->sort.io_cache=0;
1344 }
1345 DBUG_RETURN(select);
1346 }
1347
1348
SQL_SELECT()1349 SQL_SELECT::SQL_SELECT() :
1350 quick(0), cond(0), icp_cond(0),
1351 free_cond(0), traced_before(false)
1352 {
1353 my_b_clear(&file);
1354 }
1355
1356
cleanup()1357 void SQL_SELECT::cleanup()
1358 {
1359 set_quick(NULL);
1360 if (free_cond)
1361 {
1362 free_cond=0;
1363 delete cond;
1364 cond= 0;
1365 }
1366 close_cached_file(&file);
1367 traced_before= false;
1368 }
1369
1370
~SQL_SELECT()1371 SQL_SELECT::~SQL_SELECT()
1372 {
1373 cleanup();
1374 }
1375
1376 #undef index // Fix for Unixware 7
1377
QUICK_SELECT_I()1378 QUICK_SELECT_I::QUICK_SELECT_I()
1379 :max_used_key_length(0),
1380 used_key_parts(0)
1381 {}
1382
QUICK_RANGE_SELECT(THD * thd,TABLE * table,uint key_nr,bool no_alloc,MEM_ROOT * parent_alloc,bool * create_error)1383 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
1384 bool no_alloc, MEM_ROOT *parent_alloc,
1385 bool *create_error)
1386 :free_file(0), cur_range(NULL), last_range(0),
1387 mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
1388 dont_free(0)
1389 {
1390 my_bitmap_map *bitmap;
1391 DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
1392
1393 in_ror_merged_scan= 0;
1394 index= key_nr;
1395 head= table;
1396 key_part_info= head->key_info[index].key_part;
1397 my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);
1398
1399 /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
1400 mrr_buf_size= thd->variables.read_rnd_buff_size;
1401
1402 if (!no_alloc && !parent_alloc)
1403 {
1404 // Allocates everything through the internal memroot
1405 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1406 thd->mem_root= &alloc;
1407 }
1408 else
1409 memset(&alloc, 0, sizeof(alloc));
1410 file= head->file;
1411 record= head->record[0];
1412
1413 /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
1414 if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
1415 MYF(MY_WME))))
1416 {
1417 column_bitmap.bitmap= 0;
1418 *create_error= 1;
1419 }
1420 else
1421 bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
1422 DBUG_VOID_RETURN;
1423 }
1424
1425
need_sorted_output()1426 void QUICK_RANGE_SELECT::need_sorted_output()
1427 {
1428 mrr_flags |= HA_MRR_SORTED;
1429 }
1430
1431
init()1432 int QUICK_RANGE_SELECT::init()
1433 {
1434 DBUG_ENTER("QUICK_RANGE_SELECT::init");
1435
1436 if (file->inited)
1437 file->ha_index_or_rnd_end();
1438 DBUG_RETURN(FALSE);
1439 }
1440
1441
range_end()1442 void QUICK_RANGE_SELECT::range_end()
1443 {
1444 if (file->inited)
1445 file->ha_index_or_rnd_end();
1446 }
1447
1448
~QUICK_RANGE_SELECT()1449 QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
1450 {
1451 DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
1452 if (!dont_free)
1453 {
1454 /* file is NULL for CPK scan on covering ROR-intersection */
1455 if (file)
1456 {
1457 range_end();
1458 if (free_file)
1459 {
1460 DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
1461 free_file));
1462 file->ha_external_lock(current_thd, F_UNLCK);
1463 file->ha_close();
1464 delete file;
1465 }
1466 }
1467 delete_dynamic(&ranges); /* ranges are allocated in alloc */
1468 free_root(&alloc,MYF(0));
1469 my_free(column_bitmap.bitmap);
1470 }
1471 my_free(mrr_buf_desc);
1472 DBUG_VOID_RETURN;
1473 }
1474
1475
QUICK_INDEX_MERGE_SELECT(THD * thd_param,TABLE * table)1476 QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
1477 TABLE *table)
1478 :unique(NULL), pk_quick_select(NULL), thd(thd_param)
1479 {
1480 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
1481 index= MAX_KEY;
1482 head= table;
1483 memset(static_cast<void*>(&read_record), 0, sizeof(read_record));
1484 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1485 DBUG_VOID_RETURN;
1486 }
1487
init()1488 int QUICK_INDEX_MERGE_SELECT::init()
1489 {
1490 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
1491 DBUG_RETURN(0);
1492 }
1493
reset()1494 int QUICK_INDEX_MERGE_SELECT::reset()
1495 {
1496 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
1497 const int retval= read_keys_and_merge();
1498 DBUG_RETURN(retval);
1499 }
1500
1501 bool
push_quick_back(QUICK_RANGE_SELECT * quick_sel_range)1502 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
1503 {
1504 /*
1505 Save quick_select that does scan on clustered primary key as it will be
1506 processed separately.
1507 */
1508 if (head->file->primary_key_is_clustered() &&
1509 quick_sel_range->index == head->s->primary_key)
1510 pk_quick_select= quick_sel_range;
1511 else
1512 return quick_selects.push_back(quick_sel_range);
1513 return 0;
1514 }
1515
~QUICK_INDEX_MERGE_SELECT()1516 QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
1517 {
1518 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1519 QUICK_RANGE_SELECT* quick;
1520 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
1521 delete unique;
1522 quick_it.rewind();
1523 while ((quick= quick_it++))
1524 quick->file= NULL;
1525 quick_selects.delete_elements();
1526 delete pk_quick_select;
1527 /* It's ok to call the next two even if they are already deinitialized */
1528 end_read_record(&read_record);
1529 free_io_cache(head);
1530 free_root(&alloc,MYF(0));
1531 DBUG_VOID_RETURN;
1532 }
1533
1534
QUICK_ROR_INTERSECT_SELECT(THD * thd_param,TABLE * table,bool retrieve_full_rows,MEM_ROOT * parent_alloc)1535 QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
1536 TABLE *table,
1537 bool retrieve_full_rows,
1538 MEM_ROOT *parent_alloc)
1539 : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
1540 scans_inited(FALSE)
1541 {
1542 index= MAX_KEY;
1543 head= table;
1544 record= head->record[0];
1545 if (!parent_alloc)
1546 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1547 else
1548 memset(&alloc, 0, sizeof(MEM_ROOT));
1549 last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
1550 head->file->ref_length);
1551 }
1552
1553
1554 /*
1555 Do post-constructor initialization.
1556 SYNOPSIS
1557 QUICK_ROR_INTERSECT_SELECT::init()
1558
1559 RETURN
1560 0 OK
1561 other Error code
1562 */
1563
init()1564 int QUICK_ROR_INTERSECT_SELECT::init()
1565 {
1566 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
1567 /* Check if last_rowid was successfully allocated in ctor */
1568 DBUG_RETURN(!last_rowid);
1569 }
1570
1571
1572 /*
1573 Initialize this quick select to be a ROR-merged scan.
1574
1575 SYNOPSIS
1576 QUICK_RANGE_SELECT::init_ror_merged_scan()
1577 reuse_handler If TRUE, use head->file, otherwise create a separate
1578 handler object
1579
1580 NOTES
1581 This function creates and prepares for subsequent use a separate handler
1582 object if it can't reuse head->file. The reason for this is that during
1583 ROR-merge several key scans are performed simultaneously, and a single
1584 handler is only capable of preserving context of a single key scan.
1585
1586 In ROR-merge the quick select doing merge does full records retrieval,
1587 merged quick selects read only keys.
1588
1589 RETURN
1590 0 ROR child scan initialized, ok to use.
1591 1 error
1592 */
1593
init_ror_merged_scan(bool reuse_handler)1594 int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
1595 {
1596 handler *save_file= file, *org_file;
1597 THD *thd;
1598 MY_BITMAP * const save_read_set= head->read_set;
1599 MY_BITMAP * const save_write_set= head->write_set;
1600 DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
1601
1602 in_ror_merged_scan= 1;
1603 mrr_flags|= HA_MRR_SORTED;
1604 if (reuse_handler)
1605 {
1606 DBUG_PRINT("info", ("Reusing handler %p", file));
1607 if (init() || reset())
1608 {
1609 DBUG_RETURN(1);
1610 }
1611 head->column_bitmaps_set(&column_bitmap, &column_bitmap);
1612 file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
1613 goto end;
1614 }
1615
1616 /* Create a separate handler object for this quick select */
1617 if (free_file)
1618 {
1619 /* already have own 'handler' object. */
1620 DBUG_RETURN(0);
1621 }
1622
1623 thd= head->in_use;
1624 if (!(file= head->file->clone(head->s->normalized_path.str, thd->mem_root)))
1625 {
1626 /*
1627 Manually set the error flag. Note: there seems to be quite a few
1628 places where a failure could cause the server to "hang" the client by
1629 sending no response to a query. ATM those are not real errors because
1630 the storage engine calls in question happen to never fail with the
1631 existing storage engines.
1632 */
1633 my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
1634 /* Caller will free the memory */
1635 goto failure; /* purecov: inspected */
1636 }
1637
1638 head->column_bitmaps_set(&column_bitmap, &column_bitmap);
1639
1640 if (file->ha_external_lock(thd, F_RDLCK))
1641 goto failure;
1642
1643 if (init() || reset())
1644 {
1645 file->ha_external_lock(thd, F_UNLCK);
1646 file->ha_close();
1647 goto failure;
1648 }
1649 free_file= TRUE;
1650 last_rowid= file->ref;
1651 file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
1652
1653 end:
1654 /*
1655 We are only going to read key fields and call position() on 'file'
1656 The following sets head->tmp_set to only use this key and then updates
1657 head->read_set and head->write_set to use this bitmap.
1658 The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
1659 */
1660 org_file= head->file;
1661 head->file= file;
1662 /* We don't have to set 'head->keyread' here as the 'file' is unique */
1663 if (!head->no_keyread)
1664 head->mark_columns_used_by_index(index);
1665 head->prepare_for_position();
1666 head->file= org_file;
1667 bitmap_copy(&column_bitmap, head->read_set);
1668
1669 /*
1670 We have prepared a column_bitmap which get_next() will use. To do this we
1671 used TABLE::read_set/write_set as playground; restore them to their
1672 original value to not pollute other scans.
1673 */
1674 head->column_bitmaps_set(save_read_set, save_write_set);
1675
1676 DBUG_RETURN(0);
1677
1678 failure:
1679 head->column_bitmaps_set(save_read_set, save_write_set);
1680 delete file;
1681 file= save_file;
1682 DBUG_RETURN(1);
1683 }
1684
1685
1686 /*
1687 Initialize this quick select to be a part of a ROR-merged scan.
1688 SYNOPSIS
1689 QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1690 reuse_handler If TRUE, use head->file, otherwise create separate
1691 handler object.
1692 RETURN
1693 0 OK
1694 other error code
1695 */
init_ror_merged_scan(bool reuse_handler)1696 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1697 {
1698 int error;
1699 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1700 QUICK_RANGE_SELECT* quick;
1701 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1702
1703 /* Initialize all merged "children" quick selects */
1704 DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1705 if (!need_to_fetch_row && reuse_handler)
1706 {
1707 quick= quick_it++;
1708 /*
1709 There is no use of this->file. Use it for the first of merged range
1710 selects.
1711 */
1712 int error= quick->init_ror_merged_scan(TRUE);
1713 if (error)
1714 DBUG_RETURN(error);
1715 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1716 }
1717 while ((quick= quick_it++))
1718 {
1719 #ifndef DBUG_OFF
1720 const MY_BITMAP * const save_read_set= quick->head->read_set;
1721 const MY_BITMAP * const save_write_set= quick->head->write_set;
1722 #endif
1723 if ((error= quick->init_ror_merged_scan(FALSE)))
1724 DBUG_RETURN(error);
1725 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1726 // Sets are shared by all members of "quick_selects" so must not change
1727 DBUG_ASSERT(quick->head->read_set == save_read_set);
1728 DBUG_ASSERT(quick->head->write_set == save_write_set);
1729 /* All merged scans share the same record buffer in intersection. */
1730 quick->record= head->record[0];
1731 }
1732
1733 /* Prepare for ha_rnd_pos calls if needed. */
1734 if (need_to_fetch_row && (error= head->file->ha_rnd_init(false)))
1735 {
1736 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1737 DBUG_RETURN(error);
1738 }
1739 DBUG_RETURN(0);
1740 }
1741
1742
1743 /*
1744 Initialize quick select for row retrieval.
1745 SYNOPSIS
1746 reset()
1747 RETURN
1748 0 OK
1749 other Error code
1750 */
1751
reset()1752 int QUICK_ROR_INTERSECT_SELECT::reset()
1753 {
1754 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1755 if (!scans_inited && init_ror_merged_scan(TRUE))
1756 DBUG_RETURN(1);
1757 scans_inited= TRUE;
1758 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
1759 QUICK_RANGE_SELECT *quick;
1760 while ((quick= it++))
1761 quick->reset();
1762 DBUG_RETURN(0);
1763 }
1764
1765
1766 /*
1767 Add a merged quick select to this ROR-intersection quick select.
1768
1769 SYNOPSIS
1770 QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1771 quick Quick select to be added. The quick select must return
1772 rows in rowid order.
1773 NOTES
1774 This call can only be made before init() is called.
1775
1776 RETURN
1777 FALSE OK
1778 TRUE Out of memory.
1779 */
1780
1781 bool
push_quick_back(QUICK_RANGE_SELECT * quick)1782 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
1783 {
1784 return quick_selects.push_back(quick);
1785 }
1786
~QUICK_ROR_INTERSECT_SELECT()1787 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
1788 {
1789 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
1790 quick_selects.delete_elements();
1791 delete cpk_quick;
1792 free_root(&alloc,MYF(0));
1793 if (need_to_fetch_row && head->file->inited)
1794 head->file->ha_rnd_end();
1795 DBUG_VOID_RETURN;
1796 }
1797
1798
QUICK_ROR_UNION_SELECT(THD * thd_param,TABLE * table)1799 QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
1800 TABLE *table)
1801 : thd(thd_param), scans_inited(FALSE)
1802 {
1803 index= MAX_KEY;
1804 head= table;
1805 rowid_length= table->file->ref_length;
1806 record= head->record[0];
1807 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1808 thd_param->mem_root= &alloc;
1809 }
1810
1811
1812 /*
1813 Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority
1814 queue.
1815
1816 SYNPOSIS
1817 QUICK_ROR_UNION_SELECT_queue_cmp()
1818 arg Pointer to QUICK_ROR_UNION_SELECT
1819 val1 First merged select
1820 val2 Second merged select
1821 */
1822
1823 C_MODE_START
1824
QUICK_ROR_UNION_SELECT_queue_cmp(void * arg,uchar * val1,uchar * val2)1825 static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
1826 {
1827 QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1828 return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
1829 ((QUICK_SELECT_I*)val2)->last_rowid);
1830 }
1831
1832 C_MODE_END
1833
1834
1835 /*
1836 Do post-constructor initialization.
1837 SYNOPSIS
1838 QUICK_ROR_UNION_SELECT::init()
1839
1840 RETURN
1841 0 OK
1842 other Error code
1843 */
1844
init()1845 int QUICK_ROR_UNION_SELECT::init()
1846 {
1847 DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1848 if (init_queue(&queue, quick_selects.elements, 0,
1849 FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
1850 (void*) this))
1851 {
1852 memset(&queue, 0, sizeof(QUEUE));
1853 DBUG_RETURN(1);
1854 }
1855
1856 if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1857 DBUG_RETURN(1);
1858 prev_rowid= cur_rowid + head->file->ref_length;
1859 DBUG_RETURN(0);
1860 }
1861
1862
1863 /*
1864 Initialize quick select for row retrieval.
1865 SYNOPSIS
1866 reset()
1867
1868 RETURN
1869 0 OK
1870 other Error code
1871 */
1872
reset()1873 int QUICK_ROR_UNION_SELECT::reset()
1874 {
1875 QUICK_SELECT_I *quick;
1876 int error;
1877 DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
1878 have_prev_rowid= FALSE;
1879 if (!scans_inited)
1880 {
1881 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1882 while ((quick= it++))
1883 {
1884 /*
1885 Use mem_root of this "QUICK" as using the statement mem_root
1886 might result in too many allocations when combined with
1887 dynamic range access where range optimizer is invoked many times
1888 for a single statement.
1889 */
1890 THD *thd= quick->head->in_use;
1891 MEM_ROOT *saved_root= thd->mem_root;
1892 thd->mem_root= &alloc;
1893 error= quick->init_ror_merged_scan(false);
1894 thd->mem_root= saved_root;
1895 if (error)
1896 DBUG_RETURN(1);
1897 }
1898 scans_inited= TRUE;
1899 }
1900 queue_remove_all(&queue);
1901 /*
1902 Initialize scans for merged quick selects and put all merged quick
1903 selects into the queue.
1904 */
1905 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1906 while ((quick= it++))
1907 {
1908 if ((error= quick->reset()))
1909 DBUG_RETURN(error);
1910 if ((error= quick->get_next()))
1911 {
1912 if (error == HA_ERR_END_OF_FILE)
1913 continue;
1914 DBUG_RETURN(error);
1915 }
1916 quick->save_last_pos();
1917 queue_insert(&queue, (uchar*)quick);
1918 }
1919
1920 /* Prepare for ha_rnd_pos calls. */
1921 if (head->file->inited && (error= head->file->ha_rnd_end()))
1922 {
1923 DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
1924 DBUG_RETURN(error);
1925 }
1926 if ((error= head->file->ha_rnd_init(false)))
1927 {
1928 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1929 DBUG_RETURN(error);
1930 }
1931
1932 DBUG_RETURN(0);
1933 }
1934
1935
1936 bool
push_quick_back(QUICK_SELECT_I * quick_sel_range)1937 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
1938 {
1939 return quick_selects.push_back(quick_sel_range);
1940 }
1941
~QUICK_ROR_UNION_SELECT()1942 QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
1943 {
1944 DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
1945 delete_queue(&queue);
1946 quick_selects.delete_elements();
1947 if (head->file->inited)
1948 head->file->ha_rnd_end();
1949 free_root(&alloc,MYF(0));
1950 DBUG_VOID_RETURN;
1951 }
1952
1953
QUICK_RANGE()1954 QUICK_RANGE::QUICK_RANGE()
1955 :min_key(0),max_key(0),min_length(0),max_length(0),
1956 flag(NO_MIN_RANGE | NO_MAX_RANGE),
1957 min_keypart_map(0), max_keypart_map(0)
1958 {}
1959
QUICK_RANGE(const uchar * min_key_arg,uint min_length_arg,key_part_map min_keypart_map_arg,const uchar * max_key_arg,uint max_length_arg,key_part_map max_keypart_map_arg,uint flag_arg)1960 QUICK_RANGE::QUICK_RANGE(const uchar *min_key_arg, uint min_length_arg,
1961 key_part_map min_keypart_map_arg,
1962 const uchar *max_key_arg, uint max_length_arg,
1963 key_part_map max_keypart_map_arg,
1964 uint flag_arg)
1965 : min_key(NULL),
1966 max_key(NULL),
1967 min_length((uint16) min_length_arg),
1968 max_length((uint16) max_length_arg),
1969 flag((uint16) flag_arg),
1970 min_keypart_map(min_keypart_map_arg),
1971 max_keypart_map(max_keypart_map_arg)
1972 {
1973 min_key= static_cast<uchar*>(sql_memdup(min_key_arg, min_length_arg + 1));
1974 max_key= static_cast<uchar*>(sql_memdup(max_key_arg, max_length_arg + 1));
1975 // If we get is_null_string as argument, the memdup is undefined behavior.
1976 DBUG_ASSERT(min_key_arg != is_null_string);
1977 DBUG_ASSERT(max_key_arg != is_null_string);
1978 }
1979
SEL_ARG(SEL_ARG & arg)1980 SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
1981 {
1982 DBUG_ASSERT(arg.type != MAYBE_KEY); // Would need left=right=NULL
1983 left=right= &null_element;
1984 prev=next= NULL;
1985 type=arg.type;
1986 min_flag=arg.min_flag;
1987 max_flag=arg.max_flag;
1988 maybe_flag=arg.maybe_flag;
1989 maybe_null=arg.maybe_null;
1990 part=arg.part;
1991 field=arg.field;
1992 min_value=arg.min_value;
1993 max_value=arg.max_value;
1994 next_key_part=arg.next_key_part;
1995 use_count=1; elements=1;
1996 }
1997
1998
make_root()1999 inline void SEL_ARG::make_root()
2000 {
2001 left=right= &null_element;
2002 color=BLACK;
2003 next=prev= NULL;
2004 use_count=0; elements=1;
2005 }
2006
SEL_ARG(Field * f,const uchar * min_value_arg,const uchar * max_value_arg)2007 SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
2008 const uchar *max_value_arg)
2009 :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
2010 elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
2011 max_value((uchar*) max_value_arg), next(NULL), prev(NULL),
2012 next_key_part(0), color(BLACK), type(KEY_RANGE)
2013 {
2014 left=right= &null_element;
2015 }
2016
SEL_ARG(Field * field_,uint8 part_,uchar * min_value_,uchar * max_value_,uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)2017 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
2018 uchar *min_value_, uchar *max_value_,
2019 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
2020 :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
2021 part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
2022 field(field_), min_value(min_value_), max_value(max_value_),
2023 next(NULL), prev(NULL), next_key_part(0), color(BLACK), type(KEY_RANGE)
2024 {
2025 left=right= &null_element;
2026 }
2027
clone(RANGE_OPT_PARAM * param,SEL_ARG * new_parent,SEL_ARG ** next_arg)2028 SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
2029 SEL_ARG **next_arg)
2030 {
2031 SEL_ARG *tmp;
2032
2033 /* Bail out if we have already generated too many SEL_ARGs */
2034 if (++param->alloced_sel_args > MAX_SEL_ARGS)
2035 return 0;
2036
2037 if (type != KEY_RANGE)
2038 {
2039 if (!(tmp= new (param->mem_root) SEL_ARG(type)))
2040 return 0; // out of memory
2041 tmp->prev= *next_arg; // Link into next/prev chain
2042 (*next_arg)->next=tmp;
2043 (*next_arg)= tmp;
2044 tmp->part= this->part;
2045 }
2046 else
2047 {
2048 if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
2049 min_flag, max_flag, maybe_flag)))
2050 return 0; // OOM
2051 tmp->parent=new_parent;
2052 tmp->next_key_part=next_key_part;
2053 if (left != &null_element)
2054 if (!(tmp->left=left->clone(param, tmp, next_arg)))
2055 return 0; // OOM
2056
2057 tmp->prev= *next_arg; // Link into next/prev chain
2058 (*next_arg)->next=tmp;
2059 (*next_arg)= tmp;
2060
2061 if (right != &null_element)
2062 if (!(tmp->right= right->clone(param, tmp, next_arg)))
2063 return 0; // OOM
2064 }
2065 increment_use_count(1);
2066 tmp->color= color;
2067 tmp->elements= this->elements;
2068 return tmp;
2069 }
2070
2071 /**
2072 This gives the first SEL_ARG in the interval list, and the minimal element
2073 in the red-black tree
2074
2075 @return
2076 SEL_ARG first SEL_ARG in the interval list
2077 */
first()2078 SEL_ARG *SEL_ARG::first()
2079 {
2080 SEL_ARG *next_arg=this;
2081 if (!next_arg->left)
2082 return 0; // MAYBE_KEY
2083 while (next_arg->left != &null_element)
2084 next_arg=next_arg->left;
2085 return next_arg;
2086 }
2087
first() const2088 const SEL_ARG *SEL_ARG::first() const
2089 {
2090 return const_cast<SEL_ARG*>(this)->first();
2091 }
2092
last()2093 SEL_ARG *SEL_ARG::last()
2094 {
2095 SEL_ARG *next_arg=this;
2096 if (!next_arg->right)
2097 return 0; // MAYBE_KEY
2098 while (next_arg->right != &null_element)
2099 next_arg=next_arg->right;
2100 return next_arg;
2101 }
2102
2103
2104 /*
2105 Check if a compare is ok, when one takes ranges in account
2106 Returns -2 or 2 if the ranges where 'joined' like < 2 and >= 2
2107 */
2108
sel_cmp(Field * field,uchar * a,uchar * b,uint8 a_flag,uint8 b_flag)2109 static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
2110 uint8 b_flag)
2111 {
2112 int cmp;
2113 /* First check if there was a compare to a min or max element */
2114 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2115 {
2116 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
2117 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
2118 return 0;
2119 return (a_flag & NO_MIN_RANGE) ? -1 : 1;
2120 }
2121 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
2122 return (b_flag & NO_MIN_RANGE) ? 1 : -1;
2123
2124 if (field->real_maybe_null()) // If null is part of key
2125 {
2126 if (*a != *b)
2127 {
2128 return *a ? -1 : 1;
2129 }
2130 if (*a)
2131 goto end; // NULL where equal
2132 a++; b++; // Skip NULL marker
2133 }
2134 cmp=field->key_cmp(a , b);
2135 if (cmp) return cmp < 0 ? -1 : 1; // The values differed
2136
2137 // Check if the compared equal arguments was defined with open/closed range
2138 end:
2139 if (a_flag & (NEAR_MIN | NEAR_MAX))
2140 {
2141 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
2142 return 0;
2143 if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
2144 return (a_flag & NEAR_MIN) ? 2 : -2;
2145 return (a_flag & NEAR_MIN) ? 1 : -1;
2146 }
2147 if (b_flag & (NEAR_MIN | NEAR_MAX))
2148 return (b_flag & NEAR_MIN) ? -2 : 2;
2149 return 0; // The elements where equal
2150 }
2151
2152
clone_tree(RANGE_OPT_PARAM * param)2153 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2154 {
2155 SEL_ARG tmp_link,*next_arg,*root;
2156 next_arg= &tmp_link;
2157 if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2158 return 0;
2159 next_arg->next=0; // Fix last link
2160 tmp_link.next->prev=0; // Fix first link
2161 if (root) // If not OOM
2162 root->use_count= 0;
2163 return root;
2164 }
2165
2166
2167 /*
2168 Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2169 objects from table read plans.
2170 */
2171 class TABLE_READ_PLAN
2172 {
2173 public:
2174 /*
2175 Plan read cost, with or without cost of full row retrieval, depending
2176 on plan creation parameters.
2177 */
2178 double read_cost;
2179 ha_rows records; /* estimate of #rows to be examined */
2180
2181 /*
2182 If TRUE, the scan returns rows in rowid order. This is used only for
2183 scans that can be both ROR and non-ROR.
2184 */
2185 bool is_ror;
2186
2187 /*
2188 Create quick select for this plan.
2189 SYNOPSIS
2190 make_quick()
2191 param Parameter from test_quick_select
2192 retrieve_full_rows If TRUE, created quick select will do full record
2193 retrieval.
2194 parent_alloc Memory pool to use, if any.
2195
2196 NOTES
2197 retrieve_full_rows is ignored by some implementations.
2198
2199 RETURN
2200 created quick select
2201 NULL on any error.
2202 */
2203 virtual QUICK_SELECT_I *make_quick(PARAM *param,
2204 bool retrieve_full_rows,
2205 MEM_ROOT *parent_alloc=NULL) = 0;
2206
2207 /* Table read plans are allocated on MEM_ROOT and are never deleted */
operator new(size_t size,MEM_ROOT * mem_root)2208 static void *operator new(size_t size, MEM_ROOT *mem_root)
2209 { return (void*) alloc_root(mem_root, (uint) size); }
operator delete(void * ptr,size_t size)2210 static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
operator delete(void * ptr,MEM_ROOT * mem_root)2211 static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
~TABLE_READ_PLAN()2212 virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */
2213
2214 /**
2215 Add basic info for this TABLE_READ_PLAN to the optimizer trace.
2216
2217 @param param Parameters for range analysis of this table
2218 @param trace_object The optimizer trace object the info is appended to
2219 */
2220 virtual void trace_basic_info(const PARAM *param,
2221 Opt_trace_object *trace_object) const = 0;
2222 };
2223
2224 /*
2225 Plan for a QUICK_RANGE_SELECT scan.
2226 TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2227 QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2228 record retrieval scans.
2229 */
2230
2231 class TRP_RANGE : public TABLE_READ_PLAN
2232 {
2233 public:
2234 /**
2235 Root of red-black tree for intervals over key fields to be used in
2236 "range" method retrieval. See SEL_ARG graph description.
2237 */
2238 SEL_ARG *key;
2239 uint key_idx; /* key number in PARAM::key and PARAM::real_keynr*/
2240 uint mrr_flags;
2241 uint mrr_buf_size;
2242
TRP_RANGE(SEL_ARG * key_arg,uint idx_arg,uint mrr_flags_arg)2243 TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2244 : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2245 {}
~TRP_RANGE()2246 virtual ~TRP_RANGE() {} /* Remove gcc warning */
2247
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)2248 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2249 MEM_ROOT *parent_alloc)
2250 {
2251 DBUG_ENTER("TRP_RANGE::make_quick");
2252 QUICK_RANGE_SELECT *quick;
2253 if ((quick= get_quick_select(param, key_idx, key, mrr_flags, mrr_buf_size,
2254 parent_alloc)))
2255 {
2256 quick->records= records;
2257 quick->read_time= read_cost;
2258 }
2259 DBUG_RETURN(quick);
2260 }
2261
2262 void trace_basic_info(const PARAM *param,
2263 Opt_trace_object *trace_object) const;
2264 };
2265
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2266 void TRP_RANGE::trace_basic_info(const PARAM *param,
2267 Opt_trace_object *trace_object) const
2268 {
2269 #ifdef OPTIMIZER_TRACE
2270 DBUG_ASSERT(param->using_real_indexes);
2271 const uint keynr_in_table= param->real_keynr[key_idx];
2272
2273 const KEY &cur_key= param->table->key_info[keynr_in_table];
2274 const KEY_PART_INFO *key_part= cur_key.key_part;
2275
2276 trace_object->add_alnum("type", "range_scan").
2277 add_utf8("index", cur_key.name).add("rows", records);
2278
2279 Opt_trace_array trace_range(¶m->thd->opt_trace, "ranges");
2280
2281 // TRP_RANGE should not be created if there are no range intervals
2282 DBUG_ASSERT(key);
2283
2284 String range_info;
2285 range_info.set_charset(system_charset_info);
2286 append_range_all_keyparts(&trace_range, NULL, &range_info, key, key_part);
2287
2288 #endif
2289 }
2290
2291
2292 typedef struct st_ror_scan_info
2293 {
2294 uint idx; ///< # of used key in param->keys
2295 uint keynr; ///< # of used key in table
2296 ha_rows records; ///< estimate of # records this scan will return
2297
2298 /** Set of intervals over key fields that will be used for row retrieval. */
2299 SEL_ARG *sel_arg;
2300
2301 /** Fields used in the query and covered by this ROR scan. */
2302 MY_BITMAP covered_fields;
2303 /**
2304 Fields used in the query that are a) covered by this ROR scan and
2305 b) not already covered by ROR scans ordered earlier in the merge
2306 sequence.
2307 */
2308 MY_BITMAP covered_fields_remaining;
2309 /** #fields in covered_fields_remaining (caching of bitmap_bits_set()) */
2310 uint num_covered_fields_remaining;
2311
2312 /**
2313 Cost of reading all index records with values in sel_arg intervals set
2314 (assuming there is no need to access full table records)
2315 */
2316 double index_read_cost;
2317 } ROR_SCAN_INFO;
2318
2319 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2320
2321 class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
2322 {
2323 public:
TRP_ROR_INTERSECT()2324 TRP_ROR_INTERSECT() {} /* Remove gcc warning */
~TRP_ROR_INTERSECT()2325 virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2326 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2327 MEM_ROOT *parent_alloc);
2328
2329 /* Array of pointers to ROR range scans used in this intersection */
2330 struct st_ror_scan_info **first_scan;
2331 struct st_ror_scan_info **last_scan; /* End of the above array */
2332 struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
2333 bool is_covering; /* TRUE if no row retrieval phase is necessary */
2334 double index_scan_costs; /* SUM(cost(index_scan)) */
2335
2336 void trace_basic_info(const PARAM *param,
2337 Opt_trace_object *trace_object) const;
2338 };
2339
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2340 void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
2341 Opt_trace_object *trace_object) const
2342 {
2343 #ifdef OPTIMIZER_TRACE
2344 trace_object->add_alnum("type", "index_roworder_intersect").
2345 add("rows", records).
2346 add("cost", read_cost).
2347 add("covering", is_covering).
2348 add("clustered_pk_scan", cpk_scan != NULL);
2349
2350 Opt_trace_context * const trace= ¶m->thd->opt_trace;
2351 Opt_trace_array ota(trace, "intersect_of");
2352 for (st_ror_scan_info **cur_scan= first_scan;
2353 cur_scan != last_scan;
2354 cur_scan++)
2355 {
2356 const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
2357 const KEY_PART_INFO *key_part= cur_key.key_part;
2358
2359 Opt_trace_object trace_isect_idx(trace);
2360 trace_isect_idx.add_alnum("type", "range_scan").
2361 add_utf8("index", cur_key.name).add("rows", (*cur_scan)->records);
2362
2363 Opt_trace_array trace_range(trace, "ranges");
2364 for (const SEL_ARG *current= (*cur_scan)->sel_arg;
2365 current;
2366 current= current->next)
2367 {
2368 String range_info;
2369 range_info.set_charset(system_charset_info);
2370 for (const SEL_ARG *part= current;
2371 part;
2372 part= part->next_key_part)
2373 {
2374 const KEY_PART_INFO *cur_key_part= key_part + part->part;
2375 append_range(&range_info, cur_key_part,
2376 part->min_value, part->max_value,
2377 part->min_flag | part->max_flag);
2378 }
2379 trace_range.add_utf8(range_info.ptr(), range_info.length());
2380 }
2381 }
2382 #endif
2383 }
2384
2385 /*
2386 Plan for QUICK_ROR_UNION_SELECT scan.
2387 QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2388 is ignored by make_quick.
2389 */
2390
2391 class TRP_ROR_UNION : public TABLE_READ_PLAN
2392 {
2393 public:
TRP_ROR_UNION()2394 TRP_ROR_UNION() {} /* Remove gcc warning */
~TRP_ROR_UNION()2395 virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */
2396 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2397 MEM_ROOT *parent_alloc);
2398 TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
2399 TABLE_READ_PLAN **last_ror; /* end of the above array */
2400
2401 void trace_basic_info(const PARAM *param,
2402 Opt_trace_object *trace_object) const;
2403 };
2404
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2405 void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
2406 Opt_trace_object *trace_object) const
2407 {
2408 #ifdef OPTIMIZER_TRACE
2409 Opt_trace_context * const trace= ¶m->thd->opt_trace;
2410 trace_object->add_alnum("type", "index_roworder_union");
2411 Opt_trace_array ota(trace, "union_of");
2412 for (TABLE_READ_PLAN **current= first_ror;
2413 current != last_ror;
2414 current++)
2415 {
2416 Opt_trace_object trp_info(trace);
2417 (*current)->trace_basic_info(param, &trp_info);
2418 }
2419 #endif
2420 }
2421
2422 /*
2423 Plan for QUICK_INDEX_MERGE_SELECT scan.
2424 QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
2425 is ignored by make_quick.
2426 */
2427
2428 class TRP_INDEX_MERGE : public TABLE_READ_PLAN
2429 {
2430 public:
TRP_INDEX_MERGE()2431 TRP_INDEX_MERGE() {} /* Remove gcc warning */
~TRP_INDEX_MERGE()2432 virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */
2433 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2434 MEM_ROOT *parent_alloc);
2435 TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
2436 TRP_RANGE **range_scans_end; /* end of the array */
2437
2438 void trace_basic_info(const PARAM *param,
2439 Opt_trace_object *trace_object) const;
2440 };
2441
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2442 void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
2443 Opt_trace_object *trace_object) const
2444 {
2445 #ifdef OPTIMIZER_TRACE
2446 Opt_trace_context * const trace= ¶m->thd->opt_trace;
2447 trace_object->add_alnum("type", "index_merge");
2448 Opt_trace_array ota(trace, "index_merge_of");
2449 for (TRP_RANGE **current= range_scans;
2450 current != range_scans_end;
2451 current++)
2452 {
2453 Opt_trace_object trp_info(trace);
2454 (*current)->trace_basic_info(param, &trp_info);
2455 }
2456 #endif
2457 }
2458
2459 /*
2460 Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2461 */
2462
2463 class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
2464 {
2465 private:
2466 bool have_min; ///< TRUE if there is a MIN function
2467 bool have_max; ///< TRUE if there is a MAX function
2468 /**
2469 TRUE if there is an aggregate distinct function, e.g.
2470 "COUNT(DISTINCT x)"
2471 */
2472 bool have_agg_distinct;
2473 /**
2474 The key_part of the only field used by all MIN/MAX functions.
2475 Note that TRP_GROUP_MIN_MAX is not used if there are MIN/MAX
2476 functions on more than one field.
2477 */
2478 KEY_PART_INFO *min_max_arg_part;
2479 uint group_prefix_len; ///< Length of all key parts in the group prefix
2480 uint used_key_parts; ///< Number of index key parts used for access
2481 uint group_key_parts; ///< Number of index key parts in the group prefix
2482 KEY *index_info; ///< The index chosen for data access
2483 uint index; ///< The id of the chosen index
2484 uchar key_infix[MAX_KEY_LENGTH]; ///< Constants from equality predicates
2485 uint key_infix_len; ///< Length of key_infix
2486 SEL_TREE *range_tree; ///< Represents all range predicates in the query
2487 SEL_ARG *index_tree; ///< The sub-tree corresponding to index_info
2488 uint param_idx; ///< Index of used key in param->key
2489 bool is_index_scan; ///< Use index_next() instead of random read
2490 public:
2491 /** Number of records selected by the ranges in index_tree. */
2492 ha_rows quick_prefix_records;
2493 public:
2494
2495 void trace_basic_info(const PARAM *param,
2496 Opt_trace_object *trace_object) const;
2497
TRP_GROUP_MIN_MAX(bool have_min_arg,bool have_max_arg,bool have_agg_distinct_arg,KEY_PART_INFO * min_max_arg_part_arg,uint group_prefix_len_arg,uint used_key_parts_arg,uint group_key_parts_arg,KEY * index_info_arg,uint index_arg,uint key_infix_len_arg,uchar * key_infix_arg,SEL_TREE * tree_arg,SEL_ARG * index_tree_arg,uint param_idx_arg,ha_rows quick_prefix_records_arg)2498 TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
2499 bool have_agg_distinct_arg,
2500 KEY_PART_INFO *min_max_arg_part_arg,
2501 uint group_prefix_len_arg, uint used_key_parts_arg,
2502 uint group_key_parts_arg, KEY *index_info_arg,
2503 uint index_arg, uint key_infix_len_arg,
2504 uchar *key_infix_arg,
2505 SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
2506 uint param_idx_arg, ha_rows quick_prefix_records_arg)
2507 : have_min(have_min_arg), have_max(have_max_arg),
2508 have_agg_distinct(have_agg_distinct_arg),
2509 min_max_arg_part(min_max_arg_part_arg),
2510 group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
2511 group_key_parts(group_key_parts_arg), index_info(index_info_arg),
2512 index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
2513 index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
2514 quick_prefix_records(quick_prefix_records_arg)
2515 {
2516 if (key_infix_len)
2517 memcpy(this->key_infix, key_infix_arg, key_infix_len);
2518 }
~TRP_GROUP_MIN_MAX()2519 virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */
2520
2521 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2522 MEM_ROOT *parent_alloc);
use_index_scan()2523 void use_index_scan() { is_index_scan= TRUE; }
2524 };
2525
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2526 void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
2527 Opt_trace_object *trace_object) const
2528 {
2529 #ifdef OPTIMIZER_TRACE
2530 trace_object->add_alnum("type", "index_group").
2531 add_utf8("index", index_info->name);
2532 if (min_max_arg_part)
2533 trace_object->add_utf8("group_attribute",
2534 min_max_arg_part->field->field_name);
2535 else
2536 trace_object->add_null("group_attribute");
2537 trace_object->add("min_aggregate", have_min).
2538 add("max_aggregate", have_max).
2539 add("distinct_aggregate", have_agg_distinct).
2540 add("rows", records).
2541 add("cost", read_cost);
2542
2543 const KEY_PART_INFO *key_part= index_info->key_part;
2544 Opt_trace_context * const trace= ¶m->thd->opt_trace;
2545 {
2546 Opt_trace_array trace_keyparts(trace, "key_parts_used_for_access");
2547 for (uint partno= 0; partno < used_key_parts; partno++)
2548 {
2549 const KEY_PART_INFO *cur_key_part= key_part + partno;
2550 trace_keyparts.add_utf8(cur_key_part->field->field_name);
2551 }
2552 }
2553 Opt_trace_array trace_range(trace, "ranges");
2554
2555 // can have group quick without ranges
2556 if (index_tree)
2557 {
2558 String range_info;
2559 range_info.set_charset(system_charset_info);
2560 append_range_all_keyparts(&trace_range, NULL,
2561 &range_info, index_tree, key_part);
2562 }
2563 #endif
2564 }
2565
2566 /*
2567 Fill param->needed_fields with bitmap of fields used in the query.
2568 SYNOPSIS
2569 fill_used_fields_bitmap()
2570 param Parameter from test_quick_select function.
2571
2572 NOTES
2573 Clustered PK members are not put into the bitmap as they are implicitly
2574 present in all keys (and it is impossible to avoid reading them).
2575 RETURN
2576 0 Ok
2577 1 Out of memory.
2578 */
2579
fill_used_fields_bitmap(PARAM * param)2580 static int fill_used_fields_bitmap(PARAM *param)
2581 {
2582 TABLE *table= param->table;
2583 my_bitmap_map *tmp;
2584 uint pk;
2585 param->tmp_covered_fields.bitmap= 0;
2586 param->fields_bitmap_size= table->s->column_bitmap_size;
2587 if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2588 param->fields_bitmap_size)) ||
2589 bitmap_init(¶m->needed_fields, tmp, table->s->fields, FALSE))
2590 return 1;
2591
2592 bitmap_copy(¶m->needed_fields, table->read_set);
2593 bitmap_union(¶m->needed_fields, table->write_set);
2594
2595 pk= param->table->s->primary_key;
2596 if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2597 {
2598 /* The table uses clustered PK and it is not internally generated */
2599 KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2600 KEY_PART_INFO *key_part_end=
2601 key_part + param->table->key_info[pk].user_defined_key_parts;
2602 for (;key_part != key_part_end; ++key_part)
2603 bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1);
2604 }
2605 return 0;
2606 }
2607
2608
2609 /*
2610 Test if a key can be used in different ranges
2611
2612 SYNOPSIS
2613 SQL_SELECT::test_quick_select()
2614 thd Current thread
2615 keys_to_use Keys to use for range retrieval
2616 prev_tables Tables assumed to be already read when the scan is
2617 performed (but not read at the moment of this call)
2618 limit Query limit
2619 force_quick_range Prefer to use range (instead of full table scan) even
2620 if it is more expensive.
2621 interesting_order The sort order the range access method must be able
2622 to provide. Three-value logic: asc/desc/don't care
2623
2624 NOTES
2625 Updates the following in the select parameter:
2626 needed_reg - Bits for keys with may be used if all prev regs are read
2627 quick - Parameter to use when reading records.
2628
2629 In the table struct the following information is updated:
2630 quick_keys - Which keys can be used
2631 quick_rows - How many rows the key matches
2632 quick_condition_rows - E(# rows that will satisfy the table condition)
2633
2634 IMPLEMENTATION
2635 quick_condition_rows value is obtained as follows:
2636
2637 It is a minimum of E(#output rows) for all considered table access
2638 methods (range and index_merge accesses over various indexes).
2639
2640 The obtained value is not a true E(#rows that satisfy table condition)
2641 but rather a pessimistic estimate. To obtain a true E(#...) one would
2642 need to combine estimates of various access methods, taking into account
2643 correlations between sets of rows they will return.
2644
2645 For example, if values of tbl.key1 and tbl.key2 are independent (a right
2646 assumption if we have no information about their correlation) then the
2647 correct estimate will be:
2648
2649 E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2650 = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2)
2651
2652 which is smaller than
2653
2654 MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2)))
2655
2656 which is currently produced.
2657
2658 TODO
2659 * Change the value returned in quick_condition_rows from a pessimistic
2660 estimate to true E(#rows that satisfy table condition).
2661 (we can re-use some of E(#rows) calcuation code from index_merge/intersection
2662 for this)
2663
2664 * Check if this function really needs to modify keys_to_use, and change the
2665 code to pass it by reference if it doesn't.
2666
2667 * In addition to force_quick_range other means can be (an usually are) used
2668 to make this function prefer range over full table scan. Figure out if
2669 force_quick_range is really needed.
2670
2671 RETURN
2672 -1 if impossible select (i.e. certainly no rows will be selected)
2673 0 if can't use quick_select
2674 1 if found usable ranges and quick select has been successfully created.
2675 */
2676
test_quick_select(THD * thd,key_map keys_to_use,table_map prev_tables,ha_rows limit,bool force_quick_range,const ORDER::enum_order interesting_order)2677 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
2678 table_map prev_tables,
2679 ha_rows limit, bool force_quick_range,
2680 const ORDER::enum_order interesting_order)
2681 {
2682 uint idx;
2683 double scan_time;
2684 DBUG_ENTER("SQL_SELECT::test_quick_select");
2685 DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
2686 (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2687 (ulong) const_tables));
2688
2689 set_quick(NULL);
2690 needed_reg.clear_all();
2691 quick_keys.clear_all();
2692 if (keys_to_use.is_clear_all())
2693 DBUG_RETURN(0);
2694 records= head->file->stats.records;
2695 if (!records)
2696 records++; /* purecov: inspected */
2697 scan_time= records * ROW_EVALUATE_COST + 1;
2698 read_time= head->file->scan_time() + scan_time + 1.1;
2699 if (head->force_index)
2700 scan_time= read_time= DBL_MAX;
2701 if (limit < records)
2702 read_time= (double) records + scan_time + 1; // Force to use index
2703 else if (read_time <= 2.0 && !force_quick_range)
2704 DBUG_RETURN(0); /* No need for quick select */
2705
2706 Opt_trace_context * const trace= &thd->opt_trace;
2707 Opt_trace_object trace_range(trace, "range_analysis");
2708 Opt_trace_object(trace, "table_scan").
2709 add("rows", head->file->stats.records).
2710 add("cost", read_time);
2711
2712 keys_to_use.intersect(head->keys_in_use_for_query);
2713 if (!keys_to_use.is_clear_all())
2714 {
2715 MEM_ROOT alloc;
2716 SEL_TREE *tree= NULL;
2717 KEY_PART *key_parts;
2718 KEY *key_info;
2719 PARAM param;
2720
2721 /*
2722 Use the 3 multiplier as range optimizer allocates big PARAM structure
2723 and may evaluate a subquery expression
2724 TODO During the optimization phase we should evaluate only inexpensive
2725 single-lookup subqueries.
2726 */
2727 if (check_stack_overrun(thd, 3*STACK_MIN_SIZE + sizeof(PARAM), NULL))
2728 DBUG_RETURN(0); // Fatal error flag is set
2729
2730 /* set up parameter that is passed to all functions */
2731 param.thd= thd;
2732 param.baseflag= head->file->ha_table_flags();
2733 param.prev_tables=prev_tables | const_tables;
2734 param.read_tables=read_tables;
2735 param.current_table= head->map;
2736 param.table=head;
2737 param.keys=0;
2738 param.mem_root= &alloc;
2739 param.old_root= thd->mem_root;
2740 param.needed_reg= &needed_reg;
2741 param.imerge_cost_buff_size= 0;
2742 param.using_real_indexes= TRUE;
2743 param.remove_jump_scans= TRUE;
2744 param.force_default_mrr= (interesting_order == ORDER::ORDER_DESC);
2745 param.order_direction= interesting_order;
2746 param.use_index_statistics= false;
2747
2748 thd->no_errors=1; // Don't warn about NULL
2749 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
2750 if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
2751 sizeof(KEY_PART)*
2752 head->s->key_parts)) ||
2753 fill_used_fields_bitmap(¶m))
2754 {
2755 thd->no_errors=0;
2756 free_root(&alloc,MYF(0)); // Return memory & allocator
2757 DBUG_RETURN(0); // Can't use range
2758 }
2759 key_parts= param.key_parts;
2760 thd->mem_root= &alloc;
2761
2762 {
2763 Opt_trace_array trace_idx(trace,
2764 "potential_range_indices",
2765 Opt_trace_context::RANGE_OPTIMIZER);
2766 /*
2767 Make an array with description of all key parts of all table keys.
2768 This is used in get_mm_parts function.
2769 */
2770 key_info= head->key_info;
2771 for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
2772 {
2773 Opt_trace_object trace_idx_details(trace);
2774 trace_idx_details.add_utf8("index", key_info->name);
2775 KEY_PART_INFO *key_part_info;
2776 if (!keys_to_use.is_set(idx))
2777 {
2778 trace_idx_details.add("usable", false).
2779 add_alnum("cause", "not_applicable");
2780 continue;
2781 }
2782 if (key_info->flags & HA_FULLTEXT)
2783 {
2784 trace_idx_details.add("usable", false).
2785 add_alnum("cause", "fulltext");
2786 continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
2787 }
2788
2789 trace_idx_details.add("usable", true);
2790
2791 param.key[param.keys]=key_parts;
2792 key_part_info= key_info->key_part;
2793 Opt_trace_array trace_keypart(trace, "key_parts");
2794 for (uint part=0 ; part < actual_key_parts(key_info) ;
2795 part++, key_parts++, key_part_info++)
2796 {
2797 key_parts->key= param.keys;
2798 key_parts->part= part;
2799 key_parts->length= key_part_info->length;
2800 key_parts->store_length= key_part_info->store_length;
2801 key_parts->field= key_part_info->field;
2802 key_parts->null_bit= key_part_info->null_bit;
2803 key_parts->image_type =
2804 (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2805 /* Only HA_PART_KEY_SEG is used */
2806 key_parts->flag= (uint8) key_part_info->key_part_flag;
2807 trace_keypart.add_utf8(key_parts->field->field_name);
2808 }
2809 param.real_keynr[param.keys++]=idx;
2810 }
2811 }
2812 param.key_parts_end=key_parts;
2813 param.alloced_sel_args= 0;
2814
2815 /* Calculate cost of full index read for the shortest covering index */
2816 if (!head->covering_keys.is_clear_all())
2817 {
2818 int key_for_use= find_shortest_key(head, &head->covering_keys);
2819 double key_read_time=
2820 param.table->file->index_only_read_time(key_for_use,
2821 rows2double(records)) +
2822 records * ROW_EVALUATE_COST;
2823
2824 bool chosen= false;
2825 if (key_read_time < read_time)
2826 {
2827 read_time= key_read_time;
2828 chosen= true;
2829 }
2830
2831 Opt_trace_object trace_cov(trace,
2832 "best_covering_index_scan",
2833 Opt_trace_context::RANGE_OPTIMIZER);
2834 trace_cov.add_utf8("index", head->key_info[key_for_use].name).
2835 add("cost", key_read_time).add("chosen", chosen);
2836 if (!chosen)
2837 trace_cov.add_alnum("cause", "cost");
2838 }
2839
2840 TABLE_READ_PLAN *best_trp= NULL;
2841 TRP_GROUP_MIN_MAX *group_trp;
2842 double best_read_time= read_time;
2843
2844 if (cond)
2845 {
2846 {
2847 Opt_trace_array trace_setup_cond(trace, "setup_range_conditions");
2848 tree= get_mm_tree(¶m,cond);
2849 }
2850 if (tree)
2851 {
2852 if (tree->type == SEL_TREE::IMPOSSIBLE)
2853 {
2854 trace_range.add("impossible_range", true);
2855 records=0L; /* Return -1 from this function. */
2856 read_time= (double) HA_POS_ERROR;
2857 goto free_mem;
2858 }
2859 /*
2860 If the tree can't be used for range scans, proceed anyway, as we
2861 can construct a group-min-max quick select
2862 */
2863 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
2864 {
2865 trace_range.add("range_scan_possible", false);
2866 if (tree->type == SEL_TREE::ALWAYS)
2867 trace_range.add_alnum("cause", "condition_always_true");
2868
2869 tree= NULL;
2870 }
2871 }
2872 }
2873
2874 /*
2875 Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
2876 Notice that it can be constructed no matter if there is a range tree.
2877 */
2878 group_trp= get_best_group_min_max(¶m, tree, best_read_time);
2879 if (group_trp)
2880 {
2881 param.table->quick_condition_rows= min(group_trp->records,
2882 head->file->stats.records);
2883 Opt_trace_object grp_summary(trace,
2884 "best_group_range_summary",
2885 Opt_trace_context::RANGE_OPTIMIZER);
2886 if (unlikely(trace->is_started()))
2887 group_trp->trace_basic_info(¶m, &grp_summary);
2888 if (group_trp->read_cost < best_read_time)
2889 {
2890 grp_summary.add("chosen", true);
2891 best_trp= group_trp;
2892 best_read_time= best_trp->read_cost;
2893 }
2894 else
2895 grp_summary.add("chosen", false).add_alnum("cause", "cost");
2896 }
2897
2898 if (tree)
2899 {
2900 /*
2901 It is possible to use a range-based quick select (but it might be
2902 slower than 'all' table scan).
2903 */
2904 dbug_print_tree("final_tree", tree, ¶m);
2905
2906 {
2907 /*
2908 Calculate cost of single index range scan and possible
2909 intersections of these
2910 */
2911 Opt_trace_object trace_range(trace,
2912 "analyzing_range_alternatives",
2913 Opt_trace_context::RANGE_OPTIMIZER);
2914 TRP_RANGE *range_trp;
2915 TRP_ROR_INTERSECT *rori_trp;
2916
2917 /* Get best 'range' plan and prepare data for making other plans */
2918 if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE,
2919 best_read_time)))
2920 {
2921 best_trp= range_trp;
2922 best_read_time= best_trp->read_cost;
2923 }
2924
2925 /*
2926 Simultaneous key scans and row deletes on several handler
2927 objects are not allowed so don't use ROR-intersection for
2928 table deletes. Also, ROR-intersection cannot return rows in
2929 descending order
2930 */
2931 if ((thd->lex->sql_command != SQLCOM_DELETE) &&
2932 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
2933 interesting_order != ORDER::ORDER_DESC)
2934 {
2935 /*
2936 Get best non-covering ROR-intersection plan and prepare data for
2937 building covering ROR-intersection.
2938 */
2939 if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time)))
2940 {
2941 best_trp= rori_trp;
2942 best_read_time= best_trp->read_cost;
2943 }
2944 }
2945 }
2946
2947 // Here we calculate cost of union index merge
2948 if (!tree->merges.is_empty())
2949 {
2950 // Cannot return rows in descending order.
2951 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
2952 interesting_order != ORDER::ORDER_DESC &&
2953 param.table->file->stats.records)
2954 {
2955 /* Try creating index_merge/ROR-union scan. */
2956 SEL_IMERGE *imerge;
2957 TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
2958 LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
2959 List_iterator_fast<SEL_IMERGE> it(tree->merges);
2960 Opt_trace_array trace_idx_merge(trace,
2961 "analyzing_index_merge",
2962 Opt_trace_context::RANGE_OPTIMIZER);
2963 while ((imerge= it++))
2964 {
2965 new_conj_trp= get_best_disjunct_quick(¶m, imerge,
2966 best_read_time);
2967 if (new_conj_trp)
2968 set_if_smaller(param.table->quick_condition_rows,
2969 new_conj_trp->records);
2970 if (!best_conj_trp ||
2971 (new_conj_trp &&
2972 new_conj_trp->read_cost < best_conj_trp->read_cost))
2973 {
2974 best_conj_trp= new_conj_trp;
2975 }
2976 }
2977 if (best_conj_trp)
2978 best_trp= best_conj_trp;
2979 }
2980 }
2981 }
2982
2983 thd->mem_root= param.old_root;
2984
2985 /* If we got a read plan, create a quick select from it. */
2986 if (best_trp)
2987 {
2988 records= best_trp->records;
2989 if (!(quick= best_trp->make_quick(¶m, TRUE)) || quick->init())
2990 set_quick(NULL);
2991 }
2992
2993 free_mem:
2994 if (unlikely(quick && trace->is_started() && best_trp))
2995 {
2996 // best_trp cannot be NULL if quick is set, done to keep fortify happy
2997 Opt_trace_object trace_range_summary(trace,
2998 "chosen_range_access_summary");
2999 {
3000 Opt_trace_object trace_range_plan(trace,
3001 "range_access_plan");
3002 best_trp->trace_basic_info(¶m, &trace_range_plan);
3003 }
3004 trace_range_summary.add("rows_for_plan", quick->records).
3005 add("cost_for_plan", quick->read_time).
3006 add("chosen", true);
3007 }
3008
3009 free_root(&alloc,MYF(0)); // Return memory & allocator
3010 thd->mem_root= param.old_root;
3011 thd->no_errors=0;
3012 }
3013
3014 DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
3015
3016 /*
3017 Assume that if the user is using 'limit' we will only need to scan
3018 limit rows if we are using a key
3019 */
3020 DBUG_RETURN(records ? MY_TEST(quick) : -1);
3021 }
3022
3023 /****************************************************************************
3024 * Partition pruning module
3025 ****************************************************************************/
3026 #ifdef WITH_PARTITION_STORAGE_ENGINE
3027
3028 /*
3029 PartitionPruningModule
3030
3031 This part of the code does partition pruning. Partition pruning solves the
3032 following problem: given a query over partitioned tables, find partitions
3033 that we will not need to access (i.e. partitions that we can assume to be
3034 empty) when executing the query.
3035 The set of partitions to prune doesn't depend on which query execution
3036 plan will be used to execute the query.
3037
3038 HOW IT WORKS
3039
3040 Partition pruning module makes use of RangeAnalysisModule. The following
3041 examples show how the problem of partition pruning can be reduced to the
3042 range analysis problem:
3043
3044 EXAMPLE 1
3045 Consider a query:
3046
3047 SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3048
3049 where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
3050 way to find the used (i.e. not pruned away) partitions is as follows:
3051
3052 1. analyze the WHERE clause and extract the list of intervals over t1.a
3053 for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3054
3055 2. for each interval I
3056 {
3057 find partitions that have non-empty intersection with I;
3058 mark them as used;
3059 }
3060
3061 EXAMPLE 2
3062 Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3063 we need to:
3064
3065 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3066 The list of intervals we'll obtain will look like this:
3067 ((t1.a, t1.b) = (1,'foo')),
3068 ((t1.a, t1.b) = (2,'bar')),
3069 ((t1,a, t1.b) > (10,'zz'))
3070
3071 2. for each interval I
3072 {
3073 if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3074 {
3075 calculate HASH(part_func(t1.a, t1.b));
3076 find which partition has records with this hash value and mark
3077 it as used;
3078 }
3079 else
3080 {
3081 mark all partitions as used;
3082 break;
3083 }
3084 }
3085
3086 For both examples the step #1 is exactly what RangeAnalysisModule could
3087 be used to do, if it was provided with appropriate index description
3088 (array of KEY_PART structures).
3089 In example #1, we need to provide it with description of index(t1.a),
3090 in example #2, we need to provide it with description of index(t1.a, t1.b).
3091
3092 These index descriptions are further called "partitioning index
3093 descriptions". Note that it doesn't matter if such indexes really exist,
3094 as range analysis module only uses the description.
3095
3096 Putting it all together, partitioning module works as follows:
3097
3098 prune_partitions() {
3099 call create_partition_index_description();
3100
3101 call get_mm_tree(); // invoke the RangeAnalysisModule
3102
3103 // analyze the obtained interval list and get used partitions
3104 call find_used_partitions();
3105 }
3106
3107 */
3108
3109 struct st_part_prune_param;
3110 struct st_part_opt_info;
3111
3112 typedef void (*mark_full_part_func)(partition_info*, uint32);
3113
3114 /*
3115 Partition pruning operation context
3116 */
3117 typedef struct st_part_prune_param
3118 {
3119 RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3120
3121 /***************************************************************
3122 Following fields are filled in based solely on partitioning
3123 definition and not modified after that:
3124 **************************************************************/
3125 partition_info *part_info; /* Copy of table->part_info */
3126 /* Function to get partition id from partitioning fields only */
3127 get_part_id_func get_top_partition_id_func;
3128 /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3129 mark_full_part_func mark_full_partition_used;
3130
3131 /* Partitioning 'index' description, array of key parts */
3132 KEY_PART *key;
3133
3134 /*
3135 Number of fields in partitioning 'index' definition created for
3136 partitioning (0 if partitioning 'index' doesn't include partitioning
3137 fields)
3138 */
3139 uint part_fields;
3140 uint subpart_fields; /* Same as above for subpartitioning */
3141
3142 /*
3143 Number of the last partitioning field keypart in the index, or -1 if
3144 partitioning index definition doesn't include partitioning fields.
3145 */
3146 int last_part_partno;
3147 int last_subpart_partno; /* Same as above for supartitioning */
3148
3149 /*
3150 is_part_keypart[i] == test(keypart #i in partitioning index is a member
3151 used in partitioning)
3152 Used to maintain current values of cur_part_fields and cur_subpart_fields
3153 */
3154 my_bool *is_part_keypart;
3155 /* Same as above for subpartitioning */
3156 my_bool *is_subpart_keypart;
3157
3158 my_bool ignore_part_fields; /* Ignore rest of partioning fields */
3159
3160 /***************************************************************
3161 Following fields form find_used_partitions() recursion context:
3162 **************************************************************/
3163 SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */
3164 SEL_ARG **arg_stack_end; /* Top of the stack */
3165 /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3166 uint cur_part_fields;
3167 /* Same as cur_part_fields, but for subpartitioning */
3168 uint cur_subpart_fields;
3169
3170 /* Iterator to be used to obtain the "current" set of used partitions */
3171 PARTITION_ITERATOR part_iter;
3172
3173 /* Initialized bitmap of num_subparts size */
3174 MY_BITMAP subparts_bitmap;
3175
3176 uchar *cur_min_key;
3177 uchar *cur_max_key;
3178
3179 uint cur_min_flag, cur_max_flag;
3180 } PART_PRUNE_PARAM;
3181
3182 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3183 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3184 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3185 SEL_IMERGE *imerge);
3186 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3187 List<SEL_IMERGE> &merges);
3188 static void mark_all_partitions_as_used(partition_info *part_info);
3189
3190 #ifndef DBUG_OFF
3191 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3192 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3193 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3194 #endif
3195
3196
3197 /**
3198 Perform partition pruning for a given table and condition.
3199
3200 @param thd Thread handle
3201 @param table Table to perform partition pruning for
3202 @param pprune_cond Condition to use for partition pruning
3203
3204 @note This function assumes that lock_partitions are setup when it
3205 is invoked. The function analyzes the condition, finds partitions that
3206 need to be used to retrieve the records that match the condition, and
3207 marks them as used by setting appropriate bit in part_info->read_partitions
3208 In the worst case all partitions are marked as used. If the table is not
3209 yet locked, it will also unset bits in part_info->lock_partitions that is
3210 not set in read_partitions.
3211
3212 This function returns promptly if called for non-partitioned table.
3213
3214 @return Operation status
3215 @retval true Failure
3216 @retval false Success
3217 */
3218
prune_partitions(THD * thd,TABLE * table,Item * pprune_cond)3219 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3220 {
3221 partition_info *part_info = table->part_info;
3222 DBUG_ENTER("prune_partitions");
3223 table->all_partitions_pruned_away= false;
3224
3225 if (!part_info)
3226 DBUG_RETURN(FALSE); /* not a partitioned table */
3227
3228 if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION &&
3229 part_info->is_auto_partitioned)
3230 DBUG_RETURN(false); /* Should not prune auto partitioned table */
3231
3232 if (!pprune_cond)
3233 {
3234 mark_all_partitions_as_used(part_info);
3235 DBUG_RETURN(FALSE);
3236 }
3237
3238 /* No need to continue pruning if there is no more partitions to prune! */
3239 if (bitmap_is_clear_all(&part_info->lock_partitions))
3240 bitmap_clear_all(&part_info->read_partitions);
3241 if (bitmap_is_clear_all(&part_info->read_partitions))
3242 {
3243 table->all_partitions_pruned_away= true;
3244 DBUG_RETURN(false);
3245 }
3246
3247 /*
3248 If the prepare stage already have completed pruning successfully,
3249 it is no use of running prune_partitions() again on the same condition.
3250 Since it will not be able to prune anything more than the previous call
3251 from the prepare step.
3252 */
3253 if (part_info->is_pruning_completed)
3254 DBUG_RETURN(false);
3255
3256 PART_PRUNE_PARAM prune_param;
3257 MEM_ROOT alloc;
3258 RANGE_OPT_PARAM *range_par= &prune_param.range_param;
3259 my_bitmap_map *old_sets[2];
3260
3261 prune_param.part_info= part_info;
3262 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
3263 range_par->mem_root= &alloc;
3264 range_par->old_root= thd->mem_root;
3265
3266 if (create_partition_index_description(&prune_param))
3267 {
3268 mark_all_partitions_as_used(part_info);
3269 free_root(&alloc,MYF(0)); // Return memory & allocator
3270 DBUG_RETURN(FALSE);
3271 }
3272
3273 dbug_tmp_use_all_columns(table, old_sets,
3274 table->read_set, table->write_set);
3275 range_par->thd= thd;
3276 range_par->table= table;
3277 /* range_par->cond doesn't need initialization */
3278 range_par->prev_tables= range_par->read_tables= 0;
3279 range_par->current_table= table->map;
3280
3281 range_par->keys= 1; // one index
3282 range_par->using_real_indexes= FALSE;
3283 range_par->remove_jump_scans= FALSE;
3284 range_par->real_keynr[0]= 0;
3285 range_par->alloced_sel_args= 0;
3286
3287 thd->no_errors=1; // Don't warn about NULL
3288 thd->mem_root=&alloc;
3289
3290 bitmap_clear_all(&part_info->read_partitions);
3291
3292 prune_param.key= prune_param.range_param.key_parts;
3293 SEL_TREE *tree;
3294 int res;
3295
3296 tree= get_mm_tree(range_par, pprune_cond);
3297 if (!tree)
3298 goto all_used;
3299
3300 if (tree->type == SEL_TREE::IMPOSSIBLE)
3301 {
3302 /* Cannot improve the pruning any further. */
3303 part_info->is_pruning_completed= true;
3304 goto end;
3305 }
3306
3307 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3308 goto all_used;
3309
3310 if (tree->merges.is_empty())
3311 {
3312 /* Range analysis has produced a single list of intervals. */
3313 prune_param.arg_stack_end= prune_param.arg_stack;
3314 prune_param.cur_part_fields= 0;
3315 prune_param.cur_subpart_fields= 0;
3316
3317 prune_param.cur_min_key= prune_param.range_param.min_key;
3318 prune_param.cur_max_key= prune_param.range_param.max_key;
3319 prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3320
3321 init_all_partitions_iterator(part_info, &prune_param.part_iter);
3322 if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3323 tree->keys[0]))))
3324 goto all_used;
3325 }
3326 else
3327 {
3328 if (tree->merges.elements == 1)
3329 {
3330 /*
3331 Range analysis has produced a "merge" of several intervals lists, a
3332 SEL_TREE that represents an expression in form
3333 sel_imerge = (tree1 OR tree2 OR ... OR treeN)
3334 that cannot be reduced to one tree. This can only happen when
3335 partitioning index has several keyparts and the condition is OR of
3336 conditions that refer to different key parts. For example, we'll get
3337 here for "partitioning_field=const1 OR subpartitioning_field=const2"
3338 */
3339 if (-1 == (res= find_used_partitions_imerge(&prune_param,
3340 tree->merges.head())))
3341 goto all_used;
3342 }
3343 else
3344 {
3345 /*
3346 Range analysis has produced a list of several imerges, i.e. a
3347 structure that represents a condition in form
3348 imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
3349 This is produced for complicated WHERE clauses that range analyzer
3350 can't really analyze properly.
3351 */
3352 if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
3353 tree->merges)))
3354 goto all_used;
3355 }
3356 }
3357
3358 /*
3359 If the condition can be evaluated now, we are done with pruning.
3360
3361 During the prepare phase, before locking, subqueries and stored programs
3362 are not evaluated. So we need to run prune_partitions() a second time in
3363 the optimize phase to prune partitions for reading, when subqueries and
3364 stored programs may be evaluated.
3365 */
3366 if (pprune_cond->can_be_evaluated_now())
3367 part_info->is_pruning_completed= true;
3368 goto end;
3369
3370 all_used:
3371 mark_all_partitions_as_used(prune_param.part_info);
3372 end:
3373 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
3374 thd->no_errors=0;
3375 thd->mem_root= range_par->old_root;
3376 free_root(&alloc,MYF(0)); // Return memory & allocator
3377 /*
3378 Must be a subset of the locked partitions.
3379 lock_partitions contains the partitions marked by explicit partition
3380 selection (... t PARTITION (pX) ...) and we must only use partitions
3381 within that set.
3382 */
3383 bitmap_intersect(&prune_param.part_info->read_partitions,
3384 &prune_param.part_info->lock_partitions);
3385 /*
3386 If not yet locked, also prune partitions to lock if not UPDATEing
3387 partition key fields. This will also prune lock_partitions if we are under
3388 LOCK TABLES, so prune away calls to start_stmt().
3389 TODO: enhance this prune locking to also allow pruning of
3390 'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
3391 a lock for part_key partition.
3392 */
3393 if (!thd->lex->is_query_tables_locked() &&
3394 !partition_key_modified(table, table->write_set))
3395 {
3396 bitmap_copy(&prune_param.part_info->lock_partitions,
3397 &prune_param.part_info->read_partitions);
3398 }
3399 if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
3400 table->all_partitions_pruned_away= true;
3401 DBUG_RETURN(false);
3402 }
3403
3404
3405 /*
3406 Store field key image to table record
3407
3408 SYNOPSIS
3409 store_key_image_to_rec()
3410 field Field which key image should be stored
3411 ptr Field value in key format
3412 len Length of the value, in bytes
3413
3414 DESCRIPTION
3415 Copy the field value from its key image to the table record. The source
3416 is the value in key image format, occupying len bytes in buffer pointed
3417 by ptr. The destination is table record, in "field value in table record"
3418 format.
3419 */
3420
store_key_image_to_rec(Field * field,uchar * ptr,uint len)3421 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3422 {
3423 /* Do the same as print_key_value() does */
3424 my_bitmap_map *old_map;
3425
3426 if (field->real_maybe_null())
3427 {
3428 if (*ptr)
3429 {
3430 field->set_null();
3431 return;
3432 }
3433 field->set_notnull();
3434 ptr++;
3435 }
3436 old_map= dbug_tmp_use_all_columns(field->table,
3437 field->table->write_set);
3438 field->set_key_image(ptr, len);
3439 dbug_tmp_restore_column_map(field->table->write_set, old_map);
3440 }
3441
3442
3443 /*
3444 For SEL_ARG* array, store sel_arg->min values into table record buffer
3445
3446 SYNOPSIS
3447 store_selargs_to_rec()
3448 ppar Partition pruning context
3449 start Array of SEL_ARG* for which the minimum values should be stored
3450 num Number of elements in the array
3451
3452 DESCRIPTION
3453 For each SEL_ARG* interval in the specified array, store the left edge
3454 field value (sel_arg->min, key image format) into the table record.
3455 */
3456
store_selargs_to_rec(PART_PRUNE_PARAM * ppar,SEL_ARG ** start,int num)3457 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3458 int num)
3459 {
3460 KEY_PART *parts= ppar->range_param.key_parts;
3461 for (SEL_ARG **end= start + num; start != end; start++)
3462 {
3463 SEL_ARG *sel_arg= (*start);
3464 store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3465 parts[sel_arg->part].length);
3466 }
3467 }
3468
3469
3470 /* Mark a partition as used in the case when there are no subpartitions */
mark_full_partition_used_no_parts(partition_info * part_info,uint32 part_id)3471 static void mark_full_partition_used_no_parts(partition_info* part_info,
3472 uint32 part_id)
3473 {
3474 DBUG_ENTER("mark_full_partition_used_no_parts");
3475 DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
3476 bitmap_set_bit(&part_info->read_partitions, part_id);
3477 DBUG_VOID_RETURN;
3478 }
3479
3480
3481 /* Mark a partition as used in the case when there are subpartitions */
mark_full_partition_used_with_parts(partition_info * part_info,uint32 part_id)3482 static void mark_full_partition_used_with_parts(partition_info *part_info,
3483 uint32 part_id)
3484 {
3485 uint32 start= part_id * part_info->num_subparts;
3486 uint32 end= start + part_info->num_subparts;
3487 DBUG_ENTER("mark_full_partition_used_with_parts");
3488
3489 for (; start != end; start++)
3490 {
3491 DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3492 bitmap_set_bit(&part_info->read_partitions, start);
3493 }
3494 DBUG_VOID_RETURN;
3495 }
3496
3497 /*
3498 Find the set of used partitions for List<SEL_IMERGE>
3499 SYNOPSIS
3500 find_used_partitions_imerge_list
3501 ppar Partition pruning context.
3502 key_tree Intervals tree to perform pruning for.
3503
3504 DESCRIPTION
3505 List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
3506 The set of used partitions is an intersection of used partitions sets
3507 for imerge_{i}.
3508 We accumulate this intersection in a separate bitmap.
3509
3510 RETURN
3511 See find_used_partitions()
3512 */
3513
find_used_partitions_imerge_list(PART_PRUNE_PARAM * ppar,List<SEL_IMERGE> & merges)3514 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3515 List<SEL_IMERGE> &merges)
3516 {
3517 MY_BITMAP all_merges;
3518 uint bitmap_bytes;
3519 my_bitmap_map *bitmap_buf;
3520 uint n_bits= ppar->part_info->read_partitions.n_bits;
3521 bitmap_bytes= bitmap_buffer_size(n_bits);
3522 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
3523 bitmap_bytes)))
3524 {
3525 /*
3526 Fallback, process just the first SEL_IMERGE. This can leave us with more
3527 partitions marked as used then actually needed.
3528 */
3529 return find_used_partitions_imerge(ppar, merges.head());
3530 }
3531 bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
3532 bitmap_set_prefix(&all_merges, n_bits);
3533
3534 List_iterator<SEL_IMERGE> it(merges);
3535 SEL_IMERGE *imerge;
3536 while ((imerge=it++))
3537 {
3538 int res= find_used_partitions_imerge(ppar, imerge);
3539 if (!res)
3540 {
3541 /* no used partitions on one ANDed imerge => no used partitions at all */
3542 return 0;
3543 }
3544
3545 if (res != -1)
3546 bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
3547
3548 if (bitmap_is_clear_all(&all_merges))
3549 return 0;
3550
3551 bitmap_clear_all(&ppar->part_info->read_partitions);
3552 }
3553 memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
3554 bitmap_bytes);
3555 return 1;
3556 }
3557
3558
3559 /*
3560 Find the set of used partitions for SEL_IMERGE structure
3561 SYNOPSIS
3562 find_used_partitions_imerge()
3563 ppar Partition pruning context.
3564 key_tree Intervals tree to perform pruning for.
3565
3566 DESCRIPTION
3567 SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
3568 trivial - just use mark used partitions for each tree and bail out early
3569 if for some tree_{i} all partitions are used.
3570
3571 RETURN
3572 See find_used_partitions().
3573 */
3574
3575 static
find_used_partitions_imerge(PART_PRUNE_PARAM * ppar,SEL_IMERGE * imerge)3576 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3577 {
3578 int res= 0;
3579 for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3580 {
3581 ppar->arg_stack_end= ppar->arg_stack;
3582 ppar->cur_part_fields= 0;
3583 ppar->cur_subpart_fields= 0;
3584
3585 ppar->cur_min_key= ppar->range_param.min_key;
3586 ppar->cur_max_key= ppar->range_param.max_key;
3587 ppar->cur_min_flag= ppar->cur_max_flag= 0;
3588
3589 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3590 SEL_ARG *key_tree= (*ptree)->keys[0];
3591 if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3592 return -1;
3593 }
3594 return res;
3595 }
3596
3597
3598 /*
3599 Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3600
3601 SYNOPSIS
3602 find_used_partitions()
3603 ppar Partition pruning context.
3604 key_tree SEL_ARG range tree to perform pruning for
3605
3606 DESCRIPTION
3607 This function
3608 * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3609 * finds the partitions one needs to use to get rows in these intervals
3610 * marks these partitions as used.
3611 The next session desribes the process in greater detail.
3612
3613 IMPLEMENTATION
3614 TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3615 We can find out which [sub]partitions to use if we obtain restrictions on
3616 [sub]partitioning fields in the following form:
3617 1. "partition_field1=const1 AND ... AND partition_fieldN=constN"
3618 1.1 Same as (1) but for subpartition fields
3619
3620 If partitioning supports interval analysis (i.e. partitioning is a
3621 function of a single table field, and partition_info::
3622 get_part_iter_for_interval != NULL), then we can also use condition in
3623 this form:
3624 2. "const1 <=? partition_field <=? const2"
3625 2.1 Same as (2) but for subpartition_field
3626
3627 INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3628
3629 The below is an example of what SEL_ARG tree may represent:
3630
3631 (start)
3632 | $
3633 | Partitioning keyparts $ subpartitioning keyparts
3634 | $
3635 | ... ... $
3636 | | | $
3637 | +---------+ +---------+ $ +-----------+ +-----------+
3638 \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3639 +---------+ +---------+ $ +-----------+ +-----------+
3640 | $ | |
3641 | $ | +-----------+
3642 | $ | | subpar2=c6|
3643 | $ | +-----------+
3644 | $ |
3645 | $ +-----------+ +-----------+
3646 | $ | subpar1=c4|--| subpar2=c8|
3647 | $ +-----------+ +-----------+
3648 | $
3649 | $
3650 +---------+ $ +------------+ +------------+
3651 | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3652 +---------+ $ +------------+ +------------+
3653 | $
3654 ... $
3655
3656 The up-down connections are connections via SEL_ARG::left and
3657 SEL_ARG::right. A horizontal connection to the right is the
3658 SEL_ARG::next_key_part connection.
3659
3660 find_used_partitions() traverses the entire tree via recursion on
3661 * SEL_ARG::next_key_part (from left to right on the picture)
3662 * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3663 performed for each depth level.
3664
3665 Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3666 ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3667 For the example in the above picture, one of stack states is:
3668 in find_used_partitions(key_tree = "subpar2=c5") (***)
3669 in find_used_partitions(key_tree = "subpar1=c3")
3670 in find_used_partitions(key_tree = "par2=c2") (**)
3671 in find_used_partitions(key_tree = "par1=c1")
3672 in prune_partitions(...)
3673 We apply partitioning limits as soon as possible, e.g. when we reach the
3674 depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3675 and save them in ppar->part_iter.
3676 When we reach the depth (***), we find which subpartition(s) correspond to
3677 "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
3678 appropriate subpartitions as used.
3679
3680 It is possible that constraints on some partitioning fields are missing.
3681 For the above example, consider this stack state:
3682 in find_used_partitions(key_tree = "subpar2=c12") (***)
3683 in find_used_partitions(key_tree = "subpar1=c10")
3684 in find_used_partitions(key_tree = "par1=c2")
3685 in prune_partitions(...)
3686 Here we don't have constraints for all partitioning fields. Since we've
3687 never set the ppar->part_iter to contain used set of partitions, we use
3688 its default "all partitions" value. We get subpartition id for
3689 "subpar1=c3 AND subpar2=c5", and mark that subpartition as used in every
3690 partition.
3691
3692 The inverse is also possible: we may get constraints on partitioning
3693 fields, but not constraints on subpartitioning fields. In that case,
3694 calls to find_used_partitions() with depth below (**) will return -1,
3695 and we will mark entire partition as used.
3696
3697 TODO
3698 Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3699
3700 RETURN
3701 1 OK, one or more [sub]partitions are marked as used.
3702 0 The passed condition doesn't match any partitions
3703 -1 Couldn't infer any partition pruning "intervals" from the passed
3704 SEL_ARG* tree (which means that all partitions should be marked as
3705 used) Marking partitions as used is the responsibility of the caller.
3706 */
3707
3708 static
find_used_partitions(PART_PRUNE_PARAM * ppar,SEL_ARG * key_tree)3709 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
3710 {
3711 int res, left_res=0, right_res=0;
3712 int key_tree_part= (int)key_tree->part;
3713 bool set_full_part_if_bad_ret= FALSE;
3714 bool ignore_part_fields= ppar->ignore_part_fields;
3715 bool did_set_ignore_part_fields= FALSE;
3716 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
3717
3718 if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
3719 return -1;
3720
3721 if (key_tree->left != &null_element)
3722 {
3723 if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
3724 return -1;
3725 }
3726
3727 /* Push SEL_ARG's to stack to enable looking backwards as well */
3728 ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
3729 ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
3730 *(ppar->arg_stack_end++)= key_tree;
3731
3732 if (ignore_part_fields)
3733 {
3734 /*
3735 We come here when a condition on the first partitioning
3736 fields led to evaluating the partitioning condition
3737 (due to finding a condition of the type a < const or
3738 b > const). Thus we must ignore the rest of the
3739 partitioning fields but we still want to analyse the
3740 subpartitioning fields.
3741 */
3742 if (key_tree->next_key_part)
3743 res= find_used_partitions(ppar, key_tree->next_key_part);
3744 else
3745 res= -1;
3746 goto pop_and_go_right;
3747 }
3748
3749 if (key_tree->type == SEL_ARG::KEY_RANGE)
3750 {
3751 if (ppar->part_info->get_part_iter_for_interval &&
3752 key_tree->part <= ppar->last_part_partno)
3753 {
3754 /* Collect left and right bound, their lengths and flags */
3755 uchar *min_key= ppar->cur_min_key;
3756 uchar *max_key= ppar->cur_max_key;
3757 uchar *tmp_min_key= min_key;
3758 uchar *tmp_max_key= max_key;
3759 key_tree->store_min(ppar->key[key_tree->part].store_length,
3760 &tmp_min_key, ppar->cur_min_flag);
3761 key_tree->store_max(ppar->key[key_tree->part].store_length,
3762 &tmp_max_key, ppar->cur_max_flag);
3763 uint flag;
3764 if (key_tree->next_key_part &&
3765 key_tree->next_key_part->part == key_tree->part+1 &&
3766 key_tree->next_key_part->part <= ppar->last_part_partno &&
3767 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
3768 {
3769 /*
3770 There are more key parts for partition pruning to handle
3771 This mainly happens when the condition is an equality
3772 condition.
3773 */
3774 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
3775 (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
3776 !key_tree->min_flag && !key_tree->max_flag)
3777 {
3778 /* Set 'parameters' */
3779 ppar->cur_min_key= tmp_min_key;
3780 ppar->cur_max_key= tmp_max_key;
3781 uint save_min_flag= ppar->cur_min_flag;
3782 uint save_max_flag= ppar->cur_max_flag;
3783
3784 ppar->cur_min_flag|= key_tree->min_flag;
3785 ppar->cur_max_flag|= key_tree->max_flag;
3786
3787 res= find_used_partitions(ppar, key_tree->next_key_part);
3788
3789 /* Restore 'parameters' back */
3790 ppar->cur_min_key= min_key;
3791 ppar->cur_max_key= max_key;
3792
3793 ppar->cur_min_flag= save_min_flag;
3794 ppar->cur_max_flag= save_max_flag;
3795 goto pop_and_go_right;
3796 }
3797 /* We have arrived at the last field in the partition pruning */
3798 uint tmp_min_flag= key_tree->min_flag,
3799 tmp_max_flag= key_tree->max_flag;
3800 if (!tmp_min_flag)
3801 key_tree->next_key_part->store_min_key(ppar->key,
3802 &tmp_min_key,
3803 &tmp_min_flag,
3804 ppar->last_part_partno);
3805 if (!tmp_max_flag)
3806 key_tree->next_key_part->store_max_key(ppar->key,
3807 &tmp_max_key,
3808 &tmp_max_flag,
3809 ppar->last_part_partno);
3810 flag= tmp_min_flag | tmp_max_flag;
3811 }
3812 else
3813 flag= key_tree->min_flag | key_tree->max_flag;
3814
3815 if (tmp_min_key != range_par->min_key)
3816 flag&= ~NO_MIN_RANGE;
3817 else
3818 flag|= NO_MIN_RANGE;
3819 if (tmp_max_key != range_par->max_key)
3820 flag&= ~NO_MAX_RANGE;
3821 else
3822 flag|= NO_MAX_RANGE;
3823
3824 /*
3825 We need to call the interval mapper if we have a condition which
3826 makes sense to prune on. In the example of COLUMNS on a and
3827 b it makes sense if we have a condition on a, or conditions on
3828 both a and b. If we only have conditions on b it might make sense
3829 but this is a harder case we will solve later. For the harder case
3830 this clause then turns into use of all partitions and thus we
3831 simply set res= -1 as if the mapper had returned that.
3832 TODO: What to do here is defined in WL#4065.
3833 */
3834 if (ppar->arg_stack[0]->part == 0)
3835 {
3836 uint32 i;
3837 uint32 store_length_array[MAX_KEY];
3838 uint32 num_keys= ppar->part_fields;
3839
3840 for (i= 0; i < num_keys; i++)
3841 store_length_array[i]= ppar->key[i].store_length;
3842 res= ppar->part_info->
3843 get_part_iter_for_interval(ppar->part_info,
3844 FALSE,
3845 store_length_array,
3846 range_par->min_key,
3847 range_par->max_key,
3848 tmp_min_key - range_par->min_key,
3849 tmp_max_key - range_par->max_key,
3850 flag,
3851 &ppar->part_iter);
3852 if (!res)
3853 goto pop_and_go_right; /* res==0 --> no satisfying partitions */
3854 }
3855 else
3856 res= -1;
3857
3858 if (res == -1)
3859 {
3860 /* get a full range iterator */
3861 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3862 }
3863 /*
3864 Save our intent to mark full partition as used if we will not be able
3865 to obtain further limits on subpartitions
3866 */
3867 if (key_tree_part < ppar->last_part_partno)
3868 {
3869 /*
3870 We need to ignore the rest of the partitioning fields in all
3871 evaluations after this
3872 */
3873 did_set_ignore_part_fields= TRUE;
3874 ppar->ignore_part_fields= TRUE;
3875 }
3876 set_full_part_if_bad_ret= TRUE;
3877 goto process_next_key_part;
3878 }
3879
3880 if (key_tree_part == ppar->last_subpart_partno &&
3881 (NULL != ppar->part_info->get_subpart_iter_for_interval))
3882 {
3883 PARTITION_ITERATOR subpart_iter;
3884 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
3885 range_par->key_parts););
3886 res= ppar->part_info->
3887 get_subpart_iter_for_interval(ppar->part_info,
3888 TRUE,
3889 NULL, /* Currently not used here */
3890 key_tree->min_value,
3891 key_tree->max_value,
3892 0, 0, /* Those are ignored here */
3893 key_tree->min_flag |
3894 key_tree->max_flag,
3895 &subpart_iter);
3896 if (res == 0)
3897 {
3898 /*
3899 The only case where we can get "no satisfying subpartitions"
3900 returned from the above call is when an error has occurred.
3901 */
3902 DBUG_ASSERT(range_par->thd->is_error());
3903 return 0;
3904 }
3905
3906 if (res == -1)
3907 goto pop_and_go_right; /* all subpartitions satisfy */
3908
3909 uint32 subpart_id;
3910 bitmap_clear_all(&ppar->subparts_bitmap);
3911 while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
3912 NOT_A_PARTITION_ID)
3913 bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
3914
3915 /* Mark each partition as used in each subpartition. */
3916 uint32 part_id;
3917 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
3918 NOT_A_PARTITION_ID)
3919 {
3920 for (uint i= 0; i < ppar->part_info->num_subparts; i++)
3921 if (bitmap_is_set(&ppar->subparts_bitmap, i))
3922 bitmap_set_bit(&ppar->part_info->read_partitions,
3923 part_id * ppar->part_info->num_subparts + i);
3924 }
3925 goto pop_and_go_right;
3926 }
3927
3928 if (key_tree->is_singlepoint())
3929 {
3930 if (key_tree_part == ppar->last_part_partno &&
3931 ppar->cur_part_fields == ppar->part_fields &&
3932 ppar->part_info->get_part_iter_for_interval == NULL)
3933 {
3934 /*
3935 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
3936 fields. Save all constN constants into table record buffer.
3937 */
3938 store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
3939 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
3940 ppar->part_fields););
3941 uint32 part_id;
3942 longlong func_value;
3943 /* Find in which partition the {const1, ...,constN} tuple goes */
3944 if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
3945 &func_value))
3946 {
3947 res= 0; /* No satisfying partitions */
3948 goto pop_and_go_right;
3949 }
3950 /* Rembember the limit we got - single partition #part_id */
3951 init_single_partition_iterator(part_id, &ppar->part_iter);
3952
3953 /*
3954 If there are no subpartitions/we fail to get any limit for them,
3955 then we'll mark full partition as used.
3956 */
3957 set_full_part_if_bad_ret= TRUE;
3958 goto process_next_key_part;
3959 }
3960
3961 if (key_tree_part == ppar->last_subpart_partno &&
3962 ppar->cur_subpart_fields == ppar->subpart_fields)
3963 {
3964 /*
3965 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
3966 fields. Save all constN constants into table record buffer.
3967 */
3968 store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
3969 ppar->subpart_fields);
3970 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
3971 ppar->subpart_fields,
3972 ppar->subpart_fields););
3973 /* Find the subpartition (it's HASH/KEY so we always have one) */
3974 partition_info *part_info= ppar->part_info;
3975 uint32 part_id, subpart_id;
3976
3977 if (part_info->get_subpartition_id(part_info, &subpart_id))
3978 return 0;
3979
3980 /* Mark this partition as used in each subpartition. */
3981 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
3982 NOT_A_PARTITION_ID)
3983 {
3984 bitmap_set_bit(&part_info->read_partitions,
3985 part_id * part_info->num_subparts + subpart_id);
3986 }
3987 res= 1; /* Some partitions were marked as used */
3988 goto pop_and_go_right;
3989 }
3990 }
3991 else
3992 {
3993 /*
3994 Can't handle condition on current key part. If we're that deep that
3995 we're processing subpartititoning's key parts, this means we'll not be
3996 able to infer any suitable condition, so bail out.
3997 */
3998 if (key_tree_part >= ppar->last_part_partno)
3999 {
4000 res= -1;
4001 goto pop_and_go_right;
4002 }
4003 /*
4004 No meaning in continuing with rest of partitioning key parts.
4005 Will try to continue with subpartitioning key parts.
4006 */
4007 ppar->ignore_part_fields= true;
4008 did_set_ignore_part_fields= true;
4009 goto process_next_key_part;
4010 }
4011 }
4012
4013 process_next_key_part:
4014 if (key_tree->next_key_part)
4015 res= find_used_partitions(ppar, key_tree->next_key_part);
4016 else
4017 res= -1;
4018
4019 if (did_set_ignore_part_fields)
4020 {
4021 /*
4022 We have returned from processing all key trees linked to our next
4023 key part. We are ready to be moving down (using right pointers) and
4024 this tree is a new evaluation requiring its own decision on whether
4025 to ignore partitioning fields.
4026 */
4027 ppar->ignore_part_fields= FALSE;
4028 }
4029 if (set_full_part_if_bad_ret)
4030 {
4031 if (res == -1)
4032 {
4033 /* Got "full range" for subpartitioning fields */
4034 uint32 part_id;
4035 bool found= FALSE;
4036 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4037 NOT_A_PARTITION_ID)
4038 {
4039 ppar->mark_full_partition_used(ppar->part_info, part_id);
4040 found= TRUE;
4041 }
4042 res= MY_TEST(found);
4043 }
4044 /*
4045 Restore the "used partitions iterator" to the default setting that
4046 specifies iteration over all partitions.
4047 */
4048 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4049 }
4050
4051 pop_and_go_right:
4052 /* Pop this key part info off the "stack" */
4053 ppar->arg_stack_end--;
4054 ppar->cur_part_fields-= ppar->is_part_keypart[key_tree_part];
4055 ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4056
4057 if (res == -1)
4058 return -1;
4059 if (key_tree->right != &null_element)
4060 {
4061 if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4062 return -1;
4063 }
4064 return (left_res || right_res || res);
4065 }
4066
4067
mark_all_partitions_as_used(partition_info * part_info)4068 static void mark_all_partitions_as_used(partition_info *part_info)
4069 {
4070 bitmap_copy(&(part_info->read_partitions),
4071 &(part_info->lock_partitions));
4072 }
4073
4074
4075 /*
4076 Check if field types allow to construct partitioning index description
4077
4078 SYNOPSIS
4079 fields_ok_for_partition_index()
4080 pfield NULL-terminated array of pointers to fields.
4081
4082 DESCRIPTION
4083 For an array of fields, check if we can use all of the fields to create
4084 partitioning index description.
4085
4086 We can't process GEOMETRY fields - for these fields singlepoint intervals
4087 cant be generated, and non-singlepoint are "special" kinds of intervals
4088 to which our processing logic can't be applied.
4089
4090 It is not known if we could process ENUM fields, so they are disabled to be
4091 on the safe side.
4092
4093 RETURN
4094 TRUE Yes, fields can be used in partitioning index
4095 FALSE Otherwise
4096 */
4097
fields_ok_for_partition_index(Field ** pfield)4098 static bool fields_ok_for_partition_index(Field **pfield)
4099 {
4100 if (!pfield)
4101 return FALSE;
4102 for (; (*pfield); pfield++)
4103 {
4104 enum_field_types ftype= (*pfield)->real_type();
4105 if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4106 return FALSE;
4107 }
4108 return TRUE;
4109 }
4110
4111
4112 /*
4113 Create partition index description and fill related info in the context
4114 struct
4115
4116 SYNOPSIS
4117 create_partition_index_description()
4118 prune_par INOUT Partition pruning context
4119
4120 DESCRIPTION
4121 Create partition index description. Partition index description is:
4122
4123 part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4124
4125 If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4126 corresponding fields_list(...) is not included into index description
4127 and we don't perform partition pruning for partitions/subpartitions.
4128
4129 RETURN
4130 TRUE Out of memory or can't do partition pruning at all
4131 FALSE OK
4132 */
4133
create_partition_index_description(PART_PRUNE_PARAM * ppar)4134 static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
4135 {
4136 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4137 partition_info *part_info= ppar->part_info;
4138 uint used_part_fields, used_subpart_fields;
4139
4140 used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
4141 part_info->num_part_fields : 0;
4142 used_subpart_fields=
4143 fields_ok_for_partition_index(part_info->subpart_field_array)?
4144 part_info->num_subpart_fields : 0;
4145
4146 uint total_parts= used_part_fields + used_subpart_fields;
4147
4148 ppar->ignore_part_fields= FALSE;
4149 ppar->part_fields= used_part_fields;
4150 ppar->last_part_partno= (int)used_part_fields - 1;
4151
4152 ppar->subpart_fields= used_subpart_fields;
4153 ppar->last_subpart_partno=
4154 used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;
4155
4156 if (part_info->is_sub_partitioned())
4157 {
4158 ppar->mark_full_partition_used= mark_full_partition_used_with_parts;
4159 ppar->get_top_partition_id_func= part_info->get_part_partition_id;
4160 }
4161 else
4162 {
4163 ppar->mark_full_partition_used= mark_full_partition_used_no_parts;
4164 ppar->get_top_partition_id_func= part_info->get_partition_id;
4165 }
4166
4167 KEY_PART *key_part;
4168 MEM_ROOT *alloc= range_par->mem_root;
4169 if (!total_parts ||
4170 !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
4171 total_parts)) ||
4172 !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
4173 total_parts)) ||
4174 !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4175 total_parts)) ||
4176 !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4177 total_parts)))
4178 return TRUE;
4179
4180 if (ppar->subpart_fields)
4181 {
4182 my_bitmap_map *buf;
4183 uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
4184 if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
4185 return TRUE;
4186 bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
4187 FALSE);
4188 }
4189 range_par->key_parts= key_part;
4190 Field **field= (ppar->part_fields)? part_info->part_field_array :
4191 part_info->subpart_field_array;
4192 bool in_subpart_fields= FALSE;
4193 for (uint part= 0; part < total_parts; part++, key_part++)
4194 {
4195 key_part->key= 0;
4196 key_part->part= part;
4197 key_part->length= (uint16)(*field)->key_length();
4198 key_part->store_length= (uint16)get_partition_field_store_length(*field);
4199
4200 DBUG_PRINT("info", ("part %u length %u store_length %u", part,
4201 key_part->length, key_part->store_length));
4202
4203 key_part->field= (*field);
4204 key_part->image_type = Field::itRAW;
4205 /*
4206 We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
4207 in the RangeAnalysisModule.
4208 */
4209 key_part->flag= 0;
4210 /* We don't set key_parts->null_bit as it will not be used */
4211
4212 ppar->is_part_keypart[part]= !in_subpart_fields;
4213 ppar->is_subpart_keypart[part]= in_subpart_fields;
4214
4215 /*
4216 Check if this was last field in this array, in this case we
4217 switch to subpartitioning fields. (This will only happens if
4218 there are subpartitioning fields to cater for).
4219 */
4220 if (!*(++field))
4221 {
4222 field= part_info->subpart_field_array;
4223 in_subpart_fields= TRUE;
4224 }
4225 }
4226 range_par->key_parts_end= key_part;
4227
4228 DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
4229 range_par->key_parts_end););
4230 return FALSE;
4231 }
4232
4233
4234 #ifndef DBUG_OFF
4235
print_partitioning_index(KEY_PART * parts,KEY_PART * parts_end)4236 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4237 {
4238 DBUG_ENTER("print_partitioning_index");
4239 DBUG_LOCK_FILE;
4240 fprintf(DBUG_FILE, "partitioning INDEX(");
4241 for (KEY_PART *p=parts; p != parts_end; p++)
4242 {
4243 fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
4244 }
4245 fputs(");\n", DBUG_FILE);
4246 DBUG_UNLOCK_FILE;
4247 DBUG_VOID_RETURN;
4248 }
4249
4250
4251 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
dbug_print_segment_range(SEL_ARG * arg,KEY_PART * part)4252 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4253 {
4254 DBUG_ENTER("dbug_print_segment_range");
4255 DBUG_LOCK_FILE;
4256 if (!(arg->min_flag & NO_MIN_RANGE))
4257 {
4258 store_key_image_to_rec(part->field, arg->min_value, part->length);
4259 part->field->dbug_print();
4260 if (arg->min_flag & NEAR_MIN)
4261 fputs(" < ", DBUG_FILE);
4262 else
4263 fputs(" <= ", DBUG_FILE);
4264 }
4265
4266 fprintf(DBUG_FILE, "%s", part->field->field_name);
4267
4268 if (!(arg->max_flag & NO_MAX_RANGE))
4269 {
4270 if (arg->max_flag & NEAR_MAX)
4271 fputs(" < ", DBUG_FILE);
4272 else
4273 fputs(" <= ", DBUG_FILE);
4274 store_key_image_to_rec(part->field, arg->max_value, part->length);
4275 part->field->dbug_print();
4276 }
4277 fputs("\n", DBUG_FILE);
4278 DBUG_UNLOCK_FILE;
4279 DBUG_VOID_RETURN;
4280 }
4281
4282
4283 /*
4284 Print a singlepoint multi-keypart range interval to debug trace
4285
4286 SYNOPSIS
4287 dbug_print_singlepoint_range()
4288 start Array of SEL_ARG* ptrs representing conditions on key parts
4289 num Number of elements in the array.
4290
4291 DESCRIPTION
4292 This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4293 interval to debug trace.
4294 */
4295
dbug_print_singlepoint_range(SEL_ARG ** start,uint num)4296 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4297 {
4298 DBUG_ENTER("dbug_print_singlepoint_range");
4299 DBUG_LOCK_FILE;
4300 SEL_ARG **end= start + num;
4301
4302 for (SEL_ARG **arg= start; arg != end; arg++)
4303 {
4304 Field *field= (*arg)->field;
4305 fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
4306 field->dbug_print();
4307 }
4308 fputs("\n", DBUG_FILE);
4309 DBUG_UNLOCK_FILE;
4310 DBUG_VOID_RETURN;
4311 }
4312 #endif
4313
4314 /****************************************************************************
4315 * Partition pruning code ends
4316 ****************************************************************************/
4317 #endif
4318
4319
4320 /*
4321 Get best plan for a SEL_IMERGE disjunctive expression.
4322 SYNOPSIS
4323 get_best_disjunct_quick()
4324 param Parameter from check_quick_select function
4325 imerge Expression to use
4326 read_time Don't create scans with cost > read_time
4327
4328 NOTES
4329 index_merge cost is calculated as follows:
4330 index_merge_cost =
4331 cost(index_reads) + (see #1)
4332 cost(rowid_to_row_scan) + (see #2)
4333 cost(unique_use) (see #3)
4334
4335 1. cost(index_reads) =SUM_i(cost(index_read_i))
4336 For non-CPK scans,
4337 cost(index_read_i) = {cost of ordinary 'index only' scan}
4338 For CPK scan,
4339 cost(index_read_i) = {cost of non-'index only' scan}
4340
4341 2. cost(rowid_to_row_scan)
4342 If table PK is clustered then
4343 cost(rowid_to_row_scan) =
4344 {cost of ordinary clustered PK scan with n_ranges=n_rows}
4345
4346 Otherwise, we use the following model to calculate costs:
4347 We need to retrieve n_rows rows from file that occupies n_blocks blocks.
4348 We assume that offsets of rows we need are independent variates with
4349 uniform distribution in [0..max_file_offset] range.
4350
4351 We'll denote block as "busy" if it contains row(s) we need to retrieve
4352 and "empty" if doesn't contain rows we need.
4353
4354 Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
4355 applies to any block in file). Let x_i be a variate taking value 1 if
4356 block #i is empty and 0 otherwise.
4357
4358 Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4359
4360 E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4361 = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4362 ~= n_blocks * exp(-n_rows/n_blocks).
4363
4364 E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4365 ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
4366
4367 Average size of "hole" between neighbor non-empty blocks is
4368 E(hole_size) = n_blocks/E(n_busy_blocks).
4369
4370 The total cost of reading all needed blocks in one "sweep" is:
4371
4372 E(n_busy_blocks)*
4373 (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
4374
4375 3. Cost of Unique use is calculated in Unique::get_use_cost function.
4376
4377 ROR-union cost is calculated in the same way index_merge, but instead of
4378 Unique a priority queue is used.
4379
4380 RETURN
4381 Created read plan
4382 NULL - Out of memory or no read scan could be built.
4383 */
4384
4385 static
get_best_disjunct_quick(PARAM * param,SEL_IMERGE * imerge,double read_time)4386 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
4387 double read_time)
4388 {
4389 SEL_TREE **ptree;
4390 TRP_INDEX_MERGE *imerge_trp= NULL;
4391 uint n_child_scans= imerge->trees_next - imerge->trees;
4392 TRP_RANGE **range_scans;
4393 TRP_RANGE **cur_child;
4394 TRP_RANGE **cpk_scan= NULL;
4395 bool imerge_too_expensive= FALSE;
4396 double imerge_cost= 0.0;
4397 ha_rows cpk_scan_records= 0;
4398 ha_rows non_cpk_scan_records= 0;
4399 bool pk_is_clustered= param->table->file->primary_key_is_clustered();
4400 bool all_scans_ror_able= TRUE;
4401 bool all_scans_rors= TRUE;
4402 uint unique_calc_buff_size;
4403 TABLE_READ_PLAN **roru_read_plans;
4404 TABLE_READ_PLAN **cur_roru_plan;
4405 double roru_index_costs;
4406 ha_rows roru_total_records;
4407 double roru_intersect_part= 1.0;
4408 DBUG_ENTER("get_best_disjunct_quick");
4409 DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
4410
4411 DBUG_ASSERT(param->table->file->stats.records);
4412
4413 Opt_trace_context * const trace= ¶m->thd->opt_trace;
4414 Opt_trace_object trace_best_disjunct(trace);
4415 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
4416 sizeof(TRP_RANGE*)*
4417 n_child_scans)))
4418 DBUG_RETURN(NULL);
4419 // Note: to_merge.end() is called to close this object after this for-loop.
4420 Opt_trace_array to_merge(trace, "indices_to_merge");
4421 /*
4422 Collect best 'range' scan for each of disjuncts, and, while doing so,
4423 analyze possibility of ROR scans. Also calculate some values needed by
4424 other parts of the code.
4425 */
4426 for (ptree= imerge->trees, cur_child= range_scans;
4427 ptree != imerge->trees_next;
4428 ptree++, cur_child++)
4429 {
4430 DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
4431 "tree in SEL_IMERGE"););
4432 Opt_trace_object trace_idx(trace);
4433 if (!(*cur_child=
4434 get_key_scans_params(param, *ptree, true, false, read_time)))
4435 {
4436 /*
4437 One of index scans in this index_merge is more expensive than entire
4438 table read for another available option. The entire index_merge (and
4439 any possible ROR-union) will be more expensive then, too. We continue
4440 here only to update SQL_SELECT members.
4441 */
4442 imerge_too_expensive= true;
4443 }
4444 if (imerge_too_expensive)
4445 {
4446 trace_idx.add("chosen", false).add_alnum("cause", "cost");
4447 continue;
4448 }
4449
4450 const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
4451 imerge_cost += (*cur_child)->read_cost;
4452 all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
4453 all_scans_rors &= (*cur_child)->is_ror;
4454 if (pk_is_clustered &&
4455 keynr_in_table == param->table->s->primary_key)
4456 {
4457 cpk_scan= cur_child;
4458 cpk_scan_records= (*cur_child)->records;
4459 }
4460 else
4461 non_cpk_scan_records += (*cur_child)->records;
4462
4463 trace_idx.
4464 add_utf8("index_to_merge", param->table->key_info[keynr_in_table].name).
4465 add("cumulated_cost", imerge_cost);
4466 }
4467
4468 // Note: to_merge trace object is closed here
4469 to_merge.end();
4470
4471
4472 trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
4473 if (imerge_too_expensive || (imerge_cost > read_time) ||
4474 ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
4475 read_time != DBL_MAX))
4476 {
4477 /*
4478 Bail out if it is obvious that both index_merge and ROR-union will be
4479 more expensive
4480 */
4481 DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
4482 "full table scan, bailing out"));
4483 trace_best_disjunct.add("chosen", false).add_alnum("cause", "cost");
4484 DBUG_RETURN(NULL);
4485 }
4486
4487 /*
4488 If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
4489 guaranteed to be cheaper than non-ROR union), unless ROR-unions are
4490 disabled in @@optimizer_switch
4491 */
4492 if (all_scans_rors &&
4493 param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
4494 {
4495 roru_read_plans= (TABLE_READ_PLAN**)range_scans;
4496 trace_best_disjunct.add("use_roworder_union", true).
4497 add_alnum("cause", "always_cheaper_than_not_roworder_retrieval");
4498 goto skip_to_ror_scan;
4499 }
4500
4501 if (cpk_scan)
4502 {
4503 /*
4504 Add one ROWID comparison for each row retrieved on non-CPK scan. (it
4505 is done in QUICK_RANGE_SELECT::row_in_ranges)
4506 */
4507 const double rid_comp_cost= non_cpk_scan_records * ROWID_COMPARE_COST;
4508 imerge_cost+= rid_comp_cost;
4509 trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
4510 rid_comp_cost);
4511 }
4512
4513 /* Calculate cost(rowid_to_row_scan) */
4514 {
4515 Cost_estimate sweep_cost;
4516 JOIN *join= param->thd->lex->select_lex.join;
4517 const bool is_interrupted= join && join->tables != 1;
4518 get_sweep_read_cost(param->table, non_cpk_scan_records, is_interrupted,
4519 &sweep_cost);
4520 const double sweep_total_cost= sweep_cost.total_cost();
4521 imerge_cost+= sweep_total_cost;
4522 trace_best_disjunct.add("cost_sort_rowid_and_read_disk",
4523 sweep_total_cost);
4524 }
4525 DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
4526 imerge_cost));
4527 if (imerge_cost > read_time ||
4528 !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
4529 {
4530 trace_best_disjunct.add("use_roworder_index_merge", true).
4531 add_alnum("cause", "cost");
4532 goto build_ror_index_merge;
4533 }
4534
4535 /* Add Unique operations cost */
4536 unique_calc_buff_size=
4537 Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
4538 param->table->file->ref_length,
4539 param->thd->variables.sortbuff_size);
4540 if (param->imerge_cost_buff_size < unique_calc_buff_size)
4541 {
4542 if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
4543 unique_calc_buff_size)))
4544 DBUG_RETURN(NULL);
4545 param->imerge_cost_buff_size= unique_calc_buff_size;
4546 }
4547
4548 {
4549 const double dup_removal_cost=
4550 Unique::get_use_cost(param->imerge_cost_buff,
4551 (uint)non_cpk_scan_records,
4552 param->table->file->ref_length,
4553 param->thd->variables.sortbuff_size);
4554
4555 trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost);
4556 imerge_cost += dup_removal_cost;
4557 trace_best_disjunct.add("total_cost", imerge_cost);
4558 DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
4559 imerge_cost, read_time));
4560 }
4561 if (imerge_cost < read_time)
4562 {
4563 if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
4564 {
4565 imerge_trp->read_cost= imerge_cost;
4566 imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
4567 imerge_trp->records= min(imerge_trp->records,
4568 param->table->file->stats.records);
4569 imerge_trp->range_scans= range_scans;
4570 imerge_trp->range_scans_end= range_scans + n_child_scans;
4571 read_time= imerge_cost;
4572 }
4573 }
4574
4575 build_ror_index_merge:
4576 if (!all_scans_ror_able ||
4577 param->thd->lex->sql_command == SQLCOM_DELETE ||
4578 !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
4579 DBUG_RETURN(imerge_trp);
4580
4581 /* Ok, it is possible to build a ROR-union, try it. */
4582 if (!(roru_read_plans=
4583 (TABLE_READ_PLAN**)alloc_root(param->mem_root,
4584 sizeof(TABLE_READ_PLAN*)*
4585 n_child_scans)))
4586 DBUG_RETURN(imerge_trp);
4587 skip_to_ror_scan:
4588 roru_index_costs= 0.0;
4589 roru_total_records= 0;
4590 cur_roru_plan= roru_read_plans;
4591
4592 /*
4593 Note: trace_analyze_ror.end() is called to close this object after
4594 this for-loop.
4595 */
4596 Opt_trace_array trace_analyze_ror(trace, "analyzing_roworder_scans");
4597 /* Find 'best' ROR scan for each of trees in disjunction */
4598 for (ptree= imerge->trees, cur_child= range_scans;
4599 ptree != imerge->trees_next;
4600 ptree++, cur_child++, cur_roru_plan++)
4601 {
4602 Opt_trace_object trp_info(trace);
4603 if (unlikely(trace->is_started()))
4604 (*cur_child)->trace_basic_info(param, &trp_info);
4605
4606 /*
4607 Assume the best ROR scan is the one that has cheapest
4608 full-row-retrieval scan cost.
4609 Also accumulate index_only scan costs as we'll need them to
4610 calculate overall index_intersection cost.
4611 */
4612 double cost;
4613 if ((*cur_child)->is_ror)
4614 {
4615 /* Ok, we have index_only cost, now get full rows scan cost */
4616 cost= param->table->file->
4617 read_time(param->real_keynr[(*cur_child)->key_idx], 1,
4618 (*cur_child)->records) +
4619 rows2double((*cur_child)->records) * ROW_EVALUATE_COST;
4620 }
4621 else
4622 cost= read_time;
4623
4624 TABLE_READ_PLAN *prev_plan= *cur_child;
4625 if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost)))
4626 {
4627 if (prev_plan->is_ror)
4628 *cur_roru_plan= prev_plan;
4629 else
4630 DBUG_RETURN(imerge_trp);
4631 roru_index_costs += (*cur_roru_plan)->read_cost;
4632 }
4633 else
4634 roru_index_costs +=
4635 ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
4636 roru_total_records += (*cur_roru_plan)->records;
4637 roru_intersect_part *= (*cur_roru_plan)->records /
4638 param->table->file->stats.records;
4639 }
4640 // Note: trace_analyze_ror trace object is closed here
4641 trace_analyze_ror.end();
4642
4643 /*
4644 rows to retrieve=
4645 SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
4646 This is valid because index_merge construction guarantees that conditions
4647 in disjunction do not share key parts.
4648 */
4649 roru_total_records -= (ha_rows)(roru_intersect_part*
4650 param->table->file->stats.records);
4651 /* ok, got a ROR read plan for each of the disjuncts
4652 Calculate cost:
4653 cost(index_union_scan(scan_1, ... scan_n)) =
4654 SUM_i(cost_of_index_only_scan(scan_i)) +
4655 queue_use_cost(rowid_len, n) +
4656 cost_of_row_retrieval
4657 See get_merge_buffers_cost function for queue_use_cost formula derivation.
4658 */
4659 double roru_total_cost;
4660 {
4661 Cost_estimate sweep_cost;
4662 JOIN *join= param->thd->lex->select_lex.join;
4663 const bool is_interrupted= join && join->tables != 1;
4664 get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
4665 &sweep_cost);
4666 roru_total_cost= roru_index_costs +
4667 rows2double(roru_total_records) *
4668 log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 +
4669 sweep_cost.total_cost();
4670 }
4671
4672 trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost).
4673 add("members", n_child_scans);
4674 TRP_ROR_UNION* roru;
4675 if (roru_total_cost < read_time)
4676 {
4677 if ((roru= new (param->mem_root) TRP_ROR_UNION))
4678 {
4679 trace_best_disjunct.add("chosen", true);
4680 roru->first_ror= roru_read_plans;
4681 roru->last_ror= roru_read_plans + n_child_scans;
4682 roru->read_cost= roru_total_cost;
4683 roru->records= roru_total_records;
4684 DBUG_RETURN(roru);
4685 }
4686 }
4687 trace_best_disjunct.add("chosen", false);
4688
4689 DBUG_RETURN(imerge_trp);
4690 }
4691
4692
4693 /*
4694 Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
4695 sel_arg set of intervals.
4696
4697 SYNOPSIS
4698 make_ror_scan()
4699 param Parameter from test_quick_select function
4700 idx Index of key in param->keys
4701 sel_arg Set of intervals for a given key
4702
4703 RETURN
4704 NULL - out of memory
4705 ROR scan structure containing a scan for {idx, sel_arg}
4706 */
4707
4708 static
make_ror_scan(const PARAM * param,int idx,SEL_ARG * sel_arg)4709 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
4710 {
4711 ROR_SCAN_INFO *ror_scan;
4712 my_bitmap_map *bitmap_buf1;
4713 my_bitmap_map *bitmap_buf2;
4714 uint keynr;
4715 DBUG_ENTER("make_ror_scan");
4716
4717 if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
4718 sizeof(ROR_SCAN_INFO))))
4719 DBUG_RETURN(NULL);
4720
4721 ror_scan->idx= idx;
4722 ror_scan->keynr= keynr= param->real_keynr[idx];
4723 ror_scan->sel_arg= sel_arg;
4724 ror_scan->records= param->table->quick_rows[keynr];
4725
4726 if (!(bitmap_buf1= (my_bitmap_map*) alloc_root(param->mem_root,
4727 param->fields_bitmap_size)))
4728 DBUG_RETURN(NULL);
4729 if (!(bitmap_buf2= (my_bitmap_map*) alloc_root(param->mem_root,
4730 param->fields_bitmap_size)))
4731 DBUG_RETURN(NULL);
4732
4733 if (bitmap_init(&ror_scan->covered_fields, bitmap_buf1,
4734 param->table->s->fields, FALSE))
4735 DBUG_RETURN(NULL);
4736 if (bitmap_init(&ror_scan->covered_fields_remaining, bitmap_buf2,
4737 param->table->s->fields, FALSE))
4738 DBUG_RETURN(NULL);
4739
4740 bitmap_clear_all(&ror_scan->covered_fields);
4741
4742 KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
4743 KEY_PART_INFO *key_part_end=
4744 key_part + param->table->key_info[keynr].user_defined_key_parts;
4745 for (;key_part != key_part_end; ++key_part)
4746 {
4747 if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr-1))
4748 bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
4749 }
4750 bitmap_copy(&ror_scan->covered_fields_remaining, &ror_scan->covered_fields);
4751
4752 double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
4753 ror_scan->index_read_cost=
4754 param->table->file->index_only_read_time(ror_scan->keynr, rows);
4755 DBUG_RETURN(ror_scan);
4756 }
4757
4758
4759 /**
4760 Compare two ROR_SCAN_INFO* by
4761 1. #fields in this index that are not already covered
4762 by other indexes earlier in the intersect ordering: descending
4763 2. E(#records): ascending
4764
4765 @param scan1 first ror scan to compare
4766 @param scan2 second ror scan to compare
4767
4768 @return true if scan1 > scan2, false otherwise
4769 */
is_better_intersect_match(const ROR_SCAN_INFO * scan1,const ROR_SCAN_INFO * scan2)4770 static bool is_better_intersect_match(const ROR_SCAN_INFO *scan1,
4771 const ROR_SCAN_INFO *scan2)
4772 {
4773 if (scan1 == scan2)
4774 return false;
4775
4776 if (scan1->num_covered_fields_remaining >
4777 scan2->num_covered_fields_remaining)
4778 return false;
4779
4780 if (scan1->num_covered_fields_remaining <
4781 scan2->num_covered_fields_remaining)
4782 return true;
4783
4784 return (scan1->records > scan2->records);
4785 }
4786
4787 /**
4788 Sort indexes in an order that is likely to be a good index merge
4789 intersection order. After running this function, [start, ..., end-1]
4790 is ordered according to this strategy:
4791
4792 1) Minimize the number of indexes that must be used in the
4793 intersection. I.e., the index covering most fields not already
4794 covered by other indexes earlier in the sort order is picked first.
4795 2) When multiple indexes cover equally many uncovered fields, the
4796 index with lowest E(#rows) is chosen.
4797
4798 Note that all permutations of index ordering are not tested, so this
4799 function may not find the optimal order.
4800
4801 @param[in,out] start Pointer to the start of indexes that may
4802 be used in index merge intersection
4803 @param end Pointer past the last index that may be used.
4804 @param param Parameter from test_quick_select function.
4805 */
find_intersect_order(ROR_SCAN_INFO ** start,ROR_SCAN_INFO ** end,const PARAM * param)4806 static void find_intersect_order(ROR_SCAN_INFO **start,
4807 ROR_SCAN_INFO **end,
4808 const PARAM *param)
4809 {
4810 // nothing to sort if there are only zero or one ROR scans
4811 if ((start == end) || (start + 1 == end))
4812 return;
4813
4814 /*
4815 Bitmap of fields we would like the ROR scans to cover. Will be
4816 modified by the loop below so that when we're looking for a ROR
4817 scan in position 'x' in the ordering, all fields covered by ROR
4818 scans 0,...,x-1 have been removed.
4819 */
4820 MY_BITMAP fields_to_cover;
4821 my_bitmap_map *map;
4822 if (!(map= (my_bitmap_map*) alloc_root(param->mem_root,
4823 param->fields_bitmap_size)))
4824 return;
4825 bitmap_init(&fields_to_cover, map, param->needed_fields.n_bits, FALSE);
4826 bitmap_copy(&fields_to_cover, ¶m->needed_fields);
4827
4828 // Sort ROR scans in [start,...,end-1]
4829 for (ROR_SCAN_INFO **place= start; place < (end - 1); place++)
4830 {
4831 /* Placeholder for the best ROR scan found for position 'place' so far */
4832 ROR_SCAN_INFO **best= place;
4833 ROR_SCAN_INFO **current= place + 1;
4834
4835 {
4836 /*
4837 Calculate how many fields in 'fields_to_cover' not already
4838 covered by [start,...,place-1] the 'best' index covers. The
4839 result is used in is_better_intersect_match() and is valid
4840 when finding the best ROR scan for position 'place' only.
4841 */
4842 bitmap_intersect(&(*best)->covered_fields_remaining, &fields_to_cover);
4843 (*best)->num_covered_fields_remaining=
4844 bitmap_bits_set(&(*best)->covered_fields_remaining);
4845 }
4846 for (; current < end; current++)
4847 {
4848 {
4849 /*
4850 Calculate how many fields in 'fields_to_cover' not already
4851 covered by [start,...,place-1] the 'current' index covers.
4852 The result is used in is_better_intersect_match() and is
4853 valid when finding the best ROR scan for position 'place' only.
4854 */
4855 bitmap_intersect(&(*current)->covered_fields_remaining,
4856 &fields_to_cover);
4857 (*current)->num_covered_fields_remaining=
4858 bitmap_bits_set(&(*current)->covered_fields_remaining);
4859
4860 /*
4861 No need to compare with 'best' if 'current' does not
4862 contribute with uncovered fields.
4863 */
4864 if ((*current)->num_covered_fields_remaining == 0)
4865 continue;
4866 }
4867
4868 if (is_better_intersect_match(*best, *current))
4869 best= current;
4870 }
4871
4872 /*
4873 'best' is now the ROR scan that will be sorted in position
4874 'place'. When searching for the best ROR scans later in the sort
4875 sequence we do not need coverage of the fields covered by 'best'
4876 */
4877 bitmap_subtract(&fields_to_cover, &(*best)->covered_fields);
4878 if (best != place)
4879 swap_variables(ROR_SCAN_INFO*, *best, *place);
4880
4881 if (bitmap_is_clear_all(&fields_to_cover))
4882 return; // No more fields to cover
4883 }
4884 }
4885
4886 /* Auxiliary structure for incremental ROR-intersection creation */
4887 typedef struct
4888 {
4889 const PARAM *param;
4890 MY_BITMAP covered_fields; /* union of fields covered by all scans */
4891 /*
4892 Fraction of table records that satisfies conditions of all scans.
4893 This is the number of full records that will be retrieved if a
4894 non-index_only index intersection will be employed.
4895 */
4896 double out_rows;
4897 /* TRUE if covered_fields is a superset of needed_fields */
4898 bool is_covering;
4899
4900 ha_rows index_records; /* sum(#records to look in indexes) */
4901 double index_scan_costs; /* SUM(cost of 'index-only' scans) */
4902 double total_cost;
4903 } ROR_INTERSECT_INFO;
4904
4905
4906 /*
4907 Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
4908
4909 SYNOPSIS
4910 ror_intersect_init()
4911 param Parameter from test_quick_select
4912
4913 RETURN
4914 allocated structure
4915 NULL on error
4916 */
4917
4918 static
ror_intersect_init(const PARAM * param)4919 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
4920 {
4921 ROR_INTERSECT_INFO *info;
4922 my_bitmap_map* buf;
4923 if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
4924 sizeof(ROR_INTERSECT_INFO))))
4925 return NULL;
4926 info->param= param;
4927 if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
4928 param->fields_bitmap_size)))
4929 return NULL;
4930 if (bitmap_init(&info->covered_fields, buf, param->table->s->fields,
4931 FALSE))
4932 return NULL;
4933 info->is_covering= FALSE;
4934 info->index_scan_costs= 0.0;
4935 info->total_cost= 0.0;
4936 info->index_records= 0;
4937 info->out_rows= (double) param->table->file->stats.records;
4938 bitmap_clear_all(&info->covered_fields);
4939 return info;
4940 }
4941
ror_intersect_cpy(ROR_INTERSECT_INFO * dst,const ROR_INTERSECT_INFO * src)4942 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
4943 {
4944 dst->param= src->param;
4945 memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
4946 no_bytes_in_map(&src->covered_fields));
4947 dst->out_rows= src->out_rows;
4948 dst->is_covering= src->is_covering;
4949 dst->index_records= src->index_records;
4950 dst->index_scan_costs= src->index_scan_costs;
4951 dst->total_cost= src->total_cost;
4952 }
4953
4954
4955 /*
4956 Get selectivity of adding a ROR scan to the ROR-intersection.
4957
4958 SYNOPSIS
4959 ror_scan_selectivity()
4960 info ROR-interection, an intersection of ROR index scans
4961 scan ROR scan that may or may not improve the selectivity
4962 of 'info'
4963
4964 NOTES
4965 Suppose we have conditions on several keys
4966 cond=k_11=c_11 AND k_12=c_12 AND ... // key_parts of first key in 'info'
4967 k_21=c_21 AND k_22=c_22 AND ... // key_parts of second key in 'info'
4968 ...
4969 k_n1=c_n1 AND k_n3=c_n3 AND ... (1) //key_parts of 'scan'
4970
4971 where k_ij may be the same as any k_pq (i.e. keys may have common parts).
4972
4973 Note that for ROR retrieval, only equality conditions are usable so there
4974 are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'
4975
4976 A full row is retrieved if entire condition holds.
4977
4978 The recursive procedure for finding P(cond) is as follows:
4979
4980 First step:
4981 Pick 1st part of 1st key and break conjunction (1) into two parts:
4982 cond= (k_11=c_11 AND R)
4983
4984 Here R may still contain condition(s) equivalent to k_11=c_11.
4985 Nevertheless, the following holds:
4986
4987 P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
4988
4989 Mark k_11 as fixed field (and satisfied condition) F, save P(F),
4990 save R to be cond and proceed to recursion step.
4991
4992 Recursion step:
4993 We have a set of fixed fields/satisfied conditions) F, probability P(F),
4994 and remaining conjunction R
4995 Pick next key part on current key and its condition "k_ij=c_ij".
4996 We will add "k_ij=c_ij" into F and update P(F).
4997 Lets denote k_ij as t, R = t AND R1, where R1 may still contain t. Then
4998
4999 P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
5000
5001 (where '|' mean conditional probability, not "or")
5002
5003 Consider the first multiplier in (2). One of the following holds:
5004 a) F contains condition on field used in t (i.e. t AND F = F).
5005 Then P(t|F) = 1
5006
5007 b) F doesn't contain condition on field used in t. Then F and t are
5008 considered independent.
5009
5010 P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
5011 = P(t|fields_before_t_in_key).
5012
5013 P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
5014 #records(fields_before_t_in_key, t)
5015
5016 The second multiplier is calculated by applying this step recursively.
5017
5018 IMPLEMENTATION
5019 This function calculates the result of application of the "recursion step"
5020 described above for all fixed key members of a single key, accumulating set
5021 of covered fields, selectivity, etc.
5022
5023 The calculation is conducted as follows:
5024 Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
5025
5026 n_{k1} n_{k2}
5027 --------- * --------- * .... (3)
5028 n_{k1-1} n_{k2-1}
5029
5030 where k1,k2,... are key parts which fields were not yet marked as fixed
5031 ( this is result of application of option b) of the recursion step for
5032 parts of a single key).
5033 Since it is reasonable to expect that most of the fields are not marked
5034 as fixed, we calculate (3) as
5035
5036 n_{i1} n_{i2}
5037 (3) = n_{max_key_part} / ( --------- * --------- * .... )
5038 n_{i1-1} n_{i2-1}
5039
5040 where i1,i2, .. are key parts that were already marked as fixed.
5041
5042 In order to minimize number of expensive records_in_range calls we
5043 group and reduce adjacent fractions. Note that on the optimizer's
5044 request, index statistics may be used instead of records_in_range
5045 @see RANGE_OPT_PARAM::use_index_statistics.
5046
5047 RETURN
5048 Selectivity of given ROR scan, a number between 0 and 1. 1 means that
5049 adding 'scan' to the intersection does not improve the selectivity.
5050 */
5051
ror_scan_selectivity(const ROR_INTERSECT_INFO * info,const ROR_SCAN_INFO * scan)5052 static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
5053 const ROR_SCAN_INFO *scan)
5054 {
5055 double selectivity_mult= 1.0;
5056 const TABLE * const table= info->param->table;
5057 const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;
5058 /**
5059 key values tuple, used to store both min_range.key and
5060 max_range.key. This function is only called for equality ranges;
5061 open ranges (e.g. "min_value < X < max_value") cannot be used for
5062 rowid ordered retrieval, so in this function we know that
5063 min_range.key == max_range.key
5064 */
5065 uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
5066 uchar *key_ptr= key_val;
5067 SEL_ARG *sel_arg, *tuple_arg= NULL;
5068 key_part_map keypart_map= 0;
5069 bool cur_covered;
5070 bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
5071 key_part->fieldnr-1));
5072 key_range min_range;
5073 key_range max_range;
5074 min_range.key= key_val;
5075 min_range.flag= HA_READ_KEY_EXACT;
5076 max_range.key= key_val;
5077 max_range.flag= HA_READ_AFTER_KEY;
5078 ha_rows prev_records= table->file->stats.records;
5079 DBUG_ENTER("ror_scan_selectivity");
5080
5081 for (sel_arg= scan->sel_arg; sel_arg;
5082 sel_arg= sel_arg->next_key_part)
5083 {
5084 DBUG_PRINT("info",("sel_arg step"));
5085 cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
5086 key_part[sel_arg->part].fieldnr-1));
5087 if (cur_covered != prev_covered)
5088 {
5089 /* create (part1val, ..., part{n-1}val) tuple. */
5090 bool is_null_range= false;
5091 ha_rows records;
5092 if (!tuple_arg)
5093 {
5094 tuple_arg= scan->sel_arg;
5095 /* Here we use the length of the first key part */
5096 tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);
5097 is_null_range|= tuple_arg->is_null_interval();
5098 keypart_map= 1;
5099 }
5100 while (tuple_arg->next_key_part != sel_arg)
5101 {
5102 tuple_arg= tuple_arg->next_key_part;
5103 tuple_arg->store_min(key_part[tuple_arg->part].store_length,
5104 &key_ptr, 0);
5105 is_null_range|= tuple_arg->is_null_interval();
5106 keypart_map= (keypart_map << 1) | 1;
5107 }
5108 min_range.length= max_range.length= (size_t) (key_ptr - key_val);
5109 min_range.keypart_map= max_range.keypart_map= keypart_map;
5110
5111 /*
5112 Get the number of rows in this range. This is done by calling
5113 records_in_range() unless all these are true:
5114 1) The user has requested that index statistics should be used
5115 for equality ranges to avoid the incurred overhead of
5116 index dives in records_in_range()
5117 2) The range is not on the form "x IS NULL". The reason is
5118 that the number of rows with this value are likely to be
5119 very different than the values in the index statistics
5120 3) Index statistics is available.
5121 @see key_val
5122 */
5123 if (!info->param->use_index_statistics || // (1)
5124 is_null_range || // (2)
5125 !(records= table->key_info[scan->keynr].
5126 rec_per_key[tuple_arg->part])) // (3)
5127 {
5128 DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
5129 DBUG_ASSERT(min_range.length > 0);
5130 records= (table->file->
5131 records_in_range(scan->keynr, &min_range, &max_range));
5132 }
5133 if (cur_covered)
5134 {
5135 /* uncovered -> covered */
5136 double tmp= rows2double(records)/rows2double(prev_records);
5137 DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
5138 selectivity_mult *= tmp;
5139 prev_records= HA_POS_ERROR;
5140 }
5141 else
5142 {
5143 /* covered -> uncovered */
5144 prev_records= records;
5145 }
5146 }
5147 prev_covered= cur_covered;
5148 }
5149 if (!prev_covered)
5150 {
5151 double tmp= rows2double(table->quick_rows[scan->keynr]) /
5152 rows2double(prev_records);
5153 DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
5154 selectivity_mult *= tmp;
5155 }
5156 // Todo: This assert fires in PB sysqa RQG tests.
5157 // DBUG_ASSERT(selectivity_mult <= 1.0);
5158 DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
5159 DBUG_RETURN(selectivity_mult);
5160 }
5161
5162
5163 /*
5164 Check if adding a ROR scan to a ROR-intersection reduces its cost of
5165 ROR-intersection and if yes, update parameters of ROR-intersection,
5166 including its cost.
5167
5168 SYNOPSIS
5169 ror_intersect_add()
5170 param Parameter from test_quick_select
5171 info ROR-intersection structure to add the scan to.
5172 ror_scan ROR scan info to add.
5173 is_cpk_scan If TRUE, add the scan as CPK scan (this can be inferred
5174 from other parameters and is passed separately only to
5175 avoid duplicating the inference code)
5176 trace_costs Optimizer trace object cost details are added to
5177
5178 NOTES
5179 Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
5180 intersection decreases. The cost of ROR-intersection is calculated as
5181 follows:
5182
5183 cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
5184
5185 When we add a scan the first increases and the second decreases.
5186
5187 cost_of_full_rows_retrieval=
5188 (union of indexes used covers all needed fields) ?
5189 cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
5190 0
5191
5192 E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
5193 ror_scan_selectivity({scan1}, scan2) * ... *
5194 ror_scan_selectivity({scan1,...}, scanN).
5195 RETURN
5196 TRUE ROR scan added to ROR-intersection, cost updated.
5197 FALSE It doesn't make sense to add this ROR scan to this ROR-intersection.
5198 */
5199
ror_intersect_add(ROR_INTERSECT_INFO * info,ROR_SCAN_INFO * ror_scan,bool is_cpk_scan,Opt_trace_object * trace_costs)5200 static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
5201 ROR_SCAN_INFO* ror_scan, bool is_cpk_scan,
5202 Opt_trace_object *trace_costs)
5203 {
5204 double selectivity_mult= 1.0;
5205
5206 DBUG_ENTER("ror_intersect_add");
5207 DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
5208 DBUG_PRINT("info", ("Adding scan on %s",
5209 info->param->table->key_info[ror_scan->keynr].name));
5210 DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));
5211
5212 selectivity_mult = ror_scan_selectivity(info, ror_scan);
5213 if (selectivity_mult == 1.0)
5214 {
5215 /* Don't add this scan if it doesn't improve selectivity. */
5216 DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
5217 DBUG_RETURN(FALSE);
5218 }
5219
5220 info->out_rows *= selectivity_mult;
5221
5222 if (is_cpk_scan)
5223 {
5224 /*
5225 CPK scan is used to filter out rows. We apply filtering for
5226 each record of every scan. Assuming ROWID_COMPARE_COST
5227 per check this gives us:
5228 */
5229 const double idx_cost=
5230 rows2double(info->index_records) * ROWID_COMPARE_COST;
5231 info->index_scan_costs+= idx_cost;
5232 trace_costs->add("index_scan_cost", idx_cost);
5233 }
5234 else
5235 {
5236 info->index_records += info->param->table->quick_rows[ror_scan->keynr];
5237 info->index_scan_costs += ror_scan->index_read_cost;
5238 trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
5239 bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
5240 if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
5241 &info->covered_fields))
5242 {
5243 DBUG_PRINT("info", ("ROR-intersect is covering now"));
5244 info->is_covering= TRUE;
5245 }
5246 }
5247
5248 info->total_cost= info->index_scan_costs;
5249 trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
5250
5251 if (!info->is_covering)
5252 {
5253 Cost_estimate sweep_cost;
5254 JOIN *join= info->param->thd->lex->select_lex.join;
5255 const bool is_interrupted= join && join->tables != 1;
5256 get_sweep_read_cost(info->param->table, double2rows(info->out_rows),
5257 is_interrupted, &sweep_cost);
5258 info->total_cost += sweep_cost.total_cost();
5259 trace_costs->add("disk_sweep_cost", sweep_cost.total_cost());
5260 }
5261 else
5262 trace_costs->add("disk_sweep_cost", 0);
5263
5264 DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
5265 DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
5266 info->is_covering?"" : "non-"));
5267 DBUG_RETURN(TRUE);
5268 }
5269
5270
5271 /*
5272 Get best ROR-intersection plan using non-covering ROR-intersection search
5273 algorithm. The returned plan may be covering.
5274
5275 SYNOPSIS
5276 get_best_ror_intersect()
5277 param Parameter from test_quick_select function.
5278 tree Transformed restriction condition to be used to look
5279 for ROR scans.
5280 read_time Do not return read plans with cost > read_time.
5281 are_all_covering [out] set to TRUE if union of all scans covers all
5282 fields needed by the query (and it is possible to build
5283 a covering ROR-intersection)
5284
5285 NOTES
5286 get_key_scans_params must be called before this function can be called.
5287
5288 When this function is called by ROR-union construction algorithm it
5289 assumes it is building an uncovered ROR-intersection (and thus # of full
5290 records to be retrieved is wrong here). This is a hack.
5291
5292 IMPLEMENTATION
5293 The approximate best non-covering plan search algorithm is as follows:
5294
5295 find_min_ror_intersection_scan()
5296 {
5297 R= select all ROR scans;
5298 order R by (E(#records_matched) * key_record_length).
5299
5300 S= first(R); -- set of scans that will be used for ROR-intersection
5301 R= R-first(S);
5302 min_cost= cost(S);
5303 min_scan= make_scan(S);
5304 while (R is not empty)
5305 {
5306 firstR= R - first(R);
5307 if (!selectivity(S + firstR < selectivity(S)))
5308 continue;
5309
5310 S= S + first(R);
5311 if (cost(S) < min_cost)
5312 {
5313 min_cost= cost(S);
5314 min_scan= make_scan(S);
5315 }
5316 }
5317 return min_scan;
5318 }
5319
5320 See ror_intersect_add function for ROR intersection costs.
5321
5322 Special handling for Clustered PK scans
5323 Clustered PK contains all table fields, so using it as a regular scan in
5324 index intersection doesn't make sense: a range scan on CPK will be less
5325 expensive in this case.
5326 Clustered PK scan has special handling in ROR-intersection: it is not used
5327 to retrieve rows, instead its condition is used to filter row references
5328 we get from scans on other keys.
5329
5330 RETURN
5331 ROR-intersection table read plan
5332 NULL if out of memory or no suitable plan found.
5333 */
5334
5335 static
get_best_ror_intersect(const PARAM * param,SEL_TREE * tree,double read_time)5336 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
5337 double read_time)
5338 {
5339 uint idx;
5340 double min_cost= DBL_MAX;
5341 Opt_trace_context * const trace= ¶m->thd->opt_trace;
5342 DBUG_ENTER("get_best_ror_intersect");
5343
5344 Opt_trace_object trace_ror(trace, "analyzing_roworder_intersect");
5345
5346 if ((tree->n_ror_scans < 2) || !param->table->file->stats.records ||
5347 !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
5348 {
5349 trace_ror.add("usable", false);
5350 if (tree->n_ror_scans < 2)
5351 trace_ror.add_alnum("cause", "too_few_roworder_scans");
5352 else
5353 trace_ror.add("need_tracing", true);
5354 DBUG_RETURN(NULL);
5355 }
5356
5357 if (param->order_direction == ORDER::ORDER_DESC)
5358 DBUG_RETURN(NULL);
5359
5360 /*
5361 Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
5362 them. Also find and save clustered PK scan if there is one.
5363 */
5364 ROR_SCAN_INFO **cur_ror_scan;
5365 ROR_SCAN_INFO *cpk_scan= NULL;
5366 uint cpk_no;
5367 bool cpk_scan_used= FALSE;
5368
5369 if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5370 sizeof(ROR_SCAN_INFO*)*
5371 param->keys)))
5372 return NULL;
5373 cpk_no= ((param->table->file->primary_key_is_clustered()) ?
5374 param->table->s->primary_key : MAX_KEY);
5375
5376 for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
5377 {
5378 ROR_SCAN_INFO *scan;
5379 if (!tree->ror_scans_map.is_set(idx))
5380 continue;
5381 if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
5382 return NULL;
5383 if (param->real_keynr[idx] == cpk_no)
5384 {
5385 cpk_scan= scan;
5386 tree->n_ror_scans--;
5387 }
5388 else
5389 *(cur_ror_scan++)= scan;
5390 }
5391
5392 tree->ror_scans_end= cur_ror_scan;
5393 DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
5394 tree->ror_scans,
5395 tree->ror_scans_end););
5396 /*
5397 Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
5398 ROR_SCAN_INFO's.
5399 Step 2: Get best ROR-intersection using an approximate algorithm.
5400 */
5401 find_intersect_order(tree->ror_scans, tree->ror_scans_end, param);
5402
5403 DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
5404 tree->ror_scans,
5405 tree->ror_scans_end););
5406
5407 ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
5408 ROR_SCAN_INFO **intersect_scans_end;
5409 if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5410 sizeof(ROR_SCAN_INFO*)*
5411 tree->n_ror_scans)))
5412 return NULL;
5413 intersect_scans_end= intersect_scans;
5414
5415 /* Create and incrementally update ROR intersection. */
5416 ROR_INTERSECT_INFO *intersect, *intersect_best;
5417 if (!(intersect= ror_intersect_init(param)) ||
5418 !(intersect_best= ror_intersect_init(param)))
5419 return NULL;
5420
5421 /* [intersect_scans,intersect_scans_best) will hold the best intersection */
5422 ROR_SCAN_INFO **intersect_scans_best;
5423 cur_ror_scan= tree->ror_scans;
5424 intersect_scans_best= intersect_scans;
5425 /*
5426 Note: trace_isect_idx.end() is called to close this object after
5427 this while-loop.
5428 */
5429 Opt_trace_array trace_isect_idx(trace, "intersecting_indices");
5430 while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
5431 {
5432 Opt_trace_object trace_idx(trace);
5433 trace_idx.add_utf8("index",
5434 param->table->key_info[(*cur_ror_scan)->keynr].name);
5435 /* S= S + first(R); R= R - first(R); */
5436 if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE, &trace_idx))
5437 {
5438 trace_idx.add("cumulated_total_cost", intersect->total_cost).
5439 add("usable", false).
5440 add_alnum("cause", "does_not_reduce_cost_of_intersect");
5441 cur_ror_scan++;
5442 continue;
5443 }
5444
5445 trace_idx.add("cumulated_total_cost", intersect->total_cost).
5446 add("usable", true).
5447 add("matching_rows_now", intersect->out_rows).
5448 add("isect_covering_with_this_index", intersect->is_covering);
5449
5450 *(intersect_scans_end++)= *(cur_ror_scan++);
5451
5452 if (intersect->total_cost < min_cost)
5453 {
5454 /* Local minimum found, save it */
5455 ror_intersect_cpy(intersect_best, intersect);
5456 intersect_scans_best= intersect_scans_end;
5457 min_cost = intersect->total_cost;
5458 trace_idx.add("chosen", true);
5459 }
5460 else
5461 {
5462 trace_idx.add("chosen", false).
5463 add_alnum("cause", "does_not_reduce_cost");
5464 }
5465 }
5466 // Note: trace_isect_idx trace object is closed here
5467 trace_isect_idx.end();
5468
5469 if (intersect_scans_best == intersect_scans)
5470 {
5471 trace_ror.add("chosen", false).
5472 add_alnum("cause", "does_not_increase_selectivity");
5473 DBUG_PRINT("info", ("None of scans increase selectivity"));
5474 DBUG_RETURN(NULL);
5475 }
5476
5477 DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
5478 "best ROR-intersection",
5479 intersect_scans,
5480 intersect_scans_best););
5481
5482 uint best_num= intersect_scans_best - intersect_scans;
5483 ror_intersect_cpy(intersect, intersect_best);
5484
5485 /*
5486 Ok, found the best ROR-intersection of non-CPK key scans.
5487 Check if we should add a CPK scan. If the obtained ROR-intersection is
5488 covering, it doesn't make sense to add CPK scan.
5489 */
5490 { // Scope for trace object
5491 Opt_trace_object trace_cpk(trace, "clustered_pk");
5492 if (cpk_scan && !intersect->is_covering)
5493 {
5494 if (ror_intersect_add(intersect, cpk_scan, TRUE, &trace_cpk) &&
5495 (intersect->total_cost < min_cost))
5496 {
5497 trace_cpk.add("clustered_pk_scan_added_to_intersect", true).
5498 add("cumulated_cost", intersect->total_cost);
5499 cpk_scan_used= TRUE;
5500 intersect_best= intersect; //just set pointer here
5501 }
5502 else
5503 trace_cpk.add("clustered_pk_added_to_intersect", false).
5504 add_alnum("cause", "cost");
5505 }
5506 else
5507 {
5508 trace_cpk.add("clustered_pk_added_to_intersect", false).
5509 add_alnum("cause", cpk_scan ?
5510 "roworder_is_covering" : "no_clustered_pk_index");
5511 }
5512 }
5513 /* Ok, return ROR-intersect plan if we have found one */
5514 TRP_ROR_INTERSECT *trp= NULL;
5515 if (min_cost < read_time && (cpk_scan_used || best_num > 1))
5516 {
5517 if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
5518 DBUG_RETURN(trp);
5519 if (!(trp->first_scan=
5520 (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5521 sizeof(ROR_SCAN_INFO*)*best_num)))
5522 DBUG_RETURN(NULL);
5523 memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
5524 trp->last_scan= trp->first_scan + best_num;
5525 trp->is_covering= intersect_best->is_covering;
5526 trp->read_cost= intersect_best->total_cost;
5527 /* Prevent divisons by zero */
5528 ha_rows best_rows = double2rows(intersect_best->out_rows);
5529 if (!best_rows)
5530 best_rows= 1;
5531 set_if_smaller(param->table->quick_condition_rows, best_rows);
5532 trp->records= best_rows;
5533 trp->index_scan_costs= intersect_best->index_scan_costs;
5534 trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
5535
5536 trace_ror.add("rows", trp->records).
5537 add("cost", trp->read_cost).
5538 add("covering", trp->is_covering).
5539 add("chosen", true);
5540
5541 DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
5542 "cost %g, records %lu",
5543 trp->read_cost, (ulong) trp->records));
5544 }
5545 else
5546 {
5547 trace_ror.add("chosen", false).
5548 add_alnum("cause", (min_cost >= read_time) ? "cost" :
5549 "too_few_indexes_to_merge");
5550
5551 }
5552 DBUG_RETURN(trp);
5553 }
5554
5555 /*
5556 Get best "range" table read plan for given SEL_TREE, also update some info
5557
5558 SYNOPSIS
5559 get_key_scans_params()
5560 param Parameters from test_quick_select
5561 tree Make range select for this SEL_TREE
5562 index_read_must_be_used TRUE <=> assume 'index only' option will be set
5563 (except for clustered PK indexes)
5564 update_tbl_stats TRUE <=> update table->quick_* with information
5565 about range scans we've evaluated.
5566 read_time Maximum cost. i.e. don't create read plans with
5567 cost > read_time.
5568
5569 DESCRIPTION
5570 Find the best "range" table read plan for given SEL_TREE.
5571 The side effects are
5572 - tree->ror_scans is updated to indicate which scans are ROR scans.
5573 - if update_tbl_stats=TRUE then table->quick_* is updated with info
5574 about every possible range scan.
5575
5576 RETURN
5577 Best range read plan
5578 NULL if no plan found or error occurred
5579 */
5580
get_key_scans_params(PARAM * param,SEL_TREE * tree,bool index_read_must_be_used,bool update_tbl_stats,double read_time)5581 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
5582 bool index_read_must_be_used,
5583 bool update_tbl_stats,
5584 double read_time)
5585 {
5586 uint idx;
5587 SEL_ARG **key,**end, **key_to_read= NULL;
5588 ha_rows UNINIT_VAR(best_records); /* protected by key_to_read */
5589 uint best_mrr_flags, best_buf_size;
5590 TRP_RANGE* read_plan= NULL;
5591 DBUG_ENTER("get_key_scans_params");
5592 LINT_INIT(best_mrr_flags); /* protected by key_to_read */
5593 LINT_INIT(best_buf_size); /* protected by key_to_read */
5594 Opt_trace_context * const trace= ¶m->thd->opt_trace;
5595 /*
5596 Note that there may be trees that have type SEL_TREE::KEY but contain no
5597 key reads at all, e.g. tree for expression "key1 is not null" where key1
5598 is defined as "not null".
5599 */
5600 DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
5601 "tree scans"););
5602 Opt_trace_array ota(trace, "range_scan_alternatives");
5603
5604 tree->ror_scans_map.clear_all();
5605 tree->n_ror_scans= 0;
5606 for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
5607 {
5608 if (*key)
5609 {
5610 ha_rows found_records;
5611 Cost_estimate cost;
5612 double found_read_time;
5613 uint mrr_flags, buf_size;
5614 uint keynr= param->real_keynr[idx];
5615 if ((*key)->type == SEL_ARG::MAYBE_KEY ||
5616 (*key)->maybe_flag)
5617 param->needed_reg->set_bit(keynr);
5618
5619 bool read_index_only= index_read_must_be_used ? TRUE :
5620 (bool) param->table->covering_keys.is_set(keynr);
5621
5622 Opt_trace_object trace_idx(trace);
5623 trace_idx.add_utf8("index", param->table->key_info[keynr].name);
5624
5625 found_records= check_quick_select(param, idx, read_index_only, *key,
5626 update_tbl_stats, &mrr_flags,
5627 &buf_size, &cost);
5628
5629 #ifdef OPTIMIZER_TRACE
5630 // check_quick_select() says don't use range if it returns HA_POS_ERROR
5631 if (found_records != HA_POS_ERROR &&
5632 param->thd->opt_trace.is_started())
5633 {
5634 Opt_trace_array trace_range(¶m->thd->opt_trace, "ranges");
5635
5636 const KEY &cur_key= param->table->key_info[keynr];
5637 const KEY_PART_INFO *key_part= cur_key.key_part;
5638
5639 String range_info;
5640 range_info.set_charset(system_charset_info);
5641 append_range_all_keyparts(&trace_range, NULL,
5642 &range_info, *key, key_part);
5643 trace_range.end(); // NOTE: ends the tracing scope
5644
5645 trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics).
5646 add("rowid_ordered", param->is_ror_scan).
5647 add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)).
5648 add("index_only", read_index_only).
5649 add("rows", found_records).
5650 add("cost", cost.total_cost());
5651 if (param->thd->optimizer_switch_flag(
5652 OPTIMIZER_SWITCH_FAVOR_RANGE_SCAN))
5653 trace_idx.add("revised_cost", cost.total_cost() * 0.1);
5654 }
5655 #endif
5656 if (param->thd->optimizer_switch_flag(
5657 OPTIMIZER_SWITCH_FAVOR_RANGE_SCAN))
5658 cost.multiply(0.1);
5659
5660 if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
5661 {
5662 tree->n_ror_scans++;
5663 tree->ror_scans_map.set_bit(idx);
5664 }
5665
5666
5667 if (found_records != HA_POS_ERROR &&
5668 read_time > (found_read_time= cost.total_cost()))
5669 {
5670 trace_idx.add("chosen", true);
5671 read_time= found_read_time;
5672 best_records= found_records;
5673 key_to_read= key;
5674 best_mrr_flags= mrr_flags;
5675 best_buf_size= buf_size;
5676 }
5677 else
5678 trace_idx.add("chosen", false).
5679 add_alnum("cause",
5680 (found_records == HA_POS_ERROR) ? "unknown" : "cost");
5681
5682 }
5683 }
5684
5685 DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
5686 "ROR scans"););
5687 if (key_to_read)
5688 {
5689 idx= key_to_read - tree->keys;
5690 if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
5691 best_mrr_flags)))
5692 {
5693 read_plan->records= best_records;
5694 read_plan->is_ror= tree->ror_scans_map.is_set(idx);
5695 read_plan->read_cost= read_time;
5696 read_plan->mrr_buf_size= best_buf_size;
5697 DBUG_PRINT("info",
5698 ("Returning range plan for key %s, cost %g, records %lu",
5699 param->table->key_info[param->real_keynr[idx]].name,
5700 read_plan->read_cost, (ulong) read_plan->records));
5701 }
5702 }
5703 else
5704 DBUG_PRINT("info", ("No 'range' table read plan found"));
5705
5706 DBUG_RETURN(read_plan);
5707 }
5708
5709
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5710 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
5711 bool retrieve_full_rows,
5712 MEM_ROOT *parent_alloc)
5713 {
5714 QUICK_INDEX_MERGE_SELECT *quick_imerge;
5715 QUICK_RANGE_SELECT *quick;
5716 /* index_merge always retrieves full rows, ignore retrieve_full_rows */
5717 if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
5718 return NULL;
5719
5720 quick_imerge->records= records;
5721 quick_imerge->read_time= read_cost;
5722 for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
5723 range_scan++)
5724 {
5725 if (!(quick= (QUICK_RANGE_SELECT*)
5726 ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
5727 quick_imerge->push_quick_back(quick))
5728 {
5729 delete quick;
5730 delete quick_imerge;
5731 return NULL;
5732 }
5733 }
5734 return quick_imerge;
5735 }
5736
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5737 QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
5738 bool retrieve_full_rows,
5739 MEM_ROOT *parent_alloc)
5740 {
5741 QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
5742 QUICK_RANGE_SELECT *quick;
5743 DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
5744 MEM_ROOT *alloc;
5745
5746 if ((quick_intrsect=
5747 new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
5748 (retrieve_full_rows? (!is_covering) :
5749 FALSE),
5750 parent_alloc)))
5751 {
5752 DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
5753 "creating ROR-intersect",
5754 first_scan, last_scan););
5755 alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
5756 for (st_ror_scan_info **current= first_scan;
5757 current != last_scan;
5758 current++)
5759 {
5760 if (!(quick= get_quick_select(param, (*current)->idx,
5761 (*current)->sel_arg,
5762 HA_MRR_SORTED,
5763 0, alloc)) ||
5764 quick_intrsect->push_quick_back(quick))
5765 {
5766 delete quick_intrsect;
5767 DBUG_RETURN(NULL);
5768 }
5769 }
5770 if (cpk_scan)
5771 {
5772 if (!(quick= get_quick_select(param, cpk_scan->idx,
5773 cpk_scan->sel_arg,
5774 HA_MRR_SORTED,
5775 0, alloc)))
5776 {
5777 delete quick_intrsect;
5778 DBUG_RETURN(NULL);
5779 }
5780 quick->file= NULL;
5781 quick_intrsect->cpk_quick= quick;
5782 }
5783 quick_intrsect->records= records;
5784 quick_intrsect->read_time= read_cost;
5785 }
5786 DBUG_RETURN(quick_intrsect);
5787 }
5788
5789
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5790 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
5791 bool retrieve_full_rows,
5792 MEM_ROOT *parent_alloc)
5793 {
5794 QUICK_ROR_UNION_SELECT *quick_roru;
5795 TABLE_READ_PLAN **scan;
5796 QUICK_SELECT_I *quick;
5797 DBUG_ENTER("TRP_ROR_UNION::make_quick");
5798 /*
5799 It is impossible to construct a ROR-union that will not retrieve full
5800 rows, ignore retrieve_full_rows parameter.
5801 */
5802 if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
5803 {
5804 for (scan= first_ror; scan != last_ror; scan++)
5805 {
5806 if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
5807 quick_roru->push_quick_back(quick))
5808 DBUG_RETURN(NULL);
5809 }
5810 quick_roru->records= records;
5811 quick_roru->read_time= read_cost;
5812 }
5813 DBUG_RETURN(quick_roru);
5814 }
5815
5816
5817 /**
5818 If EXPLAIN EXTENDED, add a warning that the index cannot be
5819 used for range access due to either type conversion or different
5820 collations on the field used for comparison
5821
5822 @param param PARAM from SQL_SELECT::test_quick_select
5823 @param key_num Key number
5824 @param field Field in the predicate
5825 */
5826 static void
if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM * param,const uint key_num,const Field * field)5827 if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM *param,
5828 const uint key_num,
5829 const Field *field)
5830 {
5831 if (param->using_real_indexes &&
5832 param->thd->lex->describe & DESCRIBE_EXTENDED)
5833 push_warning_printf(
5834 param->thd,
5835 Sql_condition::WARN_LEVEL_WARN,
5836 ER_WARN_INDEX_NOT_APPLICABLE,
5837 ER(ER_WARN_INDEX_NOT_APPLICABLE),
5838 "range",
5839 field->table->key_info[param->real_keynr[key_num]].name,
5840 field->field_name);
5841 }
5842
5843
5844 /*
5845 Build a SEL_TREE for <> or NOT BETWEEN predicate
5846
5847 SYNOPSIS
5848 get_ne_mm_tree()
5849 param PARAM from SQL_SELECT::test_quick_select
5850 cond_func item for the predicate
5851 field field in the predicate
5852 lt_value constant that field should be smaller
5853 gt_value constant that field should be greaterr
5854 cmp_type compare type for the field
5855
5856 RETURN
5857 # Pointer to tree built tree
5858 0 on error
5859 */
get_ne_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item * lt_value,Item * gt_value,Item_result cmp_type)5860 static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
5861 Field *field,
5862 Item *lt_value, Item *gt_value,
5863 Item_result cmp_type)
5864 {
5865 SEL_TREE *tree;
5866 tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
5867 lt_value, cmp_type);
5868 if (tree)
5869 {
5870 tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
5871 Item_func::GT_FUNC,
5872 gt_value, cmp_type));
5873 }
5874 return tree;
5875 }
5876
5877
5878 /*
5879 Build a SEL_TREE for a simple predicate
5880
5881 SYNOPSIS
5882 get_func_mm_tree()
5883 param PARAM from SQL_SELECT::test_quick_select
5884 cond_func item for the predicate
5885 field field in the predicate
5886 value constant in the predicate
5887 cmp_type compare type for the field
5888 inv TRUE <> NOT cond_func is considered
5889 (makes sense only when cond_func is BETWEEN or IN)
5890
5891 RETURN
5892 Pointer to the tree built tree
5893 */
5894
get_func_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item * value,Item_result cmp_type,bool inv)5895 static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
5896 Field *field, Item *value,
5897 Item_result cmp_type, bool inv)
5898 {
5899 SEL_TREE *tree= 0;
5900 DBUG_ENTER("get_func_mm_tree");
5901
5902 switch (cond_func->functype()) {
5903
5904 case Item_func::XOR_FUNC:
5905 DBUG_RETURN(NULL); // Always true (don't use range access on XOR).
5906 break; // See WL#5800
5907
5908 case Item_func::NE_FUNC:
5909 tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
5910 break;
5911
5912 case Item_func::BETWEEN:
5913 {
5914 if (!value)
5915 {
5916 if (inv)
5917 {
5918 tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1],
5919 cond_func->arguments()[2], cmp_type);
5920 }
5921 else
5922 {
5923 tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
5924 cond_func->arguments()[1],cmp_type);
5925 if (tree)
5926 {
5927 tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
5928 Item_func::LE_FUNC,
5929 cond_func->arguments()[2],
5930 cmp_type));
5931 }
5932 }
5933 }
5934 else
5935 tree= get_mm_parts(param, cond_func, field,
5936 (inv ?
5937 (value == (Item*)1 ? Item_func::GT_FUNC :
5938 Item_func::LT_FUNC):
5939 (value == (Item*)1 ? Item_func::LE_FUNC :
5940 Item_func::GE_FUNC)),
5941 cond_func->arguments()[0], cmp_type);
5942 break;
5943 }
5944 case Item_func::IN_FUNC:
5945 {
5946 Item_func_in *func=(Item_func_in*) cond_func;
5947
5948 /*
5949 Array for IN() is constructed when all values have the same result
5950 type. Tree won't be built for values with different result types,
5951 so we check it here to avoid unnecessary work.
5952 */
5953 if (!func->arg_types_compatible)
5954 break;
5955
5956 if (inv)
5957 {
5958 if (func->array && func->array->result_type() != ROW_RESULT)
5959 {
5960 /*
5961 We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
5962 where c{i} are constants. Our goal is to produce a SEL_TREE that
5963 represents intervals:
5964
5965 ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*)
5966
5967 where $MIN is either "-inf" or NULL.
5968
5969 The most straightforward way to produce it is to convert NOT IN
5970 into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
5971 analyzer to build SEL_TREE from that. The problem is that the
5972 range analyzer will use O(N^2) memory (which is probably a bug),
5973 and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
5974 will run out of memory.
5975
5976 Another problem with big lists like (*) is that a big list is
5977 unlikely to produce a good "range" access, while considering that
5978 range access will require expensive CPU calculations (and for
5979 MyISAM even index accesses). In short, big NOT IN lists are rarely
5980 worth analyzing.
5981
5982 Considering the above, we'll handle NOT IN as follows:
5983 * if the number of entries in the NOT IN list is less than
5984 NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
5985 * Otherwise, don't produce a SEL_TREE.
5986 */
5987 #define NOT_IN_IGNORE_THRESHOLD 1000
5988 MEM_ROOT *tmp_root= param->mem_root;
5989 param->thd->mem_root= param->old_root;
5990 /*
5991 Create one Item_type constant object. We'll need it as
5992 get_mm_parts only accepts constant values wrapped in Item_Type
5993 objects.
5994 We create the Item on param->mem_root which points to
5995 per-statement mem_root (while thd->mem_root is currently pointing
5996 to mem_root local to range optimizer).
5997 */
5998 Item *value_item= func->array->create_item();
5999 param->thd->mem_root= tmp_root;
6000
6001 if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
6002 break;
6003
6004 /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
6005 uint i=0;
6006 do
6007 {
6008 func->array->value_to_item(i, value_item);
6009 tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
6010 value_item, cmp_type);
6011 if (!tree)
6012 break;
6013 i++;
6014 } while (i < func->array->count && tree->type == SEL_TREE::IMPOSSIBLE);
6015
6016 if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
6017 {
6018 /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3) */
6019 tree= NULL;
6020 break;
6021 }
6022 SEL_TREE *tree2;
6023 for (; i < func->array->count; i++)
6024 {
6025 if (func->array->compare_elems(i, i-1))
6026 {
6027 /* Get a SEL_TREE for "-inf < X < c_i" interval */
6028 func->array->value_to_item(i, value_item);
6029 tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
6030 value_item, cmp_type);
6031 if (!tree2)
6032 {
6033 tree= NULL;
6034 break;
6035 }
6036
6037 /* Change all intervals to be "c_{i-1} < X < c_i" */
6038 for (uint idx= 0; idx < param->keys; idx++)
6039 {
6040 SEL_ARG *new_interval, *last_val;
6041 if (((new_interval= tree2->keys[idx])) &&
6042 (tree->keys[idx]) &&
6043 ((last_val= tree->keys[idx]->last())))
6044 {
6045 new_interval->min_value= last_val->max_value;
6046 new_interval->min_flag= NEAR_MIN;
6047
6048 /*
6049 If the interval is over a partial keypart, the
6050 interval must be "c_{i-1} <= X < c_i" instead of
6051 "c_{i-1} < X < c_i". Reason:
6052
6053 Consider a table with a column "my_col VARCHAR(3)",
6054 and an index with definition
6055 "INDEX my_idx my_col(1)". If the table contains rows
6056 with my_col values "f" and "foo", the index will not
6057 distinguish the two rows.
6058
6059 Note that tree_or() below will effectively merge
6060 this range with the range created for c_{i-1} and
6061 we'll eventually end up with only one range:
6062 "NULL < X".
6063
6064 Partitioning indexes are never partial.
6065 */
6066 if (param->using_real_indexes)
6067 {
6068 const KEY key=
6069 param->table->key_info[param->real_keynr[idx]];
6070 const KEY_PART_INFO *kpi= key.key_part + new_interval->part;
6071
6072 if (kpi->key_part_flag & HA_PART_KEY_SEG)
6073 new_interval->min_flag= 0;
6074 }
6075 }
6076 }
6077 /*
6078 The following doesn't try to allocate memory so no need to
6079 check for NULL.
6080 */
6081 tree= tree_or(param, tree, tree2);
6082 }
6083 }
6084
6085 if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
6086 {
6087 /*
6088 Get the SEL_TREE for the last "c_last < X < +inf" interval
6089 (value_item cotains c_last already)
6090 */
6091 tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC,
6092 value_item, cmp_type);
6093 tree= tree_or(param, tree, tree2);
6094 }
6095 }
6096 else
6097 {
6098 tree= get_ne_mm_tree(param, cond_func, field,
6099 func->arguments()[1], func->arguments()[1],
6100 cmp_type);
6101 if (tree)
6102 {
6103 Item **arg, **end;
6104 for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
6105 arg < end ; arg++)
6106 {
6107 tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
6108 *arg, *arg, cmp_type));
6109 }
6110 }
6111 }
6112 }
6113 else
6114 {
6115 tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC,
6116 func->arguments()[1], cmp_type);
6117 if (tree)
6118 {
6119 Item **arg, **end;
6120 for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
6121 arg < end ; arg++)
6122 {
6123 tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
6124 Item_func::EQ_FUNC,
6125 *arg, cmp_type));
6126 }
6127 }
6128 }
6129 break;
6130 }
6131 default:
6132 {
6133 /*
6134 Here the function for the following predicates are processed:
6135 <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL and GIS functions.
6136 If the predicate is of the form (value op field) it is handled
6137 as the equivalent predicate (field rev_op value), e.g.
6138 2 <= a is handled as a >= 2.
6139 */
6140 Item_func::Functype func_type=
6141 (value != cond_func->arguments()[0]) ? cond_func->functype() :
6142 ((Item_bool_func2*) cond_func)->rev_functype();
6143 tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
6144 }
6145 }
6146
6147 DBUG_RETURN(tree);
6148 }
6149
6150
6151 /*
6152 Build conjunction of all SEL_TREEs for a simple predicate applying equalities
6153
6154 SYNOPSIS
6155 get_full_func_mm_tree()
6156 param PARAM from SQL_SELECT::test_quick_select
6157 cond_func item for the predicate
6158 field_item field in the predicate
6159 value constant in the predicate (or a field already read from
6160 a table in the case of dynamic range access)
6161 (for BETWEEN it contains the number of the field argument,
6162 for IN it's always 0)
6163 inv TRUE <> NOT cond_func is considered
6164 (makes sense only when cond_func is BETWEEN or IN)
6165
6166 DESCRIPTION
6167 For a simple SARGable predicate of the form (f op c), where f is a field and
6168 c is a constant, the function builds a conjunction of all SEL_TREES that can
6169 be obtained by the substitution of f for all different fields equal to f.
6170
6171 NOTES
6172 If the WHERE condition contains a predicate (fi op c),
6173 then not only SELL_TREE for this predicate is built, but
6174 the trees for the results of substitution of fi for
6175 each fj belonging to the same multiple equality as fi
6176 are built as well.
6177 E.g. for WHERE t1.a=t2.a AND t2.a > 10
6178 a SEL_TREE for t2.a > 10 will be built for quick select from t2
6179 and
6180 a SEL_TREE for t1.a > 10 will be built for quick select from t1.
6181
6182 A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
6183 in a similar way: we build a conjuction of trees for the results
6184 of all substitutions of fi for equal fj.
6185 Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
6186 differently. It is considered as a conjuction of two SARGable
6187 predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
6188 is called for each of them separately producing trees for
6189 AND j (f1j <=c ) and AND j (f2j <= c)
6190 After this these two trees are united in one conjunctive tree.
6191 It's easy to see that the same tree is obtained for
6192 AND j,k (f1j <=c AND f2k<=c)
6193 which is equivalent to
6194 AND j,k (c BETWEEN f1j AND f2k).
6195 The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
6196 which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
6197 function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
6198 producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two
6199 trees are united in one OR-tree. The expression
6200 (AND j (f1j > c) OR AND j (f2j < c)
6201 is equivalent to the expression
6202 AND j,k (f1j > c OR f2k < c)
6203 which is just a translation of
6204 AND j,k (c NOT BETWEEN f1j AND f2k)
6205
6206 In the cases when one of the items f1, f2 is a constant c1 we do not create
6207 a tree for it at all. It works for BETWEEN predicates but does not
6208 work for NOT BETWEEN predicates as we have to evaluate the expression
6209 with it. If it is TRUE then the other tree can be completely ignored.
6210 We do not do it now and no trees are built in these cases for
6211 NOT BETWEEN predicates.
6212
6213 As to IN predicates only ones of the form (f IN (c1,...,cn)),
6214 where f1 is a field and c1,...,cn are constant, are considered as
6215 SARGable. We never try to narrow the index scan using predicates of
6216 the form (c IN (c1,...,f,...,cn)).
6217
6218 RETURN
6219 Pointer to the tree representing the built conjunction of SEL_TREEs
6220 */
6221
get_full_func_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Item_field * field_item,Item * value,bool inv)6222 static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
6223 Item_func *cond_func,
6224 Item_field *field_item, Item *value,
6225 bool inv)
6226 {
6227 SEL_TREE *tree= 0;
6228 SEL_TREE *ftree= 0;
6229 table_map ref_tables= 0;
6230 table_map param_comp= ~(param->prev_tables | param->read_tables |
6231 param->current_table);
6232 DBUG_ENTER("get_full_func_mm_tree");
6233
6234 for (uint i= 0; i < cond_func->arg_count; i++)
6235 {
6236 Item *arg= cond_func->arguments()[i]->real_item();
6237 if (arg != field_item)
6238 ref_tables|= arg->used_tables();
6239 }
6240 Field *field= field_item->field;
6241 Item_result cmp_type= field->cmp_type();
6242 if (!((ref_tables | field->table->map) & param_comp))
6243 ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv);
6244 Item_equal *item_equal= field_item->item_equal;
6245 if (item_equal)
6246 {
6247 Item_equal_iterator it(*item_equal);
6248 Item_field *item;
6249 while ((item= it++))
6250 {
6251 Field *f= item->field;
6252 if (field->eq(f))
6253 continue;
6254 if (!((ref_tables | f->table->map) & param_comp))
6255 {
6256 tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv);
6257 ftree= !ftree ? tree : tree_and(param, ftree, tree);
6258 }
6259 }
6260 }
6261 DBUG_RETURN(ftree);
6262 }
6263
6264 /**
6265 The Range Analysis Module, which finds range access alternatives
6266 applicable to single or multi-index (UNION) access. The function
6267 does not calculate or care about the cost of the different
6268 alternatives.
6269
6270 get_mm_tree() employs a relaxed boolean algebra where the solution
6271 may be bigger than what the rules of boolean algebra accept. In
6272 other words, get_mm_tree() may return range access plans that will
6273 read more rows than the input conditions dictate. In it's simplest
6274 form, consider a condition on two fields indexed by two different
6275 indexes:
6276
6277 "WHERE fld1 > 'x' AND fld2 > 'y'"
6278
6279 In this case, there are two single-index range access alternatives.
6280 No matter which access path is chosen, rows that are not in the
6281 result set may be read.
6282
6283 In the case above, get_mm_tree() will create range access
6284 alternatives for both indexes, so boolean algebra is still correct.
6285 In other cases, however, the conditions are too complex to be used
6286 without relaxing the rules. This typically happens when ORing a
6287 conjunction to a multi-index disjunctions (@see e.g.
6288 imerge_list_or_tree()). When this happens, the range optimizer may
6289 choose to ignore conjunctions (any condition connected with AND). The
6290 effect of this is that the result includes a "bigger" solution than
6291 neccessary. This is OK since all conditions will be used as filters
6292 after row retrieval.
6293
6294 @see SEL_TREE::keys and SEL_TREE::merges for details of how single
6295 and multi-index range access alternatives are stored.
6296 */
get_mm_tree(RANGE_OPT_PARAM * param,Item * cond)6297 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond)
6298 {
6299 SEL_TREE *tree=0;
6300 SEL_TREE *ftree= 0;
6301 Item_field *field_item= 0;
6302 bool inv= FALSE;
6303 Item *value= 0;
6304 DBUG_ENTER("get_mm_tree");
6305
6306 if (cond->type() == Item::COND_ITEM)
6307 {
6308 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
6309
6310 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
6311 {
6312 tree= NULL;
6313 Item *item;
6314 while ((item=li++))
6315 {
6316 SEL_TREE *new_tree= get_mm_tree(param,item);
6317 if (param->statement_should_be_aborted())
6318 DBUG_RETURN(NULL);
6319 tree= tree_and(param,tree,new_tree);
6320 dbug_print_tree("after_and", tree, param);
6321 if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
6322 break;
6323 }
6324 }
6325 else
6326 { // Item OR
6327 tree= get_mm_tree(param,li++);
6328 if (param->statement_should_be_aborted())
6329 DBUG_RETURN(NULL);
6330 if (tree)
6331 {
6332 Item *item;
6333 while ((item=li++))
6334 {
6335 SEL_TREE *new_tree=get_mm_tree(param,item);
6336 if (new_tree == NULL || param->statement_should_be_aborted())
6337 DBUG_RETURN(NULL);
6338 tree= tree_or(param,tree,new_tree);
6339 dbug_print_tree("after_or", tree, param);
6340 if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
6341 break;
6342 }
6343 }
6344 }
6345 dbug_print_tree("tree_returned", tree, param);
6346 DBUG_RETURN(tree);
6347 }
6348 /*
6349 Here when simple cond
6350 There are limits on what kinds of const items we can evaluate.
6351 At this stage a subquery in 'cond' might not be fully transformed yet
6352 (example: semijoin) thus cannot be evaluated.
6353 */
6354 if (cond->const_item() && !cond->is_expensive() && !cond->has_subquery())
6355 {
6356 /*
6357 During the cond->val_int() evaluation we can come across a subselect
6358 item which may allocate memory on the thd->mem_root and assumes
6359 all the memory allocated has the same life span as the subselect
6360 item itself. So we have to restore the thread's mem_root here.
6361 */
6362 MEM_ROOT *tmp_root= param->mem_root;
6363 param->thd->mem_root= param->old_root;
6364 tree= cond->val_int() ? new(tmp_root) SEL_TREE(SEL_TREE::ALWAYS) :
6365 new(tmp_root) SEL_TREE(SEL_TREE::IMPOSSIBLE);
6366 param->thd->mem_root= tmp_root;
6367 dbug_print_tree("tree_returned", tree, param);
6368 DBUG_RETURN(tree);
6369 }
6370
6371 table_map ref_tables= 0;
6372 table_map param_comp= ~(param->prev_tables | param->read_tables |
6373 param->current_table);
6374 if (cond->type() != Item::FUNC_ITEM)
6375 { // Should be a field
6376 ref_tables= cond->used_tables();
6377 if ((ref_tables & param->current_table) ||
6378 (ref_tables & ~(param->prev_tables | param->read_tables)))
6379 DBUG_RETURN(0);
6380 DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE));
6381 }
6382
6383 Item_func *cond_func= (Item_func*) cond;
6384 if (cond_func->functype() == Item_func::BETWEEN ||
6385 cond_func->functype() == Item_func::IN_FUNC)
6386 inv= ((Item_func_opt_neg *) cond_func)->negated;
6387 else
6388 {
6389 /*
6390 During the cond_func->select_optimize() evaluation we can come across a
6391 subselect item which may allocate memory on the thd->mem_root and assumes
6392 all the memory allocated has the same life span as the subselect item
6393 itself. So we have to restore the thread's mem_root here.
6394 */
6395 MEM_ROOT *tmp_root= param->mem_root;
6396 param->thd->mem_root= param->old_root;
6397 Item_func::optimize_type opt_type= cond_func->select_optimize();
6398 param->thd->mem_root= tmp_root;
6399 if (opt_type == Item_func::OPTIMIZE_NONE)
6400 DBUG_RETURN(NULL);
6401 }
6402
6403 param->cond= cond;
6404
6405 switch (cond_func->functype()) {
6406 case Item_func::BETWEEN:
6407 if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6408 {
6409 field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6410 ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6411 }
6412
6413 /*
6414 Concerning the code below see the NOTES section in
6415 the comments for the function get_full_func_mm_tree()
6416 */
6417 for (uint i= 1 ; i < cond_func->arg_count ; i++)
6418 {
6419 if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
6420 {
6421 field_item= (Item_field*) (cond_func->arguments()[i]->real_item());
6422 SEL_TREE *tmp= get_full_func_mm_tree(param, cond_func,
6423 field_item, (Item*)(intptr)i, inv);
6424 if (inv)
6425 {
6426 tree= !tree ? tmp : tree_or(param, tree, tmp);
6427 if (tree == NULL)
6428 break;
6429 }
6430 else
6431 tree= tree_and(param, tree, tmp);
6432 }
6433 else if (inv)
6434 {
6435 tree= 0;
6436 break;
6437 }
6438 }
6439
6440 ftree = tree_and(param, ftree, tree);
6441 break;
6442 case Item_func::IN_FUNC:
6443 {
6444 Item_func_in *func=(Item_func_in*) cond_func;
6445 if (func->key_item()->real_item()->type() != Item::FIELD_ITEM)
6446 DBUG_RETURN(0);
6447 field_item= (Item_field*) (func->key_item()->real_item());
6448 ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6449 break;
6450 }
6451 case Item_func::MULT_EQUAL_FUNC:
6452 {
6453 Item_equal *item_equal= (Item_equal *) cond;
6454 if (!(value= item_equal->get_const()))
6455 DBUG_RETURN(0);
6456 Item_equal_iterator it(*item_equal);
6457 ref_tables= value->used_tables();
6458 while ((field_item= it++))
6459 {
6460 Field *field= field_item->field;
6461 Item_result cmp_type= field->cmp_type();
6462 if (!((ref_tables | field->table->map) & param_comp))
6463 {
6464 tree= get_mm_parts(param, item_equal, field, Item_func::EQ_FUNC,
6465 value,cmp_type);
6466 ftree= !ftree ? tree : tree_and(param, ftree, tree);
6467 }
6468 }
6469
6470 dbug_print_tree("tree_returned", ftree, param);
6471 DBUG_RETURN(ftree);
6472 }
6473 default:
6474
6475 DBUG_ASSERT (!ftree);
6476 if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6477 {
6478 field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6479 value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : NULL;
6480 ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6481 }
6482 /*
6483 Even if get_full_func_mm_tree() was executed above and did not
6484 return a range predicate it may still be possible to create one
6485 by reversing the order of the operands. Note that this only
6486 applies to predicates where both operands are fields. Example: A
6487 query of the form
6488
6489 WHERE t1.a OP t2.b
6490
6491 In this case, arguments()[0] == t1.a and arguments()[1] == t2.b.
6492 When creating range predicates for t2, get_full_func_mm_tree()
6493 above will return NULL because 'field' belongs to t1 and only
6494 predicates that applies to t2 are of interest. In this case a
6495 call to get_full_func_mm_tree() with reversed operands (see
6496 below) may succeed.
6497 */
6498 if (!ftree && cond_func->have_rev_func() &&
6499 cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM)
6500 {
6501 field_item= (Item_field*) (cond_func->arguments()[1]->real_item());
6502 value= cond_func->arguments()[0];
6503 ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6504 }
6505 }
6506
6507 dbug_print_tree("tree_returned", ftree, param);
6508 DBUG_RETURN(ftree);
6509 }
6510
6511 /**
6512 Test whether a comparison operator is a spatial comparison
6513 operator, i.e. Item_func::SP_*.
6514
6515 Used to check if range access using operator 'op_type' is applicable
6516 for a non-spatial index.
6517
6518 @param op_type The comparison operator.
6519 @return true if 'op_type' is a spatial comparison operator, false otherwise.
6520
6521 */
is_spatial_operator(Item_func::Functype op_type)6522 bool is_spatial_operator(Item_func::Functype op_type)
6523 {
6524 switch (op_type)
6525 {
6526 case Item_func::SP_EQUALS_FUNC:
6527 case Item_func::SP_DISJOINT_FUNC:
6528 case Item_func::SP_INTERSECTS_FUNC:
6529 case Item_func::SP_TOUCHES_FUNC:
6530 case Item_func::SP_CROSSES_FUNC:
6531 case Item_func::SP_WITHIN_FUNC:
6532 case Item_func::SP_CONTAINS_FUNC:
6533 case Item_func::SP_OVERLAPS_FUNC:
6534 case Item_func::SP_STARTPOINT:
6535 case Item_func::SP_ENDPOINT:
6536 case Item_func::SP_EXTERIORRING:
6537 case Item_func::SP_POINTN:
6538 case Item_func::SP_GEOMETRYN:
6539 case Item_func::SP_INTERIORRINGN:
6540 return true;
6541 default:
6542 return false;
6543 }
6544 }
6545
6546 static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item_func::Functype type,Item * value,Item_result cmp_type)6547 get_mm_parts(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field,
6548 Item_func::Functype type,
6549 Item *value, Item_result cmp_type)
6550 {
6551 DBUG_ENTER("get_mm_parts");
6552 if (field->table != param->table)
6553 DBUG_RETURN(0);
6554
6555 KEY_PART *key_part = param->key_parts;
6556 KEY_PART *end = param->key_parts_end;
6557 SEL_TREE *tree=0;
6558 if (value &&
6559 value->used_tables() & ~(param->prev_tables | param->read_tables))
6560 DBUG_RETURN(0);
6561 for (; key_part != end ; key_part++)
6562 {
6563 if (field->eq(key_part->field))
6564 {
6565 /*
6566 Cannot do range access for spatial operators when a
6567 non-spatial index is used.
6568 */
6569 if (key_part->image_type != Field::itMBR &&
6570 is_spatial_operator(cond_func->functype()))
6571 continue;
6572
6573 SEL_ARG *sel_arg=0;
6574 if (!tree && !(tree=new SEL_TREE()))
6575 DBUG_RETURN(0); // OOM
6576 if (!value || !(value->used_tables() & ~param->read_tables))
6577 {
6578 sel_arg=get_mm_leaf(param,cond_func,
6579 key_part->field,key_part,type,value);
6580 if (!sel_arg)
6581 continue;
6582 if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
6583 {
6584 tree->type=SEL_TREE::IMPOSSIBLE;
6585 DBUG_RETURN(tree);
6586 }
6587 }
6588 else
6589 {
6590 // This key may be used later
6591 if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY)))
6592 DBUG_RETURN(0); // OOM
6593 }
6594 sel_arg->part=(uchar) key_part->part;
6595 tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
6596 tree->keys_map.set_bit(key_part->key);
6597 }
6598 }
6599
6600 if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
6601 tree= NULL;
6602 DBUG_RETURN(tree);
6603 }
6604
6605 /**
6606 Saves 'value' in 'field' and handles potential type conversion
6607 problems.
6608
6609 @param tree [out] The SEL_ARG leaf under construction. If
6610 an always false predicate is found it is
6611 modified to point to a SEL_ARG with
6612 type == SEL_ARG::IMPOSSIBLE
6613 @param value The Item that contains a value that shall
6614 be stored in 'field'.
6615 @param comp_op Comparison operator: >, >=, <=> etc.
6616 @param field The field that 'value' is stored into.
6617 @param impossible_cond_cause[out] Set to a descriptive string if an
6618 impossible condition is found.
6619 @param memroot Memroot for creation of new SEL_ARG.
6620
6621 @retval false if saving went fine and it makes sense to continue
6622 optimizing for this predicate.
6623 @retval true if always true/false predicate was found, in which
6624 case 'tree' has been modified to reflect this: NULL
6625 pointer if always true, SEL_ARG with type IMPOSSIBLE
6626 if always false.
6627 */
save_value_and_handle_conversion(SEL_ARG ** tree,Item * value,const Item_func::Functype comp_op,Field * field,const char ** impossible_cond_cause,MEM_ROOT * memroot)6628 static bool save_value_and_handle_conversion(SEL_ARG **tree,
6629 Item *value,
6630 const Item_func::Functype comp_op,
6631 Field *field,
6632 const char **impossible_cond_cause,
6633 MEM_ROOT *memroot)
6634 {
6635 // A SEL_ARG should not have been created for this predicate yet.
6636 DBUG_ASSERT(*tree == NULL);
6637
6638 if (!value->can_be_evaluated_now())
6639 {
6640 /*
6641 We cannot evaluate the value yet (i.e. required tables are not yet
6642 locked.)
6643 This is the case of prune_partitions() called during JOIN::prepare().
6644 */
6645 return true;
6646 }
6647
6648 // For comparison purposes allow invalid dates like 2000-01-32
6649 const sql_mode_t orig_sql_mode= field->table->in_use->variables.sql_mode;
6650 field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
6651
6652 /*
6653 We want to change "field > value" to "field OP V"
6654 where:
6655 * V is what is in "field" after we stored "value" in it via
6656 save_in_field_no_warning() (such store operation may have done
6657 rounding...)
6658 * OP is > or >=, depending on what's correct.
6659 For example, if c is an INT column,
6660 "c > 2.9" is changed to "c OP 3"
6661 where OP is ">=" (">" would not be correct, as 3 > 2.9, a comparison
6662 done with stored_field_cmp_to_item()). And
6663 "c > 3.1" is changed to "c OP 3" where OP is ">" (3 < 3.1...).
6664 */
6665
6666 // Note that value may be a stored function call, executed here.
6667 const type_conversion_status err= value->save_in_field_no_warnings(field, 1);
6668 field->table->in_use->variables.sql_mode= orig_sql_mode;
6669
6670 switch (err) {
6671 case TYPE_OK:
6672 case TYPE_NOTE_TRUNCATED:
6673 return false;
6674 case TYPE_ERR_BAD_VALUE:
6675 /*
6676 In the case of incompatible values, MySQL's SQL dialect has some
6677 strange interpretations. For example,
6678
6679 "int_col > 'foo'" is interpreted as "int_col > 0"
6680
6681 instead of always false. Because of this, we assume that the
6682 range predicate is always true instead of always false and let
6683 evaluate_join_record() decide the outcome.
6684 */
6685 return true;
6686 case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
6687 // Checking NULL value on a field that cannot contain NULL.
6688 *impossible_cond_cause= "null_field_in_non_null_column";
6689 goto impossible_cond;
6690 case TYPE_WARN_OUT_OF_RANGE:
6691 /*
6692 value to store was either higher than field::max_value or lower
6693 than field::min_value. The field's max/min value has been stored
6694 instead.
6695 */
6696 if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
6697 {
6698 /*
6699 Independent of data type, "out_of_range_value =/<=> field" is
6700 always false.
6701 */
6702 *impossible_cond_cause= "value_out_of_range";
6703 goto impossible_cond;
6704 }
6705
6706 // If the field is numeric, we can interpret the out of range value.
6707 if ((field->type() != FIELD_TYPE_BIT) &&
6708 (field->result_type() == REAL_RESULT ||
6709 field->result_type() == INT_RESULT ||
6710 field->result_type() == DECIMAL_RESULT))
6711 {
6712 /*
6713 value to store was higher than field::max_value if
6714 a) field has a value greater than 0, or
6715 b) if field is unsigned and has a negative value (which, when
6716 cast to unsigned, means some value higher than LONGLONG_MAX).
6717 */
6718 if ((field->val_int() > 0) || // a)
6719 (static_cast<Field_num*>(field)->unsigned_flag &&
6720 field->val_int() < 0)) // b)
6721 {
6722 if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
6723 {
6724 /*
6725 '<' or '<=' compared to a value higher than the field
6726 can store is always true.
6727 */
6728 return true;
6729 }
6730 if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
6731 {
6732 /*
6733 '>' or '>=' compared to a value higher than the field can
6734 store is always false.
6735 */
6736 *impossible_cond_cause= "value_out_of_range";
6737 goto impossible_cond;
6738 }
6739 }
6740 else // value is lower than field::min_value
6741 {
6742 if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
6743 {
6744 /*
6745 '>' or '>=' compared to a value lower than the field
6746 can store is always true.
6747 */
6748 return true;
6749 }
6750 if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
6751 {
6752 /*
6753 '<' or '=' compared to a value lower than the field can
6754 store is always false.
6755 */
6756 *impossible_cond_cause= "value_out_of_range";
6757 goto impossible_cond;
6758 }
6759 }
6760 }
6761 /*
6762 Value is out of range on a datatype where it can't be decided if
6763 it was underflow or overflow. It is therefore not possible to
6764 determine whether or not the condition is impossible or always
6765 true and we have to assume always true.
6766 */
6767 return true;
6768 case TYPE_NOTE_TIME_TRUNCATED:
6769 if (field->type() == FIELD_TYPE_DATE &&
6770 (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC ||
6771 comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC))
6772 {
6773 /*
6774 We were saving DATETIME into a DATE column, the conversion went ok
6775 but a non-zero time part was cut off.
6776
6777 In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
6778 values. Index over a DATE column uses DATE comparison. Changing
6779 from one comparison to the other is possible:
6780
6781 datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
6782 datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'
6783
6784 datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
6785 datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'
6786
6787 but we'll need to convert '>' to '>=' and '<' to '<='. This will
6788 be done together with other types at the end of get_mm_leaf()
6789 (grep for stored_field_cmp_to_item)
6790 */
6791 return false;
6792 }
6793 if (comp_op == Item_func::EQ_FUNC || comp_op == Item_func::EQUAL_FUNC)
6794 {
6795 // Equality comparison is always false when time info has been truncated.
6796 goto impossible_cond;
6797 }
6798 // Fall through
6799 default:
6800 return true;
6801 }
6802
6803 DBUG_ASSERT(FALSE); // Should never get here.
6804
6805 impossible_cond:
6806 *tree= new (memroot) SEL_ARG(field, 0, 0);
6807 (*tree)->type= SEL_ARG::IMPOSSIBLE;
6808 return true;
6809 }
6810
6811 static SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM * param,Item * conf_func,Field * field,KEY_PART * key_part,Item_func::Functype type,Item * value)6812 get_mm_leaf(RANGE_OPT_PARAM *param, Item *conf_func, Field *field,
6813 KEY_PART *key_part, Item_func::Functype type,Item *value)
6814 {
6815 uint maybe_null=(uint) field->real_maybe_null();
6816 bool optimize_range;
6817 SEL_ARG *tree= 0;
6818 MEM_ROOT *alloc= param->mem_root;
6819 uchar *str;
6820 const char *impossible_cond_cause= NULL;
6821 DBUG_ENTER("get_mm_leaf");
6822
6823 /*
6824 We need to restore the runtime mem_root of the thread in this
6825 function because it evaluates the value of its argument, while
6826 the argument can be any, e.g. a subselect. The subselect
6827 items, in turn, assume that all the memory allocated during
6828 the evaluation has the same life span as the item itself.
6829 TODO: opt_range.cc should not reset thd->mem_root at all.
6830 */
6831 param->thd->mem_root= param->old_root;
6832 if (!value) // IS NULL or IS NOT NULL
6833 {
6834 if (field->table->maybe_null) // Can't use a key on this
6835 goto end;
6836 if (!maybe_null) // Not null field
6837 {
6838 if (type == Item_func::ISNULL_FUNC)
6839 tree= &null_element;
6840 goto end;
6841 }
6842 uchar *null_string=
6843 static_cast<uchar*>(alloc_root(alloc, key_part->store_length + 1));
6844 if (!null_string)
6845 goto end; // out of memory
6846
6847 TRASH(null_string, key_part->store_length + 1);
6848 memcpy(null_string, is_null_string, sizeof(is_null_string));
6849
6850 if (!(tree= new (alloc) SEL_ARG(field, null_string, null_string)))
6851 goto end; // out of memory
6852 if (type == Item_func::ISNOTNULL_FUNC)
6853 {
6854 tree->min_flag=NEAR_MIN; /* IS NOT NULL -> X > NULL */
6855 tree->max_flag=NO_MAX_RANGE;
6856 }
6857 goto end;
6858 }
6859
6860 /*
6861 1. Usually we can't use an index if the column collation
6862 differ from the operation collation.
6863
6864 2. However, we can reuse a case insensitive index for
6865 the binary searches:
6866
6867 WHERE latin1_swedish_ci_column = 'a' COLLATE lati1_bin;
6868
6869 WHERE latin1_swedish_ci_colimn = BINARY 'a '
6870 */
6871 if ((field->result_type() == STRING_RESULT &&
6872 field->match_collation_to_optimize_range() &&
6873 value->result_type() == STRING_RESULT &&
6874 key_part->image_type == Field::itRAW &&
6875 field->charset() != conf_func->compare_collation() &&
6876 !(conf_func->compare_collation()->state & MY_CS_BINSORT &&
6877 (type == Item_func::EQUAL_FUNC || type == Item_func::EQ_FUNC))))
6878 {
6879 if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
6880 goto end;
6881 }
6882
6883 /*
6884 Temporal values: Cannot use range access if:
6885 1) 'temporal_value = indexed_varchar_column' because there are
6886 many ways to represent the same date as a string. A few
6887 examples: "01-01-2001", "1-1-2001", "2001-01-01",
6888 "2001#01#01". The same problem applies to time. Thus, we
6889 cannot create a usefull range predicate for temporal values
6890 into VARCHAR column indexes. @see add_key_field()
6891 2) 'temporal_value_with_date_part = indexed_time' because:
6892 - without index, a TIME column with value '48:00:00' is
6893 equal to a DATETIME column with value
6894 'CURDATE() + 2 days'
6895 - with range access into the TIME column, CURDATE() + 2
6896 days becomes "00:00:00" (Field_timef::store_internal()
6897 simply extracts the time part from the datetime) which
6898 is a lookup key which does not match "48:00:00"; so
6899 ref access is not be able to give the same result as
6900 On the other hand, we can do ref access for
6901 IndexedDatetimeComparedToTime because
6902 Field_temporal_with_date::store_time() will convert
6903 48:00:00 to CURDATE() + 2 days which is the correct
6904 lookup key.
6905 */
6906 if ((!field->is_temporal() && value->is_temporal()) || // 1)
6907 field_time_cmp_date(field, value)) // 2)
6908 {
6909 if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
6910 goto end;
6911 }
6912
6913 if (key_part->image_type == Field::itMBR)
6914 {
6915 // @todo: use is_spatial_operator() instead?
6916 switch (type) {
6917 case Item_func::SP_EQUALS_FUNC:
6918 case Item_func::SP_DISJOINT_FUNC:
6919 case Item_func::SP_INTERSECTS_FUNC:
6920 case Item_func::SP_TOUCHES_FUNC:
6921 case Item_func::SP_CROSSES_FUNC:
6922 case Item_func::SP_WITHIN_FUNC:
6923 case Item_func::SP_CONTAINS_FUNC:
6924 case Item_func::SP_OVERLAPS_FUNC:
6925 break;
6926 default:
6927 /*
6928 We cannot involve spatial indexes for queries that
6929 don't use MBREQUALS(), MBRDISJOINT(), etc. functions.
6930 */
6931 goto end;
6932 }
6933 }
6934
6935 if (param->using_real_indexes)
6936 optimize_range= field->optimize_range(param->real_keynr[key_part->key],
6937 key_part->part);
6938 else
6939 optimize_range= TRUE;
6940
6941 if (type == Item_func::LIKE_FUNC)
6942 {
6943 bool like_error;
6944 char buff1[MAX_FIELD_WIDTH];
6945 uchar *min_str,*max_str;
6946 String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
6947 size_t length, offset, min_length, max_length;
6948 uint field_length= field->pack_length()+maybe_null;
6949
6950 if (!optimize_range)
6951 goto end;
6952 if (!(res= value->val_str(&tmp)))
6953 {
6954 tree= &null_element;
6955 goto end;
6956 }
6957
6958 /*
6959 TODO:
6960 Check if this was a function. This should have be optimized away
6961 in the sql_select.cc
6962 */
6963 if (res != &tmp)
6964 {
6965 tmp.copy(*res); // Get own copy
6966 res= &tmp;
6967 }
6968 if (field->cmp_type() != STRING_RESULT)
6969 goto end; // Can only optimize strings
6970
6971 offset=maybe_null;
6972 length=key_part->store_length;
6973
6974 if (length != key_part->length + maybe_null)
6975 {
6976 /* key packed with length prefix */
6977 offset+= HA_KEY_BLOB_LENGTH;
6978 field_length= length - HA_KEY_BLOB_LENGTH;
6979 }
6980 else
6981 {
6982 if (unlikely(length < field_length))
6983 {
6984 /*
6985 This can only happen in a table created with UNIREG where one key
6986 overlaps many fields
6987 */
6988 length= field_length;
6989 }
6990 else
6991 field_length= length;
6992 }
6993 length+=offset;
6994 if (!(min_str= (uchar*) alloc_root(alloc, length*2)))
6995 goto end;
6996
6997 max_str=min_str+length;
6998 if (maybe_null)
6999 max_str[0]= min_str[0]=0;
7000
7001 field_length-= maybe_null;
7002 like_error= my_like_range(field->charset(),
7003 res->ptr(), res->length(),
7004 ((Item_func_like*)(param->cond))->escape,
7005 wild_one, wild_many,
7006 field_length,
7007 (char*) min_str+offset, (char*) max_str+offset,
7008 &min_length, &max_length);
7009 if (like_error) // Can't optimize with LIKE
7010 goto end;
7011
7012 if (offset != maybe_null) // BLOB or VARCHAR
7013 {
7014 int2store(min_str+maybe_null,min_length);
7015 int2store(max_str+maybe_null,max_length);
7016 }
7017 tree= new (alloc) SEL_ARG(field, min_str, max_str);
7018 goto end;
7019 }
7020
7021 if (!optimize_range &&
7022 type != Item_func::EQ_FUNC &&
7023 type != Item_func::EQUAL_FUNC)
7024 goto end; // Can't optimize this
7025
7026 /*
7027 We can't always use indexes when comparing a string index to a number
7028 cmp_type() is checked to allow compare of dates to numbers
7029 */
7030 if (field->result_type() == STRING_RESULT &&
7031 value->result_type() != STRING_RESULT &&
7032 field->cmp_type() != value->result_type())
7033 {
7034 if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
7035 goto end;
7036 }
7037
7038 if (save_value_and_handle_conversion(&tree, value, type, field,
7039 &impossible_cond_cause, alloc))
7040 goto end;
7041
7042 /*
7043 Any sargable predicate except "<=>" involving NULL as a constant is always
7044 FALSE
7045 */
7046 if (type != Item_func::EQUAL_FUNC && field->is_real_null())
7047 {
7048 impossible_cond_cause= "comparison_with_null_always_false";
7049 tree= &null_element;
7050 goto end;
7051 }
7052
7053 str= (uchar*) alloc_root(alloc, key_part->store_length+1);
7054 if (!str)
7055 goto end;
7056 if (maybe_null)
7057 *str= (uchar) field->is_real_null(); // Set to 1 if null
7058 field->get_key_image(str+maybe_null, key_part->length,
7059 key_part->image_type);
7060 if (!(tree= new (alloc) SEL_ARG(field, str, str)))
7061 goto end; // out of memory
7062
7063 /*
7064 Check if we are comparing an UNSIGNED integer with a negative constant.
7065 In this case we know that:
7066 (a) (unsigned_int [< | <=] negative_constant) == FALSE
7067 (b) (unsigned_int [> | >=] negative_constant) == TRUE
7068 In case (a) the condition is false for all values, and in case (b) it
7069 is true for all values, so we can avoid unnecessary retrieval and condition
7070 testing, and we also get correct comparison of unsinged integers with
7071 negative integers (which otherwise fails because at query execution time
7072 negative integers are cast to unsigned if compared with unsigned).
7073 */
7074 if (field->result_type() == INT_RESULT &&
7075 value->result_type() == INT_RESULT &&
7076 ((field->type() == FIELD_TYPE_BIT ||
7077 ((Field_num *) field)->unsigned_flag) &&
7078 !((Item_int*) value)->unsigned_flag))
7079 {
7080 longlong item_val= value->val_int();
7081 if (item_val < 0)
7082 {
7083 if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
7084 {
7085 impossible_cond_cause= "unsigned_int_cannot_be_negative";
7086 tree->type= SEL_ARG::IMPOSSIBLE;
7087 goto end;
7088 }
7089 if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
7090 {
7091 tree= 0;
7092 goto end;
7093 }
7094 }
7095 }
7096
7097 switch (type) {
7098 case Item_func::LT_FUNC:
7099 /* Don't use open ranges for partial key_segments */
7100 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7101 stored_field_cmp_to_item(param->thd, field, value) == 0)
7102 tree->max_flag=NEAR_MAX;
7103 /* fall through */
7104 case Item_func::LE_FUNC:
7105 if (!maybe_null)
7106 tree->min_flag=NO_MIN_RANGE; /* From start */
7107 else
7108 { // > NULL
7109 if (!(tree->min_value=
7110 static_cast<uchar*>(alloc_root(alloc, key_part->store_length+1))))
7111 goto end;
7112 TRASH(tree->min_value, key_part->store_length + 1);
7113 memcpy(tree->min_value, is_null_string, sizeof(is_null_string));
7114 tree->min_flag=NEAR_MIN;
7115 }
7116 break;
7117 case Item_func::GT_FUNC:
7118 /* Don't use open ranges for partial key_segments */
7119 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7120 (stored_field_cmp_to_item(param->thd, field, value) <= 0))
7121 tree->min_flag=NEAR_MIN;
7122 tree->max_flag= NO_MAX_RANGE;
7123 break;
7124 case Item_func::GE_FUNC:
7125 /* Don't use open ranges for partial key_segments */
7126 if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
7127 (stored_field_cmp_to_item(param->thd, field, value) < 0))
7128 tree->min_flag= NEAR_MIN;
7129 tree->max_flag=NO_MAX_RANGE;
7130 break;
7131 case Item_func::SP_EQUALS_FUNC:
7132 tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512;
7133 tree->max_flag=NO_MAX_RANGE;
7134 break;
7135 case Item_func::SP_DISJOINT_FUNC:
7136 tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512;
7137 tree->max_flag=NO_MAX_RANGE;
7138 break;
7139 case Item_func::SP_INTERSECTS_FUNC:
7140 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
7141 tree->max_flag=NO_MAX_RANGE;
7142 break;
7143 case Item_func::SP_TOUCHES_FUNC:
7144 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
7145 tree->max_flag=NO_MAX_RANGE;
7146 break;
7147
7148 case Item_func::SP_CROSSES_FUNC:
7149 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
7150 tree->max_flag=NO_MAX_RANGE;
7151 break;
7152 case Item_func::SP_WITHIN_FUNC:
7153 /*
7154 Adjust the min_flag as MyISAM implements this function
7155 in reverse order.
7156 */
7157 tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512;
7158 tree->max_flag=NO_MAX_RANGE;
7159 break;
7160
7161 case Item_func::SP_CONTAINS_FUNC:
7162 /*
7163 Adjust the min_flag as MyISAM implements this function
7164 in reverse order.
7165 */
7166 tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512;
7167 tree->max_flag=NO_MAX_RANGE;
7168 break;
7169 case Item_func::SP_OVERLAPS_FUNC:
7170 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
7171 tree->max_flag=NO_MAX_RANGE;
7172 break;
7173
7174 default:
7175 break;
7176 }
7177
7178 end:
7179 if (impossible_cond_cause != NULL)
7180 {
7181 Opt_trace_object wrapper (¶m->thd->opt_trace);
7182 Opt_trace_object (¶m->thd->opt_trace, "impossible_condition",
7183 Opt_trace_context::RANGE_OPTIMIZER).
7184 add_alnum("cause", impossible_cond_cause);
7185 }
7186 param->thd->mem_root= alloc;
7187 DBUG_RETURN(tree);
7188 }
7189
7190
7191 /******************************************************************************
7192 ** Tree manipulation functions
7193 ** If tree is 0 it means that the condition can't be tested. It refers
7194 ** to a non existent table or to a field in current table with isn't a key.
7195 ** The different tree flags:
7196 ** IMPOSSIBLE: Condition is never TRUE
7197 ** ALWAYS: Condition is always TRUE
7198 ** MAYBE: Condition may exists when tables are read
7199 ** MAYBE_KEY: Condition refers to a key that may be used in join loop
7200 ** KEY_RANGE: Condition uses a key
7201 ******************************************************************************/
7202
7203 /*
7204 Add a new key test to a key when scanning through all keys
7205 This will never be called for same key parts.
7206 */
7207
7208 static SEL_ARG *
sel_add(SEL_ARG * key1,SEL_ARG * key2)7209 sel_add(SEL_ARG *key1,SEL_ARG *key2)
7210 {
7211 SEL_ARG *root,**key_link;
7212
7213 if (!key1)
7214 return key2;
7215 if (!key2)
7216 return key1;
7217
7218 key_link= &root;
7219 while (key1 && key2)
7220 {
7221 if (key1->part < key2->part)
7222 {
7223 *key_link= key1;
7224 key_link= &key1->next_key_part;
7225 key1=key1->next_key_part;
7226 }
7227 else
7228 {
7229 *key_link= key2;
7230 key_link= &key2->next_key_part;
7231 key2=key2->next_key_part;
7232 }
7233 }
7234 *key_link=key1 ? key1 : key2;
7235 return root;
7236 }
7237
7238 #define CLONE_KEY1_MAYBE 1
7239 #define CLONE_KEY2_MAYBE 2
7240 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
7241
7242
7243 static SEL_TREE *
tree_and(RANGE_OPT_PARAM * param,SEL_TREE * tree1,SEL_TREE * tree2)7244 tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
7245 {
7246 DBUG_ENTER("tree_and");
7247 if (!tree1)
7248 DBUG_RETURN(tree2);
7249 if (!tree2)
7250 DBUG_RETURN(tree1);
7251 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7252 DBUG_RETURN(tree1);
7253 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7254 DBUG_RETURN(tree2);
7255 if (tree1->type == SEL_TREE::MAYBE)
7256 {
7257 if (tree2->type == SEL_TREE::KEY)
7258 tree2->type=SEL_TREE::KEY_SMALLER;
7259 DBUG_RETURN(tree2);
7260 }
7261 if (tree2->type == SEL_TREE::MAYBE)
7262 {
7263 tree1->type=SEL_TREE::KEY_SMALLER;
7264 DBUG_RETURN(tree1);
7265 }
7266
7267 dbug_print_tree("tree1", tree1, param);
7268 dbug_print_tree("tree2", tree2, param);
7269
7270 key_map result_keys;
7271
7272 /* Join the trees key per key */
7273 SEL_ARG **key1,**key2,**end;
7274 for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
7275 key1 != end ; key1++,key2++)
7276 {
7277 uint flag=0;
7278 if (*key1 || *key2)
7279 {
7280 if (*key1 && !(*key1)->simple_key())
7281 flag|=CLONE_KEY1_MAYBE;
7282 if (*key2 && !(*key2)->simple_key())
7283 flag|=CLONE_KEY2_MAYBE;
7284 *key1=key_and(param, *key1, *key2, flag);
7285 if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
7286 {
7287 tree1->type= SEL_TREE::IMPOSSIBLE;
7288 DBUG_RETURN(tree1);
7289 }
7290 result_keys.set_bit(key1 - tree1->keys);
7291 #ifndef DBUG_OFF
7292 if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
7293 (*key1)->test_use_count(*key1);
7294 #endif
7295 }
7296 }
7297 tree1->keys_map= result_keys;
7298
7299 /* ok, both trees are index_merge trees */
7300 imerge_list_and_list(&tree1->merges, &tree2->merges);
7301 DBUG_RETURN(tree1);
7302 }
7303
7304
7305 /*
7306 Check if two SEL_TREES can be combined into one (i.e. a single key range
7307 read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
7308 using index_merge.
7309 */
7310
sel_trees_can_be_ored(SEL_TREE * tree1,SEL_TREE * tree2,RANGE_OPT_PARAM * param)7311 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
7312 RANGE_OPT_PARAM* param)
7313 {
7314 key_map common_keys= tree1->keys_map;
7315 DBUG_ENTER("sel_trees_can_be_ored");
7316 common_keys.intersect(tree2->keys_map);
7317
7318 dbug_print_tree("tree1", tree1, param);
7319 dbug_print_tree("tree2", tree2, param);
7320
7321 if (common_keys.is_clear_all())
7322 DBUG_RETURN(FALSE);
7323
7324 /* trees have a common key, check if they refer to same key part */
7325 SEL_ARG **key1,**key2;
7326 for (uint key_no=0; key_no < param->keys; key_no++)
7327 {
7328 if (common_keys.is_set(key_no))
7329 {
7330 key1= tree1->keys + key_no;
7331 key2= tree2->keys + key_no;
7332 if ((*key1)->part == (*key2)->part)
7333 DBUG_RETURN(TRUE);
7334 }
7335 }
7336 DBUG_RETURN(FALSE);
7337 }
7338
7339
7340 /*
7341 Remove the trees that are not suitable for record retrieval.
7342 SYNOPSIS
7343 param Range analysis parameter
7344 tree Tree to be processed, tree->type is KEY or KEY_SMALLER
7345
7346 DESCRIPTION
7347 This function walks through tree->keys[] and removes the SEL_ARG* trees
7348 that are not "maybe" trees (*) and cannot be used to construct quick range
7349 selects.
7350 (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
7351 these types here as well.
7352
7353 A SEL_ARG* tree cannot be used to construct quick select if it has
7354 tree->part != 0. (e.g. it could represent "keypart2 < const").
7355
7356 WHY THIS FUNCTION IS NEEDED
7357
7358 Normally we allow construction of SEL_TREE objects that have SEL_ARG
7359 trees that do not allow quick range select construction. For example for
7360 " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
7361 tree1= SEL_TREE { SEL_ARG{keypart1=1} }
7362 tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
7363 from this
7364 call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
7365 tree.
7366
7367 There is an exception though: when we construct index_merge SEL_TREE,
7368 any SEL_ARG* tree that cannot be used to construct quick range select can
7369 be removed, because current range analysis code doesn't provide any way
7370 that tree could be later combined with another tree.
7371 Consider an example: we should not construct
7372 st1 = SEL_TREE {
7373 merges = SEL_IMERGE {
7374 SEL_TREE(t.key1part1 = 1),
7375 SEL_TREE(t.key2part2 = 2) -- (*)
7376 }
7377 };
7378 because
7379 - (*) cannot be used to construct quick range select,
7380 - There is no execution path that would cause (*) to be converted to
7381 a tree that could be used.
7382
7383 The latter is easy to verify: first, notice that the only way to convert
7384 (*) into a usable tree is to call tree_and(something, (*)).
7385
7386 Second look at what tree_and/tree_or function would do when passed a
7387 SEL_TREE that has the structure like st1 tree has, and conlcude that
7388 tree_and(something, (*)) will not be called.
7389
7390 RETURN
7391 0 Ok, some suitable trees left
7392 1 No tree->keys[] left.
7393 */
7394
remove_nonrange_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree)7395 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
7396 {
7397 bool res= FALSE;
7398 for (uint i=0; i < param->keys; i++)
7399 {
7400 if (tree->keys[i])
7401 {
7402 if (tree->keys[i]->part)
7403 {
7404 tree->keys[i]= NULL;
7405 tree->keys_map.clear_bit(i);
7406 }
7407 else
7408 res= TRUE;
7409 }
7410 }
7411 return !res;
7412 }
7413
7414
7415 static SEL_TREE *
tree_or(RANGE_OPT_PARAM * param,SEL_TREE * tree1,SEL_TREE * tree2)7416 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
7417 {
7418 DBUG_ENTER("tree_or");
7419 if (!tree1 || !tree2)
7420 DBUG_RETURN(0);
7421 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7422 DBUG_RETURN(tree2);
7423 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7424 DBUG_RETURN(tree1);
7425 if (tree1->type == SEL_TREE::MAYBE)
7426 DBUG_RETURN(tree1); // Can't use this
7427 if (tree2->type == SEL_TREE::MAYBE)
7428 DBUG_RETURN(tree2);
7429
7430 /*
7431 It is possible that a tree contains both
7432 a) simple range predicates (in tree->keys[]) and
7433 b) index merge range predicates (in tree->merges)
7434
7435 If a tree has both, they represent equally *valid* range
7436 predicate alternatives; both will return all relevant rows from
7437 the table but one may return more unnecessary rows than the
7438 other (additional rows will be filtered later). However, doing
7439 an OR operation on trees with both types of predicates is too
7440 complex at the time. We therefore remove the index merge
7441 predicates (if we have both types) before OR'ing the trees.
7442
7443 TODO: enable tree_or() for trees with both simple and index
7444 merge range predicates.
7445 */
7446 if (!tree1->merges.is_empty())
7447 {
7448 for (uint i= 0; i < param->keys; i++)
7449 if (tree1->keys[i] != NULL && tree2->keys[i] != &null_element)
7450 {
7451 tree1->merges.empty();
7452 break;
7453 }
7454 }
7455 if (!tree2->merges.is_empty())
7456 {
7457 for (uint i= 0; i< param->keys; i++)
7458 if (tree2->keys[i] != NULL && tree2->keys[i] != &null_element)
7459 {
7460 tree2->merges.empty();
7461 break;
7462 }
7463 }
7464
7465 SEL_TREE *result= 0;
7466 key_map result_keys;
7467 if (sel_trees_can_be_ored(tree1, tree2, param))
7468 {
7469 /* Join the trees key per key */
7470 SEL_ARG **key1,**key2,**end;
7471 for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
7472 key1 != end ; key1++,key2++)
7473 {
7474 *key1=key_or(param, *key1, *key2);
7475 if (*key1)
7476 {
7477 result=tree1; // Added to tree1
7478 result_keys.set_bit(key1 - tree1->keys);
7479 #ifndef DBUG_OFF
7480 if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
7481 (*key1)->test_use_count(*key1);
7482 #endif
7483 }
7484 }
7485 if (result)
7486 result->keys_map= result_keys;
7487 }
7488 else
7489 {
7490 /* ok, two trees have KEY type but cannot be used without index merge */
7491 if (tree1->merges.is_empty() && tree2->merges.is_empty())
7492 {
7493 if (param->remove_jump_scans)
7494 {
7495 bool no_trees= remove_nonrange_trees(param, tree1);
7496 no_trees= no_trees || remove_nonrange_trees(param, tree2);
7497 if (no_trees)
7498 DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7499 }
7500 SEL_IMERGE *merge;
7501 /* both trees are "range" trees, produce new index merge structure */
7502 if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
7503 (result->merges.push_back(merge)) ||
7504 (merge->or_sel_tree(param, tree1)) ||
7505 (merge->or_sel_tree(param, tree2)))
7506 result= NULL;
7507 else
7508 result->type= tree1->type;
7509 }
7510 else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
7511 {
7512 if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
7513 result= new SEL_TREE(SEL_TREE::ALWAYS);
7514 else
7515 result= tree1;
7516 }
7517 else
7518 {
7519 /* one tree is index merge tree and another is range tree */
7520 if (tree1->merges.is_empty())
7521 swap_variables(SEL_TREE*, tree1, tree2);
7522
7523 if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
7524 DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7525 /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
7526 if (imerge_list_or_tree(param, &tree1->merges, tree2))
7527 result= new SEL_TREE(SEL_TREE::ALWAYS);
7528 else
7529 result= tree1;
7530 }
7531 }
7532 DBUG_RETURN(result);
7533 }
7534
7535
7536 /* And key trees where key1->part < key2 -> part */
7537
7538 static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2,uint clone_flag)7539 and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
7540 uint clone_flag)
7541 {
7542 SEL_ARG *next;
7543 ulong use_count=key1->use_count;
7544
7545 if (key1->elements != 1)
7546 {
7547 key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
7548 key2->increment_use_count((int) key1->elements-1);
7549 }
7550 if (key1->type == SEL_ARG::MAYBE_KEY)
7551 {
7552 // See todo for left/right pointers
7553 DBUG_ASSERT(!key1->left);
7554 DBUG_ASSERT(!key1->right);
7555 key1->next= key1->prev= 0;
7556 }
7557 for (next=key1->first(); next ; next=next->next)
7558 {
7559 if (next->next_key_part)
7560 {
7561 SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
7562 if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
7563 {
7564 key1=key1->tree_delete(next);
7565 continue;
7566 }
7567 next->next_key_part=tmp;
7568 if (use_count)
7569 next->increment_use_count(use_count);
7570 if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
7571 break;
7572 }
7573 else
7574 next->next_key_part=key2;
7575 }
7576 if (!key1)
7577 return &null_element; // Impossible ranges
7578 key1->use_count++;
7579 return key1;
7580 }
7581
7582
7583 /*
7584 Produce a SEL_ARG graph that represents "key1 AND key2"
7585
7586 SYNOPSIS
7587 key_and()
7588 param Range analysis context (needed to track if we have allocated
7589 too many SEL_ARGs)
7590 key1 First argument, root of its RB-tree
7591 key2 Second argument, root of its RB-tree
7592
7593 RETURN
7594 RB-tree root of the resulting SEL_ARG graph.
7595 NULL if the result of AND operation is an empty interval {0}.
7596 */
7597
7598 static SEL_ARG *
key_and(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2,uint clone_flag)7599 key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
7600 {
7601 if (!key1)
7602 return key2;
7603 if (!key2)
7604 return key1;
7605 if (key1->part != key2->part)
7606 {
7607 if (key1->part > key2->part)
7608 {
7609 swap_variables(SEL_ARG *, key1, key2);
7610 clone_flag=swap_clone_flag(clone_flag);
7611 }
7612 // key1->part < key2->part
7613 key1->use_count--;
7614 /*
7615 Clone key1 if the use_count is greater than 0 otherwise use the
7616 "clone_flag" to determine if a key needs to be cloned.
7617 "clone_flag" is set to true if the conditions which need to be
7618 ANDed (in tree_and) are not simple (has many OR conditions within).
7619 */
7620 if (key1->use_count > 0 || (clone_flag & CLONE_KEY2_MAYBE))
7621 if (!(key1= key1->clone_tree(param)))
7622 return 0; // OOM
7623 return and_all_keys(param, key1, key2, clone_flag);
7624 }
7625
7626 if (((clone_flag & CLONE_KEY2_MAYBE) &&
7627 !(clone_flag & CLONE_KEY1_MAYBE) &&
7628 key2->type != SEL_ARG::MAYBE_KEY) ||
7629 key1->type == SEL_ARG::MAYBE_KEY)
7630 { // Put simple key in key2
7631 swap_variables(SEL_ARG *, key1, key2);
7632 clone_flag=swap_clone_flag(clone_flag);
7633 }
7634
7635 /* If one of the key is MAYBE_KEY then the found region may be smaller */
7636 if (key2->type == SEL_ARG::MAYBE_KEY)
7637 {
7638 if (key1->use_count > 1)
7639 {
7640 key1->use_count--;
7641 if (!(key1=key1->clone_tree(param)))
7642 return 0; // OOM
7643 key1->use_count++;
7644 }
7645 if (key1->type == SEL_ARG::MAYBE_KEY)
7646 { // Both are maybe key
7647 key1->next_key_part=key_and(param, key1->next_key_part,
7648 key2->next_key_part, clone_flag);
7649 if (key1->next_key_part &&
7650 key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
7651 return key1;
7652 }
7653 else
7654 {
7655 key1->maybe_smaller();
7656 if (key2->next_key_part)
7657 {
7658 key1->use_count--; // Incremented in and_all_keys
7659 return and_all_keys(param, key1, key2, clone_flag);
7660 }
7661 key2->use_count--; // Key2 doesn't have a tree
7662 }
7663 return key1;
7664 }
7665
7666 if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
7667 {
7668 /* TODO: why not leave one of the trees? */
7669 key1->free_tree();
7670 key2->free_tree();
7671 return 0; // Can't optimize this
7672 }
7673
7674 key1->use_count--;
7675 key2->use_count--;
7676 SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
7677
7678 while (e1 && e2)
7679 {
7680 int cmp=e1->cmp_min_to_min(e2);
7681 if (cmp < 0)
7682 {
7683 if (get_range(&e1,&e2,key1))
7684 continue;
7685 }
7686 else if (get_range(&e2,&e1,key2))
7687 continue;
7688 SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
7689 clone_flag);
7690 e1->increment_use_count(1);
7691 e2->increment_use_count(1);
7692 if (!next || next->type != SEL_ARG::IMPOSSIBLE)
7693 {
7694 SEL_ARG *new_arg= e1->clone_and(e2);
7695 if (!new_arg)
7696 return &null_element; // End of memory
7697 new_arg->next_key_part=next;
7698 if (!new_tree)
7699 {
7700 new_tree=new_arg;
7701 }
7702 else
7703 new_tree=new_tree->insert(new_arg);
7704 }
7705 if (e1->cmp_max_to_max(e2) < 0)
7706 e1=e1->next; // e1 can't overlapp next e2
7707 else
7708 e2=e2->next;
7709 }
7710 key1->free_tree();
7711 key2->free_tree();
7712 if (!new_tree)
7713 return &null_element; // Impossible range
7714 return new_tree;
7715 }
7716
7717
7718 static bool
get_range(SEL_ARG ** e1,SEL_ARG ** e2,SEL_ARG * root1)7719 get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
7720 {
7721 (*e1)=root1->find_range(*e2); // first e1->min < e2->min
7722 if ((*e1)->cmp_max_to_min(*e2) < 0)
7723 {
7724 if (!((*e1)=(*e1)->next))
7725 return 1;
7726 if ((*e1)->cmp_min_to_max(*e2) > 0)
7727 {
7728 (*e2)=(*e2)->next;
7729 return 1;
7730 }
7731 }
7732 return 0;
7733 }
7734
7735
7736 /**
7737 Combine two range expression under a common OR. On a logical level, the
7738 transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
7739
7740 Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
7741 theformat is reminiscent of DNF, since an expression such as the following
7742
7743 ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
7744
7745 where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
7746 and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
7747 SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
7748 the first range and ranges must not overlap. It follows that they are also
7749 ordered by maximum endpoints. Thus
7750
7751 ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
7752
7753 Is a a valid SER_ARG expression for a key of at least 2 keyparts.
7754
7755 For simplicity, we will assume that expr2 is a single range predicate,
7756 i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
7757 disjunction of several predicates by subsequently call key_or for each
7758 disjunct.
7759
7760 The algorithm iterates over each disjunct of expr1, and for each disjunct
7761 where the first keypart's range overlaps with the first keypart's range in
7762 expr2:
7763
7764 If the predicates are equal for the rest of the keyparts, or if there are
7765 no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
7766 node in expr2 is deallocated. If more ranges became connected in expr1, the
7767 surplus is also dealocated. If they differ, two ranges are created.
7768
7769 - The range leading up to the overlap. Empty if endpoints are equal.
7770
7771 - The overlapping sub-range. May be the entire range if they are equal.
7772
7773 Finally, there may be one more range if expr2's first keypart's range has a
7774 greater maximum endpoint than the last range in expr1.
7775
7776 For the overlapping sub-range, we recursively call key_or. Thus in order to
7777 compute key_or of
7778
7779 (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
7780
7781 (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
7782
7783 We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
7784 first one, we simply hook on the condition for the second keypart from (1)
7785 : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
7786 < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
7787 the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
7788
7789 ( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR
7790 ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR
7791 ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
7792
7793 @param param PARAM from SQL_SELECT::test_quick_select
7794 @param key1 Root of RB-tree of SEL_ARGs to be ORed with key2
7795 @param key2 Root of RB-tree of SEL_ARGs to be ORed with key1
7796 */
7797 static SEL_ARG *
key_or(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2)7798 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
7799 {
7800 if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
7801 {
7802 if (key2)
7803 {
7804 key2->use_count--;
7805 key2->free_tree();
7806 }
7807 return key1;
7808 }
7809 if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
7810 // Case is symmetric to the one above, just flip parameters.
7811 return key_or(param, key2, key1);
7812
7813 key1->use_count--;
7814 key2->use_count--;
7815
7816 if (key1->part != key2->part ||
7817 (key1->min_flag | key2->min_flag) & GEOM_FLAG)
7818 {
7819 key1->free_tree();
7820 key2->free_tree();
7821 return 0; // Can't optimize this
7822 }
7823
7824 // If one of the key is MAYBE_KEY then the found region may be bigger
7825 if (key1->type == SEL_ARG::MAYBE_KEY)
7826 {
7827 key2->free_tree();
7828 key1->use_count++;
7829 return key1;
7830 }
7831 if (key2->type == SEL_ARG::MAYBE_KEY)
7832 {
7833 key1->free_tree();
7834 key2->use_count++;
7835 return key2;
7836 }
7837
7838 if (key1->use_count > 0)
7839 {
7840 if (key2->use_count == 0 || key1->elements > key2->elements)
7841 {
7842 swap_variables(SEL_ARG *,key1,key2);
7843 }
7844 if (key1->use_count > 0 && (key1= key1->clone_tree(param)) == NULL)
7845 return 0; // OOM
7846 }
7847
7848 // Add tree at key2 to tree at key1
7849 const bool key2_shared= (key2->use_count != 0);
7850 key1->maybe_flag|= key2->maybe_flag;
7851
7852 /*
7853 Notation for illustrations used in the rest of this function:
7854
7855 Range: [--------]
7856 ^ ^
7857 start stop
7858
7859 Two overlapping ranges:
7860 [-----] [----] [--]
7861 [---] or [---] or [-------]
7862
7863 Ambiguity: ***
7864 The range starts or stops somewhere in the "***" range.
7865 Example: a starts before b and may end before/the same place/after b
7866 a: [----***]
7867 b: [---]
7868
7869 Adjacent ranges:
7870 Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
7871 a: ----]
7872 b: [----
7873 */
7874
7875 SEL_ARG *cur_key2= key2->first();
7876 while (cur_key2)
7877 {
7878 /*
7879 key1 consists of one or more ranges. cur_key1 is the
7880 range currently being handled.
7881
7882 initialize cur_key1 to the latest range in key1 that starts the
7883 same place or before the range in cur_key2 starts
7884
7885 cur_key2: [------]
7886 key1: [---] [-----] [----]
7887 ^
7888 cur_key1
7889 */
7890 SEL_ARG *cur_key1= key1->find_range(cur_key2);
7891
7892 /*
7893 Used to describe how two key values are positioned compared to
7894 each other. Consider key_value_a.<cmp_func>(key_value_b):
7895
7896 -2: key_value_a is smaller than key_value_b, and they are adjacent
7897 -1: key_value_a is smaller than key_value_b (not adjacent)
7898 0: the key values are equal
7899 1: key_value_a is bigger than key_value_b (not adjacent)
7900 2: key_value_a is bigger than key_value_b, and they are adjacent
7901
7902 Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"
7903
7904 cur_key2: [-------- (10 <= x ... )
7905 cur_key1: -----] ( ... x < 10) => cmp==-2
7906 cur_key1: ----] ( ... x < 9) => cmp==-1
7907 cur_key1: ------] ( ... x <= 10) => cmp== 0
7908 cur_key1: --------] ( ... x <= 12) => cmp== 1
7909 (cmp == 2 does not make sense for cmp_max_to_min())
7910 */
7911 int cmp= 0;
7912
7913 if (!cur_key1)
7914 {
7915 /*
7916 The range in cur_key2 starts before the first range in key1. Use
7917 the first range in key1 as cur_key1.
7918
7919 cur_key2: [--------]
7920 key1: [****--] [----] [-------]
7921 ^
7922 cur_key1
7923 */
7924 cur_key1= key1->first();
7925 cmp= -1;
7926 }
7927 else if ((cmp= cur_key1->cmp_max_to_min(cur_key2)) < 0)
7928 {
7929 /*
7930 This is the case:
7931 cur_key2: [-------]
7932 cur_key1: [----**]
7933 */
7934 SEL_ARG *next_key1= cur_key1->next;
7935 if (cmp == -2 &&
7936 eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
7937 {
7938 /*
7939 Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
7940
7941 This is the case:
7942 cur_key2: [-------]
7943 cur_key1: [----]
7944
7945 Result:
7946 cur_key2: [-------------] => inserted into key1 below
7947 cur_key1: => deleted
7948 */
7949 SEL_ARG *next_key2= cur_key2->next;
7950 if (key2_shared)
7951 {
7952 if (!(cur_key2= new SEL_ARG(*cur_key2)))
7953 return 0; // out of memory
7954 cur_key2->increment_use_count(key1->use_count+1);
7955 cur_key2->next= next_key2; // New copy of cur_key2
7956 }
7957
7958 if (cur_key2->copy_min(cur_key1))
7959 {
7960 // cur_key2 is full range: [-inf <= cur_key2 <= +inf]
7961 key1->free_tree();
7962 key2->free_tree();
7963 key1->type= SEL_ARG::ALWAYS;
7964 key2->type= SEL_ARG::ALWAYS;
7965 if (key1->maybe_flag)
7966 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
7967 return 0;
7968 }
7969
7970 if (!(key1= key1->tree_delete(cur_key1)))
7971 {
7972 /*
7973 cur_key1 was the last range in key1; move the cur_key2
7974 range that was merged above to key1
7975 */
7976 key1= cur_key2;
7977 key1->make_root();
7978 cur_key2= next_key2;
7979 break;
7980 }
7981 }
7982 // Move to next range in key1. Now cur_key1.min > cur_key2.min
7983 if (!(cur_key1= next_key1))
7984 break; // No more ranges in key1. Copy rest of key2
7985 }
7986
7987 if (cmp < 0)
7988 {
7989 /*
7990 This is the case:
7991 cur_key2: [--***]
7992 cur_key1: [----]
7993 */
7994 int cur_key1_cmp;
7995 if ((cur_key1_cmp= cur_key1->cmp_min_to_max(cur_key2)) > 0)
7996 {
7997 /*
7998 This is the case:
7999 cur_key2: [------**]
8000 cur_key1: [----]
8001 */
8002 if (cur_key1_cmp == 2 &&
8003 eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8004 {
8005 /*
8006 Adjacent ranges with equal next_key_part. Merge like this:
8007
8008 This is the case:
8009 cur_key2: [------]
8010 cur_key1: [-----]
8011
8012 Result:
8013 cur_key2: [------]
8014 cur_key1: [-------------]
8015
8016 Then move on to next key2 range.
8017 */
8018 cur_key1->copy_min_to_min(cur_key2);
8019 key1->merge_flags(cur_key2); //should be cur_key1->merge...() ?
8020 if (cur_key1->min_flag & NO_MIN_RANGE &&
8021 cur_key1->max_flag & NO_MAX_RANGE)
8022 {
8023 if (key1->maybe_flag)
8024 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8025 return 0;
8026 }
8027 cur_key2->increment_use_count(-1); // Free not used tree
8028 cur_key2=cur_key2->next;
8029 continue;
8030 }
8031 else
8032 {
8033 /*
8034 cur_key2 not adjacent to cur_key1 or has different next_key_part.
8035 Insert into key1 and move to next range in key2
8036
8037 This is the case:
8038 cur_key2: [------**]
8039 cur_key1: [----]
8040
8041 Result:
8042 key1: [------**][----]
8043 ^ ^
8044 insert cur_key1
8045 */
8046 SEL_ARG *next_key2= cur_key2->next;
8047 if (key2_shared)
8048 {
8049 SEL_ARG *cpy= new SEL_ARG(*cur_key2); // Must make copy
8050 if (!cpy)
8051 return 0; // OOM
8052 key1= key1->insert(cpy);
8053 cur_key2->increment_use_count(key1->use_count+1);
8054 }
8055 else
8056 key1= key1->insert(cur_key2); // Will destroy key2_root
8057 cur_key2= next_key2;
8058 continue;
8059 }
8060 }
8061 }
8062
8063 /*
8064 The ranges in cur_key1 and cur_key2 are overlapping:
8065
8066 cur_key2: [----------]
8067 cur_key1: [*****-----*****]
8068
8069 Corollary: cur_key1.min <= cur_key2.max
8070 */
8071 if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8072 {
8073 // Merge overlapping ranges with equal next_key_part
8074 if (cur_key1->is_same(cur_key2))
8075 {
8076 /*
8077 cur_key1 covers exactly the same range as cur_key2
8078 Use the relevant range in key1.
8079 */
8080 cur_key1->merge_flags(cur_key2); // Copy maybe flags
8081 cur_key2->increment_use_count(-1); // Free not used tree
8082 }
8083 else
8084 {
8085 SEL_ARG *last= cur_key1;
8086 SEL_ARG *first= cur_key1;
8087
8088 /*
8089 Find the last range in key1 that overlaps cur_key2 and
8090 where all ranges first...last have the same next_key_part as
8091 cur_key2.
8092
8093 cur_key2: [****----------------------*******]
8094 key1: [--] [----] [---] [-----] [xxxx]
8095 ^ ^ ^
8096 first last different next_key_part
8097
8098 Since cur_key2 covers them, the ranges between first and last
8099 are merged into one range by deleting first...last-1 from
8100 the key1 tree. In the figure, this applies to first and the
8101 two consecutive ranges. The range of last is then extended:
8102 * last.min: Set to min(cur_key2.min, first.min)
8103 * last.max: If there is a last->next that overlaps cur_key2
8104 (i.e., last->next has a different next_key_part):
8105 Set adjacent to last->next.min
8106 Otherwise: Set to max(cur_key2.max, last.max)
8107
8108 Result:
8109 cur_key2: [****----------------------*******]
8110 [--] [----] [---] => deleted from key1
8111 key1: [**------------------------***][xxxx]
8112 ^ ^
8113 cur_key1=last different next_key_part
8114 */
8115 while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
8116 eq_tree(last->next->next_key_part, cur_key2->next_key_part))
8117 {
8118 /*
8119 last->next is covered by cur_key2 and has same next_key_part.
8120 last can be deleted
8121 */
8122 SEL_ARG *save=last;
8123 last=last->next;
8124 key1= key1->tree_delete(save);
8125 }
8126 // Redirect cur_key1 to last which will cover the entire range
8127 cur_key1= last;
8128
8129 /*
8130 Extend last to cover the entire range of
8131 [min(first.min_value,cur_key2.min_value)...last.max_value].
8132 If this forms a full range (the range covers all possible
8133 values) we return no SEL_ARG RB-tree.
8134 */
8135 bool full_range= last->copy_min(first);
8136 if (!full_range)
8137 full_range= last->copy_min(cur_key2);
8138
8139 if (!full_range)
8140 {
8141 if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0)
8142 {
8143 /*
8144 This is the case:
8145 cur_key2: [-------------]
8146 key1: [***------] [xxxx]
8147 ^ ^
8148 last different next_key_part
8149
8150 Extend range of last up to last->next:
8151 cur_key2: [-------------]
8152 key1: [***--------][xxxx]
8153 */
8154 last->copy_min_to_max(last->next);
8155 }
8156 else
8157 /*
8158 This is the case:
8159 cur_key2: [--------*****]
8160 key1: [***---------] [xxxx]
8161 ^ ^
8162 last different next_key_part
8163
8164 Extend range of last up to max(last.max, cur_key2.max):
8165 cur_key2: [--------*****]
8166 key1: [***----------**] [xxxx]
8167 */
8168 full_range= last->copy_max(cur_key2);
8169 }
8170 if (full_range)
8171 { // Full range
8172 key1->free_tree();
8173 key1->type= SEL_ARG::ALWAYS;
8174 key2->type= SEL_ARG::ALWAYS;
8175 for (; cur_key2 ; cur_key2= cur_key2->next)
8176 cur_key2->increment_use_count(-1); // Free not used tree
8177 if (key1->maybe_flag)
8178 return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8179 return 0;
8180 }
8181 }
8182 }
8183
8184 if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0)
8185 {
8186 /*
8187 This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
8188 cur_key2: [-------]
8189 cur_key1: [----------*******]
8190 */
8191
8192 if (!cur_key1->next_key_part)
8193 {
8194 /*
8195 cur_key1->next_key_part is empty: cut the range that
8196 is covered by cur_key1 from cur_key2.
8197 Reason: (cur_key2->next_key_part OR
8198 cur_key1->next_key_part) will be empty and therefore
8199 equal to cur_key1->next_key_part. Thus, this part of
8200 the cur_key2 range is completely covered by cur_key1.
8201 */
8202 if (cur_key1->cmp_max_to_max(cur_key2) >= 0)
8203 {
8204 /*
8205 cur_key1 covers the entire range in cur_key2.
8206 cur_key2: [-------]
8207 cur_key1: [-----------------]
8208
8209 Move on to next range in key2
8210 */
8211 cur_key2->increment_use_count(-1); // Free not used tree
8212 cur_key2= cur_key2->next;
8213 continue;
8214 }
8215 else
8216 {
8217 /*
8218 This is the case:
8219 cur_key2: [-------]
8220 cur_key1: [---------]
8221
8222 Result:
8223 cur_key2: [---]
8224 cur_key1: [---------]
8225 */
8226 cur_key2->copy_max_to_min(cur_key1);
8227 continue;
8228 }
8229 }
8230
8231 /*
8232 The ranges are overlapping but have not been merged because
8233 next_key_part of cur_key1 and cur_key2 differ.
8234 cur_key2: [----]
8235 cur_key1: [------------*****]
8236
8237 Split cur_key1 in two where cur_key2 starts:
8238 cur_key2: [----]
8239 key1: [--------][--*****]
8240 ^ ^
8241 insert cur_key1
8242 */
8243 SEL_ARG *new_arg= cur_key1->clone_first(cur_key2);
8244 if (!new_arg)
8245 return 0; // OOM
8246 if ((new_arg->next_key_part= cur_key1->next_key_part))
8247 new_arg->increment_use_count(key1->use_count+1);
8248 cur_key1->copy_min_to_min(cur_key2);
8249 key1= key1->insert(new_arg);
8250 } // cur_key1.min >= cur_key2.min due to this if()
8251
8252 /*
8253 Now cur_key2.min <= cur_key1.min <= cur_key2.max:
8254 cur_key2: [---------]
8255 cur_key1: [****---*****]
8256 */
8257 SEL_ARG key2_cpy(*cur_key2); // Get copy we can modify
8258 for (;;)
8259 {
8260 if (cur_key1->cmp_min_to_min(&key2_cpy) > 0)
8261 {
8262 /*
8263 This is the case:
8264 key2_cpy: [------------]
8265 key1: [-*****]
8266 ^
8267 cur_key1
8268
8269 Result:
8270 key2_cpy: [---]
8271 key1: [-------][-*****]
8272 ^ ^
8273 insert cur_key1
8274 */
8275 SEL_ARG *new_arg=key2_cpy.clone_first(cur_key1);
8276 if (!new_arg)
8277 return 0; // OOM
8278 if ((new_arg->next_key_part=key2_cpy.next_key_part))
8279 new_arg->increment_use_count(key1->use_count+1);
8280 key1= key1->insert(new_arg);
8281 key2_cpy.copy_min_to_min(cur_key1);
8282 }
8283 // Now key2_cpy.min == cur_key1.min
8284
8285 if ((cmp= cur_key1->cmp_max_to_max(&key2_cpy)) <= 0)
8286 {
8287 /*
8288 cur_key1.max <= key2_cpy.max:
8289 key2_cpy: a) [-------] or b) [----]
8290 cur_key1: [----] [----]
8291
8292 Steps:
8293
8294 1) Update next_key_part of cur_key1: OR it with
8295 key2_cpy->next_key_part.
8296 2) If case a: Insert range [cur_key1.max, key2_cpy.max]
8297 into key1 using next_key_part of key2_cpy
8298
8299 Result:
8300 key1: a) [----][-] or b) [----]
8301 */
8302 cur_key1->maybe_flag|= key2_cpy.maybe_flag;
8303 key2_cpy.increment_use_count(key1->use_count+1);
8304 cur_key1->next_key_part=
8305 key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part);
8306
8307 if (!cmp)
8308 break; // case b: done with this key2 range
8309
8310 // Make key2_cpy the range [cur_key1.max, key2_cpy.max]
8311 key2_cpy.copy_max_to_min(cur_key1);
8312 if (!(cur_key1= cur_key1->next))
8313 {
8314 /*
8315 No more ranges in key1. Insert key2_cpy and go to "end"
8316 label to insert remaining ranges in key2 if any.
8317 */
8318 SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8319 if (!new_key1_range)
8320 return 0; // OOM
8321 key1= key1->insert(new_key1_range);
8322 cur_key2= cur_key2->next;
8323 goto end;
8324 }
8325 if (cur_key1->cmp_min_to_max(&key2_cpy) > 0)
8326 {
8327 /*
8328 The next range in key1 does not overlap with key2_cpy.
8329 Insert this range into key1 and move on to the next range
8330 in key2.
8331 */
8332 SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8333 if (!new_key1_range)
8334 return 0; // OOM
8335 key1= key1->insert(new_key1_range);
8336 break;
8337 }
8338 /*
8339 key2_cpy overlaps with the next range in key1 and the case
8340 is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
8341 to for(;;) to handle this situation.
8342 */
8343 continue;
8344 }
8345 else
8346 {
8347 /*
8348 This is the case:
8349 key2_cpy: [-------]
8350 cur_key1: [------------]
8351
8352 Result:
8353 key1: [-------][---]
8354 ^ ^
8355 new_arg cur_key1
8356 Steps:
8357
8358 0) If cur_key1->next_key_part is empty: do nothing.
8359 Reason: (key2_cpy->next_key_part OR
8360 cur_key1->next_key_part) will be empty and
8361 therefore equal to cur_key1->next_key_part. Thus,
8362 the range in key2_cpy is completely covered by
8363 cur_key1
8364 1) Make new_arg with range [cur_key1.min, key2_cpy.max].
8365 new_arg->next_key_part is OR between next_key_part of
8366 cur_key1 and key2_cpy
8367 2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
8368 3) Insert new_arg into key1
8369 */
8370 if (!cur_key1->next_key_part) // Step 0
8371 {
8372 key2_cpy.increment_use_count(-1); // Free not used tree
8373 break;
8374 }
8375 SEL_ARG *new_arg= cur_key1->clone_last(&key2_cpy);
8376 if (!new_arg)
8377 return 0; // OOM
8378 cur_key1->copy_max_to_min(&key2_cpy);
8379 cur_key1->increment_use_count(key1->use_count+1);
8380 /* Increment key count as it may be used for next loop */
8381 key2_cpy.increment_use_count(1);
8382 new_arg->next_key_part= key_or(param, cur_key1->next_key_part,
8383 key2_cpy.next_key_part);
8384 key1= key1->insert(new_arg);
8385 break;
8386 }
8387 }
8388 // Move on to next range in key2
8389 cur_key2= cur_key2->next;
8390 }
8391
8392 end:
8393 /*
8394 Add key2 ranges that are non-overlapping with and higher than the
8395 highest range in key1.
8396 */
8397 while (cur_key2)
8398 {
8399 SEL_ARG *next= cur_key2->next;
8400 if (key2_shared)
8401 {
8402 SEL_ARG *key2_cpy=new SEL_ARG(*cur_key2); // Must make copy
8403 if (!key2_cpy)
8404 return 0;
8405 cur_key2->increment_use_count(key1->use_count+1);
8406 key1= key1->insert(key2_cpy);
8407 }
8408 else
8409 key1= key1->insert(cur_key2); // Will destroy key2_root
8410 cur_key2= next;
8411 }
8412 key1->use_count++;
8413
8414 return key1;
8415 }
8416
8417
8418 /* Compare if two trees are equal */
8419
eq_tree(SEL_ARG * a,SEL_ARG * b)8420 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
8421 {
8422 if (a == b)
8423 return 1;
8424 if (!a || !b || !a->is_same(b))
8425 return 0;
8426 if (a->left != &null_element && b->left != &null_element)
8427 {
8428 if (!eq_tree(a->left,b->left))
8429 return 0;
8430 }
8431 else if (a->left != &null_element || b->left != &null_element)
8432 return 0;
8433 if (a->right != &null_element && b->right != &null_element)
8434 {
8435 if (!eq_tree(a->right,b->right))
8436 return 0;
8437 }
8438 else if (a->right != &null_element || b->right != &null_element)
8439 return 0;
8440 if (a->next_key_part != b->next_key_part)
8441 { // Sub range
8442 if (!a->next_key_part != !b->next_key_part ||
8443 !eq_tree(a->next_key_part, b->next_key_part))
8444 return 0;
8445 }
8446 return 1;
8447 }
8448
8449
8450 SEL_ARG *
insert(SEL_ARG * key)8451 SEL_ARG::insert(SEL_ARG *key)
8452 {
8453 SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element);
8454
8455 for (element= this; element != &null_element ; )
8456 {
8457 last_element=element;
8458 if (key->cmp_min_to_min(element) > 0)
8459 {
8460 par= &element->right; element= element->right;
8461 }
8462 else
8463 {
8464 par = &element->left; element= element->left;
8465 }
8466 }
8467 *par=key;
8468 key->parent=last_element;
8469 /* Link in list */
8470 if (par == &last_element->left)
8471 {
8472 key->next=last_element;
8473 if ((key->prev=last_element->prev))
8474 key->prev->next=key;
8475 last_element->prev=key;
8476 }
8477 else
8478 {
8479 if ((key->next=last_element->next))
8480 key->next->prev=key;
8481 key->prev=last_element;
8482 last_element->next=key;
8483 }
8484 key->left=key->right= &null_element;
8485 SEL_ARG *root=rb_insert(key); // rebalance tree
8486 root->use_count=this->use_count; // copy root info
8487 root->elements= this->elements+1;
8488 root->maybe_flag=this->maybe_flag;
8489 return root;
8490 }
8491
8492
8493 /*
8494 ** Find best key with min <= given key
8495 ** Because the call context this should never return 0 to get_range
8496 */
8497
8498 SEL_ARG *
find_range(SEL_ARG * key)8499 SEL_ARG::find_range(SEL_ARG *key)
8500 {
8501 SEL_ARG *element=this,*found=0;
8502
8503 for (;;)
8504 {
8505 if (element == &null_element)
8506 return found;
8507 int cmp=element->cmp_min_to_min(key);
8508 if (cmp == 0)
8509 return element;
8510 if (cmp < 0)
8511 {
8512 found=element;
8513 element=element->right;
8514 }
8515 else
8516 element=element->left;
8517 }
8518 }
8519
8520
8521 /*
8522 Remove a element from the tree
8523
8524 SYNOPSIS
8525 tree_delete()
8526 key Key that is to be deleted from tree (this)
8527
8528 NOTE
8529 This also frees all sub trees that is used by the element
8530
8531 RETURN
8532 root of new tree (with key deleted)
8533 */
8534
8535 SEL_ARG *
tree_delete(SEL_ARG * key)8536 SEL_ARG::tree_delete(SEL_ARG *key)
8537 {
8538 enum leaf_color remove_color;
8539 SEL_ARG *root,*nod,**par,*fix_par;
8540 DBUG_ENTER("tree_delete");
8541
8542 root=this;
8543 this->parent= 0;
8544
8545 /* Unlink from list */
8546 if (key->prev)
8547 key->prev->next=key->next;
8548 if (key->next)
8549 key->next->prev=key->prev;
8550 key->increment_use_count(-1);
8551 if (!key->parent)
8552 par= &root;
8553 else
8554 par=key->parent_ptr();
8555
8556 if (key->left == &null_element)
8557 {
8558 *par=nod=key->right;
8559 fix_par=key->parent;
8560 if (nod != &null_element)
8561 nod->parent=fix_par;
8562 remove_color= key->color;
8563 }
8564 else if (key->right == &null_element)
8565 {
8566 *par= nod=key->left;
8567 nod->parent=fix_par=key->parent;
8568 remove_color= key->color;
8569 }
8570 else
8571 {
8572 SEL_ARG *tmp=key->next; // next bigger key (exist!)
8573 nod= *tmp->parent_ptr()= tmp->right; // unlink tmp from tree
8574 fix_par=tmp->parent;
8575 if (nod != &null_element)
8576 nod->parent=fix_par;
8577 remove_color= tmp->color;
8578
8579 tmp->parent=key->parent; // Move node in place of key
8580 (tmp->left=key->left)->parent=tmp;
8581 if ((tmp->right=key->right) != &null_element)
8582 tmp->right->parent=tmp;
8583 tmp->color=key->color;
8584 *par=tmp;
8585 if (fix_par == key) // key->right == key->next
8586 fix_par=tmp; // new parent of nod
8587 }
8588
8589 if (root == &null_element)
8590 DBUG_RETURN(0); // Maybe root later
8591 if (remove_color == BLACK)
8592 root=rb_delete_fixup(root,nod,fix_par);
8593 #ifndef DBUG_OFF
8594 test_rb_tree(root,root->parent);
8595 #endif
8596 root->use_count=this->use_count; // Fix root counters
8597 root->elements=this->elements-1;
8598 root->maybe_flag=this->maybe_flag;
8599 DBUG_RETURN(root);
8600 }
8601
8602
8603 /* Functions to fix up the tree after insert and delete */
8604
left_rotate(SEL_ARG ** root,SEL_ARG * leaf)8605 static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
8606 {
8607 SEL_ARG *y=leaf->right;
8608 leaf->right=y->left;
8609 if (y->left != &null_element)
8610 y->left->parent=leaf;
8611 if (!(y->parent=leaf->parent))
8612 *root=y;
8613 else
8614 *leaf->parent_ptr()=y;
8615 y->left=leaf;
8616 leaf->parent=y;
8617 }
8618
right_rotate(SEL_ARG ** root,SEL_ARG * leaf)8619 static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
8620 {
8621 SEL_ARG *y=leaf->left;
8622 leaf->left=y->right;
8623 if (y->right != &null_element)
8624 y->right->parent=leaf;
8625 if (!(y->parent=leaf->parent))
8626 *root=y;
8627 else
8628 *leaf->parent_ptr()=y;
8629 y->right=leaf;
8630 leaf->parent=y;
8631 }
8632
8633
8634 SEL_ARG *
rb_insert(SEL_ARG * leaf)8635 SEL_ARG::rb_insert(SEL_ARG *leaf)
8636 {
8637 SEL_ARG *y,*par,*par2,*root;
8638 root= this; root->parent= 0;
8639
8640 leaf->color=RED;
8641 while (leaf != root && (par= leaf->parent)->color == RED)
8642 { // This can't be root or 1 level under
8643 if (par == (par2= leaf->parent->parent)->left)
8644 {
8645 y= par2->right;
8646 if (y->color == RED)
8647 {
8648 par->color=BLACK;
8649 y->color=BLACK;
8650 leaf=par2;
8651 leaf->color=RED; /* And the loop continues */
8652 }
8653 else
8654 {
8655 if (leaf == par->right)
8656 {
8657 left_rotate(&root,leaf->parent);
8658 par=leaf; /* leaf is now parent to old leaf */
8659 }
8660 par->color=BLACK;
8661 par2->color=RED;
8662 right_rotate(&root,par2);
8663 break;
8664 }
8665 }
8666 else
8667 {
8668 y= par2->left;
8669 if (y->color == RED)
8670 {
8671 par->color=BLACK;
8672 y->color=BLACK;
8673 leaf=par2;
8674 leaf->color=RED; /* And the loop continues */
8675 }
8676 else
8677 {
8678 if (leaf == par->left)
8679 {
8680 right_rotate(&root,par);
8681 par=leaf;
8682 }
8683 par->color=BLACK;
8684 par2->color=RED;
8685 left_rotate(&root,par2);
8686 break;
8687 }
8688 }
8689 }
8690 root->color=BLACK;
8691 #ifndef DBUG_OFF
8692 test_rb_tree(root,root->parent);
8693 #endif
8694 return root;
8695 }
8696
8697
rb_delete_fixup(SEL_ARG * root,SEL_ARG * key,SEL_ARG * par)8698 SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
8699 {
8700 SEL_ARG *x,*w;
8701 root->parent=0;
8702
8703 x= key;
8704 while (x != root && x->color == SEL_ARG::BLACK)
8705 {
8706 if (x == par->left)
8707 {
8708 w=par->right;
8709 if (w->color == SEL_ARG::RED)
8710 {
8711 w->color=SEL_ARG::BLACK;
8712 par->color=SEL_ARG::RED;
8713 left_rotate(&root,par);
8714 w=par->right;
8715 }
8716 if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
8717 {
8718 w->color=SEL_ARG::RED;
8719 x=par;
8720 }
8721 else
8722 {
8723 if (w->right->color == SEL_ARG::BLACK)
8724 {
8725 w->left->color=SEL_ARG::BLACK;
8726 w->color=SEL_ARG::RED;
8727 right_rotate(&root,w);
8728 w=par->right;
8729 }
8730 w->color=par->color;
8731 par->color=SEL_ARG::BLACK;
8732 w->right->color=SEL_ARG::BLACK;
8733 left_rotate(&root,par);
8734 x=root;
8735 break;
8736 }
8737 }
8738 else
8739 {
8740 w=par->left;
8741 if (w->color == SEL_ARG::RED)
8742 {
8743 w->color=SEL_ARG::BLACK;
8744 par->color=SEL_ARG::RED;
8745 right_rotate(&root,par);
8746 w=par->left;
8747 }
8748 if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
8749 {
8750 w->color=SEL_ARG::RED;
8751 x=par;
8752 }
8753 else
8754 {
8755 if (w->left->color == SEL_ARG::BLACK)
8756 {
8757 w->right->color=SEL_ARG::BLACK;
8758 w->color=SEL_ARG::RED;
8759 left_rotate(&root,w);
8760 w=par->left;
8761 }
8762 w->color=par->color;
8763 par->color=SEL_ARG::BLACK;
8764 w->left->color=SEL_ARG::BLACK;
8765 right_rotate(&root,par);
8766 x=root;
8767 break;
8768 }
8769 }
8770 par=x->parent;
8771 }
8772 x->color=SEL_ARG::BLACK;
8773 return root;
8774 }
8775
8776
8777 #ifndef DBUG_OFF
8778 /* Test that the properties for a red-black tree hold */
8779
test_rb_tree(SEL_ARG * element,SEL_ARG * parent)8780 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
8781 {
8782 int count_l,count_r;
8783
8784 if (element == &null_element)
8785 return 0; // Found end of tree
8786 if (element->parent != parent)
8787 {
8788 sql_print_error("Wrong tree: Parent doesn't point at parent");
8789 return -1;
8790 }
8791 if (element->color == SEL_ARG::RED &&
8792 (element->left->color == SEL_ARG::RED ||
8793 element->right->color == SEL_ARG::RED))
8794 {
8795 sql_print_error("Wrong tree: Found two red in a row");
8796 return -1;
8797 }
8798 if (element->left == element->right && element->left != &null_element)
8799 { // Dummy test
8800 sql_print_error("Wrong tree: Found right == left");
8801 return -1;
8802 }
8803 count_l=test_rb_tree(element->left,element);
8804 count_r=test_rb_tree(element->right,element);
8805 if (count_l >= 0 && count_r >= 0)
8806 {
8807 if (count_l == count_r)
8808 return count_l+(element->color == SEL_ARG::BLACK);
8809 sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
8810 count_l,count_r);
8811 }
8812 return -1; // Error, no more warnings
8813 }
8814
8815
8816 /**
8817 Count how many times SEL_ARG graph "root" refers to its part "key" via
8818 transitive closure.
8819
8820 @param root An RB-Root node in a SEL_ARG graph.
8821 @param key Another RB-Root node in that SEL_ARG graph.
8822
8823 The passed "root" node may refer to "key" node via root->next_key_part,
8824 root->next->n
8825
8826 This function counts how many times the node "key" is referred (via
8827 SEL_ARG::next_key_part) by
8828 - intervals of RB-tree pointed by "root",
8829 - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
8830 intervals of RB-tree pointed by "root",
8831 - and so on.
8832
8833 Here is an example (horizontal links represent next_key_part pointers,
8834 vertical links - next/prev prev pointers):
8835
8836 +----+ $
8837 |root|-----------------+
8838 +----+ $ |
8839 | $ |
8840 | $ |
8841 +----+ +---+ $ | +---+ Here the return value
8842 | |- ... -| |---$-+--+->|key| will be 4.
8843 +----+ +---+ $ | | +---+
8844 | $ | |
8845 ... $ | |
8846 | $ | |
8847 +----+ +---+ $ | |
8848 | |---| |---------+ |
8849 +----+ +---+ $ |
8850 | | $ |
8851 ... +---+ $ |
8852 | |------------+
8853 +---+ $
8854 @return
8855 Number of links to "key" from nodes reachable from "root".
8856 */
8857
count_key_part_usage(SEL_ARG * root,SEL_ARG * key)8858 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
8859 {
8860 ulong count= 0;
8861 for (root=root->first(); root ; root=root->next)
8862 {
8863 if (root->next_key_part)
8864 {
8865 if (root->next_key_part == key)
8866 count++;
8867 if (root->next_key_part->part < key->part)
8868 count+=count_key_part_usage(root->next_key_part,key);
8869 }
8870 }
8871 return count;
8872 }
8873
8874
8875 /*
8876 Check if SEL_ARG::use_count value is correct
8877
8878 SYNOPSIS
8879 SEL_ARG::test_use_count()
8880 root The root node of the SEL_ARG graph (an RB-tree root node that
8881 has the least value of sel_arg->part in the entire graph, and
8882 thus is the "origin" of the graph)
8883
8884 DESCRIPTION
8885 Check if SEL_ARG::use_count value is correct. See the definition of
8886 use_count for what is "correct".
8887 */
8888
test_use_count(SEL_ARG * root)8889 void SEL_ARG::test_use_count(SEL_ARG *root)
8890 {
8891 uint e_count=0;
8892 if (this == root && use_count != 1)
8893 {
8894 sql_print_information("Use_count: Wrong count %lu for root",use_count);
8895 // DBUG_ASSERT(false); // Todo - enable and clean up mess
8896 return;
8897 }
8898 if (this->type != SEL_ARG::KEY_RANGE)
8899 return;
8900 for (SEL_ARG *pos=first(); pos ; pos=pos->next)
8901 {
8902 e_count++;
8903 if (pos->next_key_part)
8904 {
8905 ulong count=count_key_part_usage(root,pos->next_key_part);
8906 if (count > pos->next_key_part->use_count)
8907 {
8908 sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu "
8909 "should be %lu", (long unsigned int)pos,
8910 pos->next_key_part->use_count, count);
8911 // DBUG_ASSERT(false); // Todo - enable and clean up mess
8912 return;
8913 }
8914 pos->next_key_part->test_use_count(root);
8915 }
8916 }
8917 if (e_count != elements)
8918 {
8919 sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
8920 e_count, elements, (long unsigned int) this);
8921 // DBUG_ASSERT(false); // Todo - enable and clean up mess
8922 }
8923 }
8924 #endif
8925
8926 /****************************************************************************
8927 MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
8928 ****************************************************************************/
8929
8930 /* MRR range sequence, SEL_ARG* implementation: stack entry */
8931 typedef struct st_range_seq_entry
8932 {
8933 /*
8934 Pointers in min and max keys. They point to right-after-end of key
8935 images. The 0-th entry has these pointing to key tuple start.
8936 */
8937 uchar *min_key, *max_key;
8938
8939 /*
8940 Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
8941 min_key_flag may have NULL_RANGE set.
8942 */
8943 uint min_key_flag, max_key_flag;
8944
8945 /* Number of key parts */
8946 uint min_key_parts, max_key_parts;
8947 /**
8948 Pointer into the R-B tree for this keypart. It points to the
8949 currently active range for the keypart, so calling next on it will
8950 get to the next range. sel_arg_range_seq_next() uses this to avoid
8951 reparsing the R-B range trees each time a new range is fetched.
8952 */
8953 SEL_ARG *key_tree;
8954 } RANGE_SEQ_ENTRY;
8955
8956
8957 /*
8958 MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
8959 */
8960 class Sel_arg_range_sequence
8961 {
8962 private:
8963
8964 /**
8965 Stack of ranges for the curr_kp first keyparts. Used by
8966 sel_arg_range_seq_next() so that if the next range is equal to the
8967 previous one for the first x keyparts, stack[x-1] can be
8968 accumulated with the new range in keyparts > x to quickly form
8969 the next range to return.
8970
8971 Notation used below: "x:y" means a range where
8972 "column_in_keypart_0=x" and "column_in_keypart_1=y". For
8973 simplicity, only equality (no BETWEEN, < etc) is considered in the
8974 example but the same principle applies to other range predicate
8975 operators too.
8976
8977 Consider a query with these range predicates:
8978 (kp0=1 and kp1=2 and kp2=3) or
8979 (kp0=1 and kp1=2 and kp2=4) or
8980 (kp0=1 and kp1=3 and kp2=5) or
8981 (kp0=1 and kp1=3 and kp2=6)
8982
8983 1) sel_arg_range_seq_next() is called the first time
8984 - traverse the R-B tree (see SEL_ARG) to find the first range
8985 - returns range "1:2:3"
8986 - values in stack after this: stack[1, 1:2, 1:2:3]
8987 2) sel_arg_range_seq_next() is called second time
8988 - keypart 2 has another range, so the next range in
8989 keypart 2 is appended to stack[1] and saved
8990 in stack[2]
8991 - returns range "1:2:4"
8992 - values in stack after this: stack[1, 1:2, 1:2:4]
8993 3) sel_arg_range_seq_next() is called the third time
8994 - no more ranges in keypart 2, but keypart 1 has
8995 another range, so the next range in keypart 1 is
8996 appended to stack[0] and saved in stack[1]. The first
8997 range in keypart 2 is then appended to stack[1] and
8998 saved in stack[2]
8999 - returns range "1:3:5"
9000 - values in stack after this: stack[1, 1:3, 1:3:5]
9001 4) sel_arg_range_seq_next() is called the fourth time
9002 - keypart 2 has another range, see 2)
9003 - returns range "1:3:6"
9004 - values in stack after this: stack[1, 1:3, 1:3:6]
9005 */
9006 RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
9007 /*
9008 Index of last used element in the above array. A value of -1 means
9009 that the stack is empty.
9010 */
9011 int curr_kp;
9012
9013 public:
9014 uint keyno; /* index of used tree in SEL_TREE structure */
9015 uint real_keyno; /* Number of the index in tables */
9016
9017 PARAM * const param;
9018 SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
9019
Sel_arg_range_sequence(PARAM * param_arg)9020 Sel_arg_range_sequence(PARAM *param_arg) : param(param_arg) { reset(); }
9021
reset()9022 void reset()
9023 {
9024 stack[0].key_tree= NULL;
9025 stack[0].min_key= (uchar*)param->min_key;
9026 stack[0].min_key_flag= 0;
9027 stack[0].min_key_parts= 0;
9028
9029 stack[0].max_key= (uchar*)param->max_key;
9030 stack[0].max_key_flag= 0;
9031 stack[0].max_key_parts= 0;
9032 curr_kp= -1;
9033 }
9034
stack_empty() const9035 bool stack_empty() const { return (curr_kp == -1); }
9036
9037 void stack_push_range(SEL_ARG *key_tree);
9038
stack_pop_range()9039 void stack_pop_range()
9040 {
9041 DBUG_ASSERT(!stack_empty());
9042 if (curr_kp == 0)
9043 reset();
9044 else
9045 curr_kp--;
9046 }
9047
stack_size() const9048 int stack_size() const { return curr_kp + 1; }
9049
stack_top()9050 RANGE_SEQ_ENTRY *stack_top()
9051 {
9052 return stack_empty() ? NULL : &stack[curr_kp];
9053 }
9054 };
9055
9056
9057 /*
9058 Range sequence interface, SEL_ARG* implementation: Initialize the traversal
9059
9060 SYNOPSIS
9061 init()
9062 init_params SEL_ARG tree traversal context
9063 n_ranges [ignored] The number of ranges obtained
9064 flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
9065
9066 RETURN
9067 Value of init_param
9068 */
9069
sel_arg_range_seq_init(void * init_param,uint n_ranges,uint flags)9070 range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
9071 {
9072 Sel_arg_range_sequence *seq=
9073 static_cast<Sel_arg_range_sequence*>(init_param);
9074 seq->reset();
9075 return init_param;
9076 }
9077
9078
stack_push_range(SEL_ARG * key_tree)9079 void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree)
9080 {
9081
9082 DBUG_ASSERT((uint)curr_kp+1 < MAX_REF_PARTS);
9083
9084 RANGE_SEQ_ENTRY *push_position= &stack[curr_kp + 1];
9085 RANGE_SEQ_ENTRY *last_added_kp= stack_top();
9086 if (stack_empty())
9087 {
9088 /*
9089 If we get here this is either
9090 a) the first time a range sequence is constructed for this
9091 range access method (in which case stack[0] has not been
9092 modified since the constructor was called), or
9093 b) there are multiple ranges for the first keypart in the
9094 condition (and we have called stack_pop_range() to empty
9095 the stack).
9096 In both cases, reset() has been called and all fields in
9097 push_position have been reset. All we need to do is to copy the
9098 min/max key flags from the predicate we're about to add to
9099 stack[0].
9100 */
9101 push_position->min_key_flag= key_tree->min_flag;
9102 push_position->max_key_flag= key_tree->max_flag;
9103 }
9104 else
9105 {
9106 push_position->min_key= last_added_kp->min_key;
9107 push_position->max_key= last_added_kp->max_key;
9108 push_position->min_key_parts= last_added_kp->min_key_parts;
9109 push_position->max_key_parts= last_added_kp->max_key_parts;
9110 push_position->min_key_flag= last_added_kp->min_key_flag |
9111 key_tree->min_flag;
9112 push_position->max_key_flag= last_added_kp->max_key_flag |
9113 key_tree->max_flag;
9114 }
9115
9116 push_position->key_tree= key_tree;
9117 uint16 stor_length= param->key[keyno][key_tree->part].store_length;
9118 /* psergey-merge-done:
9119 key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
9120 &cur->min_key, prev->min_key_flag,
9121 &cur->max_key, prev->max_key_flag);
9122 */
9123 push_position->min_key_parts+=
9124 key_tree->store_min(stor_length, &push_position->min_key,
9125 last_added_kp ? last_added_kp->min_key_flag : 0);
9126 push_position->max_key_parts+=
9127 key_tree->store_max(stor_length, &push_position->max_key,
9128 last_added_kp ? last_added_kp->max_key_flag : 0);
9129
9130 if (key_tree->is_null_interval())
9131 push_position->min_key_flag |= NULL_RANGE;
9132 curr_kp++;
9133 }
9134
9135
9136 /*
9137 Range sequence interface, SEL_ARG* implementation: get the next interval
9138 in the R-B tree
9139
9140 SYNOPSIS
9141 sel_arg_range_seq_next()
9142 rseq Value returned from sel_arg_range_seq_init
9143 range OUT Store information about the range here
9144
9145 DESCRIPTION
9146 This is "get_next" function for Range sequence interface implementation
9147 for SEL_ARG* tree.
9148
9149 IMPLEMENTATION
9150 The traversal also updates those param members:
9151 - is_ror_scan
9152 - range_count
9153 - max_key_part
9154
9155 RETURN
9156 0 Ok
9157 1 No more ranges in the sequence
9158
9159 NOTE: append_range_all_keyparts(), which is used to e.g. print
9160 ranges to Optimizer Trace in a human readable format, mimics the
9161 behavior of this function.
9162 */
9163
9164 //psergey-merge-todo: support check_quick_keys:max_keypart
sel_arg_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)9165 uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
9166 {
9167 SEL_ARG *key_tree;
9168 Sel_arg_range_sequence *seq= static_cast<Sel_arg_range_sequence*>(rseq);
9169
9170 if (seq->stack_empty())
9171 {
9172 /*
9173 This is the first time sel_arg_range_seq_next is called.
9174 seq->start points to the root of the R-B tree for the first
9175 keypart
9176 */
9177 key_tree= seq->start;
9178
9179 /*
9180 Move to the first range for the first keypart. Save this range
9181 in seq->stack[0] and carry on to ranges in the next keypart if
9182 any
9183 */
9184 key_tree= key_tree->first();
9185 seq->stack_push_range(key_tree);
9186 }
9187 else
9188 {
9189 /*
9190 This is not the first time sel_arg_range_seq_next is called, so
9191 seq->stack is populated with the range the last call to this
9192 function found. seq->stack[current_keypart].key_tree points to a
9193 leaf in the R-B tree of the last keypart that was part of the
9194 former range. This is the starting point for finding the next
9195 range. @see Sel_arg_range_sequence::stack
9196 */
9197 // See if there are more ranges in this or any of the previous keyparts
9198 while (true)
9199 {
9200 key_tree= seq->stack_top()->key_tree;
9201 seq->stack_pop_range();
9202 if (key_tree->next)
9203 {
9204 /* This keypart has more ranges */
9205 DBUG_ASSERT(key_tree->next != &null_element);
9206 key_tree= key_tree->next;
9207
9208 /*
9209 save the next range for this keypart and carry on to ranges in
9210 the next keypart if any
9211 */
9212 seq->stack_push_range(key_tree);
9213 seq->param->is_ror_scan= FALSE;
9214 break;
9215 }
9216
9217 if (seq->stack_empty())
9218 {
9219 // There are no more ranges for the first keypart: we're done
9220 return 1;
9221 }
9222 /*
9223 There are no more ranges for the current keypart. Step back
9224 to the previous keypart and see if there are more ranges
9225 there.
9226 */
9227 }
9228 }
9229
9230 DBUG_ASSERT(!seq->stack_empty());
9231
9232 /*
9233 Add range info for the next keypart if
9234 1) there is a range predicate for a later keypart
9235 2) the range predicate is for the next keypart in the index: a
9236 range predicate on keypartX+1 can only be used if there is a
9237 range predicate on keypartX.
9238 3) the range predicate on the next keypart is usable
9239 */
9240 while (key_tree->next_key_part && // 1)
9241 key_tree->next_key_part != &null_element && // 1)
9242 key_tree->next_key_part->part == key_tree->part + 1 && // 2)
9243 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) // 3)
9244 {
9245 {
9246 DBUG_PRINT("info", ("while(): key_tree->part %d",key_tree->part));
9247 RANGE_SEQ_ENTRY *cur= seq->stack_top();
9248 const uint min_key_total_length= cur->min_key - seq->param->min_key;
9249 const uint max_key_total_length= cur->max_key - seq->param->max_key;
9250
9251 /*
9252 Check if more ranges can be added. This is the case if all
9253 predicates for keyparts handled so far are equality
9254 predicates. If either of the following apply, there are
9255 non-equality predicates in stack[]:
9256
9257 1) min_key_total_length != max_key_total_length (because
9258 equality ranges are stored as "min_key = max_key = <value>")
9259 2) memcmp(<min_key_values>,<max_key_values>) != 0 (same argument as 1)
9260 3) A min or max flag has been set: Because flags denote ranges
9261 ('<', '<=' etc), any value but 0 indicates a non-equality
9262 predicate.
9263 */
9264
9265 uchar* min_key_start;
9266 uchar* max_key_start;
9267 uint cur_key_length;
9268
9269 if (seq->stack_size() == 1)
9270 {
9271 min_key_start= seq->param->min_key;
9272 max_key_start= seq->param->max_key;
9273 cur_key_length= min_key_total_length;
9274 }
9275 else
9276 {
9277 const RANGE_SEQ_ENTRY prev= cur[-1];
9278 min_key_start= prev.min_key;
9279 max_key_start= prev.max_key;
9280 cur_key_length= cur->min_key - prev.min_key;
9281 }
9282
9283 if ((min_key_total_length != max_key_total_length) || // 1)
9284 (memcmp(min_key_start, max_key_start, cur_key_length)) || // 2)
9285 (key_tree->min_flag || key_tree->max_flag)) // 3)
9286 {
9287 DBUG_PRINT("info", ("while(): inside if()"));
9288 /*
9289 The range predicate up to and including the one in key_tree
9290 is usable by range access but does not allow subranges made
9291 up from predicates in later keyparts. This may e.g. be
9292 because the predicate operator is "<". Since there are range
9293 predicates on more keyparts, we use those to more closely
9294 specify the start and stop locations for the range. Example:
9295
9296 "SELECT * FROM t1 WHERE a >= 2 AND b >= 3":
9297
9298 t1 content:
9299 -----------
9300 1 1
9301 2 1 <- 1)
9302 2 2
9303 2 3 <- 2)
9304 2 4
9305 3 1
9306 3 2
9307 3 3
9308
9309 The predicate cannot be translated into something like
9310 "(a=2 and b>=3) or (a=3 and b>=3) or ..."
9311 I.e., it cannot be divided into subranges, but by storing
9312 min/max key below we can at least start the scan from 2)
9313 instead of 1)
9314 */
9315 SEL_ARG *store_key_part= key_tree->next_key_part;
9316 seq->param->is_ror_scan= FALSE;
9317 if (!key_tree->min_flag)
9318 cur->min_key_parts +=
9319 store_key_part->store_min_key(seq->param->key[seq->keyno],
9320 &cur->min_key,
9321 &cur->min_key_flag,
9322 MAX_KEY);
9323 if (!key_tree->max_flag)
9324 cur->max_key_parts +=
9325 store_key_part->store_max_key(seq->param->key[seq->keyno],
9326 &cur->max_key,
9327 &cur->max_key_flag,
9328 MAX_KEY);
9329 break;
9330 }
9331 }
9332
9333 /*
9334 There are usable range predicates for the next keypart and the
9335 range predicate for the current keypart allows us to make use of
9336 them. Move to the first range predicate for the next keypart.
9337 Push this range predicate to seq->stack and move on to the next
9338 keypart (if any). @see Sel_arg_range_sequence::stack
9339 */
9340 key_tree= key_tree->next_key_part->first();
9341 seq->stack_push_range(key_tree);
9342 }
9343
9344 DBUG_ASSERT(!seq->stack_empty() && (seq->stack_top() != NULL));
9345
9346 // We now have a full range predicate in seq->stack_top()
9347 RANGE_SEQ_ENTRY *cur= seq->stack_top();
9348 PARAM *param= seq->param;
9349 uint min_key_length= cur->min_key - param->min_key;
9350
9351 if (cur->min_key_flag & GEOM_FLAG)
9352 {
9353 range->range_flag= cur->min_key_flag;
9354
9355 /* Here minimum contains also function code bits, and maximum is +inf */
9356 range->start_key.key= param->min_key;
9357 range->start_key.length= min_key_length;
9358 range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9359 range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
9360 /*
9361 Spatial operators are only allowed on spatial indexes, and no
9362 spatial index can at the moment return rows in ROWID order
9363 */
9364 DBUG_ASSERT(!param->is_ror_scan);
9365 }
9366 else
9367 {
9368 const KEY *cur_key_info= ¶m->table->key_info[seq->real_keyno];
9369 range->range_flag= cur->min_key_flag | cur->max_key_flag;
9370
9371 range->start_key.key= param->min_key;
9372 range->start_key.length= cur->min_key - param->min_key;
9373 range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9374 range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
9375 HA_READ_KEY_EXACT);
9376
9377 range->end_key.key= param->max_key;
9378 range->end_key.length= cur->max_key - param->max_key;
9379 range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
9380 range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
9381 HA_READ_AFTER_KEY);
9382
9383 /*
9384 This is an equality range (keypart_0=X and ... and keypart_n=Z) if
9385 1) There are no flags indicating open range (e.g.,
9386 "keypart_x > y") or GIS.
9387 2) The lower bound and the upper bound of the range has the
9388 same value (min_key == max_key).
9389 */
9390 const uint is_open_range= (NO_MIN_RANGE | NO_MAX_RANGE |
9391 NEAR_MIN | NEAR_MAX | GEOM_FLAG);
9392 const bool is_eq_range_pred=
9393 !(cur->min_key_flag & is_open_range) && // 1)
9394 !(cur->max_key_flag & is_open_range) && // 1)
9395 range->start_key.length == range->end_key.length && // 2)
9396 !memcmp(param->min_key, param->max_key, range->start_key.length);
9397
9398 if (is_eq_range_pred)
9399 {
9400 range->range_flag= EQ_RANGE;
9401 /*
9402 Use statistics instead of index dives for estimates of rows in
9403 this range if the user requested it
9404 */
9405 if (param->use_index_statistics)
9406 range->range_flag|= USE_INDEX_STATISTICS;
9407
9408 /*
9409 An equality range is a unique range (0 or 1 rows in the range)
9410 if the index is unique (1) and all keyparts are used (2).
9411 Note that keys which are extended with PK parts have no
9412 HA_NOSAME flag. So we can use user_defined_key_parts.
9413 */
9414 if (cur_key_info->flags & HA_NOSAME && // 1)
9415 (uint)key_tree->part+1 == cur_key_info->user_defined_key_parts) // 2)
9416 range->range_flag|= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
9417 }
9418
9419 if (param->is_ror_scan)
9420 {
9421 const uint key_part_number= key_tree->part + 1;
9422 /*
9423 If we get here, the condition on the key was converted to form
9424 "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
9425 somecond(keyXpart{key_tree->part})"
9426 Check if
9427 somecond is "keyXpart{key_tree->part} = const" and
9428 uncovered "tail" of KeyX parts is either empty or is identical to
9429 first members of clustered primary key.
9430
9431 If last key part is PK part added to the key as an extension
9432 and is_key_scan_ror() result is TRUE then it's possible to
9433 use ROR scan.
9434 */
9435 if ((!is_eq_range_pred &&
9436 key_part_number <= cur_key_info->user_defined_key_parts) ||
9437 !is_key_scan_ror(param, seq->real_keyno, key_part_number))
9438 param->is_ror_scan= FALSE;
9439 }
9440 }
9441
9442 seq->param->range_count++;
9443 seq->param->max_key_part=max<uint>(seq->param->max_key_part,key_tree->part);
9444
9445 return 0;
9446 }
9447
9448
9449 /*
9450 Calculate estimate of number records that will be retrieved by a range
9451 scan on given index using given SEL_ARG intervals tree.
9452
9453 SYNOPSIS
9454 check_quick_select()
9455 param Parameter from test_quick_select
9456 idx Number of index to use in PARAM::key SEL_TREE::key
9457 index_only TRUE - assume only index tuples will be accessed
9458 FALSE - assume full table rows will be read
9459 tree Transformed selection condition, tree->key[idx] holds
9460 the intervals for the given index.
9461 update_tbl_stats TRUE <=> update table->quick_* with information
9462 about range scan we've evaluated.
9463 mrr_flags INOUT MRR access flags
9464 cost OUT Scan cost
9465
9466 NOTES
9467 param->is_ror_scan is set to reflect if the key scan is a ROR (see
9468 is_key_scan_ror function for more info)
9469 param->table->quick_*, param->range_count (and maybe others) are
9470 updated with data of given key scan, see quick_range_seq_next for details.
9471
9472 RETURN
9473 Estimate # of records to be retrieved.
9474 HA_POS_ERROR if estimate calculation failed due to table handler problems.
9475 */
9476
9477 static
check_quick_select(PARAM * param,uint idx,bool index_only,SEL_ARG * tree,bool update_tbl_stats,uint * mrr_flags,uint * bufsize,Cost_estimate * cost)9478 ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
9479 SEL_ARG *tree, bool update_tbl_stats,
9480 uint *mrr_flags, uint *bufsize, Cost_estimate *cost)
9481 {
9482 Sel_arg_range_sequence seq(param);
9483 RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
9484 handler *file= param->table->file;
9485 ha_rows rows;
9486 uint keynr= param->real_keynr[idx];
9487 DBUG_ENTER("check_quick_select");
9488
9489 /* Handle cases when we don't have a valid non-empty list of range */
9490 if (!tree)
9491 DBUG_RETURN(HA_POS_ERROR);
9492 if (tree->type == SEL_ARG::IMPOSSIBLE)
9493 DBUG_RETURN(0L);
9494 if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
9495 DBUG_RETURN(HA_POS_ERROR); // Don't use tree
9496
9497 seq.keyno= idx;
9498 seq.real_keyno= keynr;
9499 seq.start= tree;
9500
9501 param->range_count=0;
9502 param->max_key_part=0;
9503
9504 /*
9505 If there are more equality ranges than specified by the
9506 eq_range_index_dive_limit variable we switches from using index
9507 dives to use statistics.
9508 */
9509 uint range_count= 0;
9510 param->use_index_statistics=
9511 eq_ranges_exceeds_limit(tree, &range_count,
9512 param->thd->variables.eq_range_index_dive_limit);
9513
9514 param->is_ror_scan= TRUE;
9515 if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
9516 param->is_ror_scan= FALSE;
9517
9518 *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
9519 *mrr_flags|= HA_MRR_NO_ASSOCIATION;
9520 /*
9521 Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
9522 */
9523 if (param->order_direction != ORDER::ORDER_NOT_RELEVANT)
9524 *mrr_flags|= HA_MRR_SORTED;
9525
9526 bool pk_is_clustered= file->primary_key_is_clustered();
9527 if (index_only &&
9528 (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
9529 !(pk_is_clustered && keynr == param->table->s->primary_key))
9530 *mrr_flags |= HA_MRR_INDEX_ONLY;
9531
9532 if (current_thd->lex->sql_command != SQLCOM_SELECT)
9533 *mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
9534
9535 *bufsize= param->thd->variables.read_rnd_buff_size;
9536 // Sets is_ror_scan to false for some queries, e.g. multi-ranges
9537 rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
9538 bufsize, mrr_flags, cost);
9539 if (rows != HA_POS_ERROR)
9540 {
9541 param->table->quick_rows[keynr]=rows;
9542 if (update_tbl_stats)
9543 {
9544 param->table->quick_keys.set_bit(keynr);
9545 param->table->quick_key_parts[keynr]=param->max_key_part+1;
9546 param->table->quick_n_ranges[keynr]= param->range_count;
9547 param->table->quick_condition_rows=
9548 min(param->table->quick_condition_rows, rows);
9549 }
9550 param->table->possible_quick_keys.set_bit(keynr);
9551 }
9552 /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
9553 enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
9554 if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
9555 {
9556 /*
9557 All scans are non-ROR scans for those index types.
9558 TODO: Don't have this logic here, make table engines return
9559 appropriate flags instead.
9560 */
9561 param->is_ror_scan= FALSE;
9562 }
9563 else
9564 {
9565 /* Clustered PK scan is always a ROR scan (TODO: same as above) */
9566 if (param->table->s->primary_key == keynr && pk_is_clustered)
9567 param->is_ror_scan= TRUE;
9568 }
9569 if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
9570 param->is_ror_scan= FALSE;
9571 DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
9572 DBUG_RETURN(rows);
9573 }
9574
9575
9576 /*
9577 Check if key scan on given index with equality conditions on first n key
9578 parts is a ROR scan.
9579
9580 SYNOPSIS
9581 is_key_scan_ror()
9582 param Parameter from test_quick_select
9583 keynr Number of key in the table. The key must not be a clustered
9584 primary key.
9585 nparts Number of first key parts for which equality conditions
9586 are present.
9587
9588 NOTES
9589 ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
9590 ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
9591
9592 This function is needed to handle a practically-important special case:
9593 an index scan is a ROR scan if it is done using a condition in form
9594
9595 "key1_1=c_1 AND ... AND key1_n=c_n"
9596
9597 where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
9598
9599 and the table has a clustered Primary Key defined as
9600
9601 PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
9602
9603 i.e. the first key parts of it are identical to uncovered parts ot the
9604 key being scanned. This function assumes that the index flags do not
9605 include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
9606
9607 Check (1) is made in quick_range_seq_next()
9608
9609 RETURN
9610 TRUE The scan is ROR-scan
9611 FALSE Otherwise
9612 */
9613
is_key_scan_ror(PARAM * param,uint keynr,uint nparts)9614 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts)
9615 {
9616 KEY *table_key= param->table->key_info + keynr;
9617
9618 /*
9619 Range predicates on hidden key parts do not change the fact
9620 that a scan is rowid ordered, so we only care about user
9621 defined keyparts
9622 */
9623 const uint user_defined_nparts=
9624 std::min<uint>(nparts, table_key->user_defined_key_parts);
9625
9626 KEY_PART_INFO *key_part= table_key->key_part + user_defined_nparts;
9627 KEY_PART_INFO *key_part_end= (table_key->key_part +
9628 table_key->user_defined_key_parts);
9629 uint pk_number;
9630
9631 for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
9632 {
9633 uint16 fieldnr= param->table->key_info[keynr].
9634 key_part[kp - table_key->key_part].fieldnr - 1;
9635 if (param->table->field[fieldnr]->key_length() != kp->length)
9636 return FALSE;
9637 }
9638
9639 if (key_part == key_part_end)
9640 return TRUE;
9641
9642 key_part= table_key->key_part + user_defined_nparts;
9643 pk_number= param->table->s->primary_key;
9644 if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
9645 return FALSE;
9646
9647 KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
9648 KEY_PART_INFO *pk_part_end=
9649 pk_part + param->table->key_info[pk_number].user_defined_key_parts;
9650 for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
9651 ++key_part, ++pk_part)
9652 {
9653 if ((key_part->field != pk_part->field) ||
9654 (key_part->length != pk_part->length))
9655 return FALSE;
9656 }
9657 return (key_part == key_part_end);
9658 }
9659
9660
9661 /*
9662 Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
9663
9664 SYNOPSIS
9665 get_quick_select()
9666 param
9667 idx Index of used key in param->key.
9668 key_tree SEL_ARG tree for the used key
9669 mrr_flags MRR parameter for quick select
9670 mrr_buf_size MRR parameter for quick select
9671 parent_alloc If not NULL, use it to allocate memory for
9672 quick select data. Otherwise use quick->alloc.
9673 NOTES
9674 The caller must call QUICK_SELECT::init for returned quick select.
9675
9676 CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
9677 deallocated when the returned quick select is deleted.
9678
9679 RETURN
9680 NULL on error
9681 otherwise created quick select
9682 */
9683
9684 QUICK_RANGE_SELECT *
get_quick_select(PARAM * param,uint idx,SEL_ARG * key_tree,uint mrr_flags,uint mrr_buf_size,MEM_ROOT * parent_alloc)9685 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
9686 uint mrr_buf_size, MEM_ROOT *parent_alloc)
9687 {
9688 QUICK_RANGE_SELECT *quick;
9689 bool create_err= FALSE;
9690 DBUG_ENTER("get_quick_select");
9691
9692 if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
9693 quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
9694 param->real_keynr[idx],
9695 MY_TEST(parent_alloc),
9696 parent_alloc, &create_err);
9697 else
9698 quick=new QUICK_RANGE_SELECT(param->thd, param->table,
9699 param->real_keynr[idx],
9700 MY_TEST(parent_alloc), NULL, &create_err);
9701
9702 if (quick)
9703 {
9704 if (create_err ||
9705 get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
9706 param->max_key,0))
9707 {
9708 delete quick;
9709 quick=0;
9710 }
9711 else
9712 {
9713 quick->mrr_flags= mrr_flags;
9714 quick->mrr_buf_size= mrr_buf_size;
9715 quick->key_parts=(KEY_PART*)
9716 memdup_root(parent_alloc? parent_alloc : &quick->alloc,
9717 (char*) param->key[idx],
9718 sizeof(KEY_PART) *
9719 actual_key_parts(¶m->
9720 table->key_info[param->real_keynr[idx]]));
9721 }
9722 }
9723 DBUG_RETURN(quick);
9724 }
9725
9726
9727 /*
9728 ** Fix this to get all possible sub_ranges
9729 */
9730 bool
get_quick_keys(PARAM * param,QUICK_RANGE_SELECT * quick,KEY_PART * key,SEL_ARG * key_tree,uchar * min_key,uint min_key_flag,uchar * max_key,uint max_key_flag)9731 get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
9732 SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
9733 uchar *max_key, uint max_key_flag)
9734 {
9735 QUICK_RANGE *range;
9736 uint flag;
9737 int min_part= key_tree->part-1, // # of keypart values in min_key buffer
9738 max_part= key_tree->part-1; // # of keypart values in max_key buffer
9739
9740 if (key_tree->left != &null_element)
9741 {
9742 if (get_quick_keys(param,quick,key,key_tree->left,
9743 min_key,min_key_flag, max_key, max_key_flag))
9744 return 1;
9745 }
9746 uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
9747 min_part+= key_tree->store_min(key[key_tree->part].store_length,
9748 &tmp_min_key,min_key_flag);
9749 max_part+= key_tree->store_max(key[key_tree->part].store_length,
9750 &tmp_max_key,max_key_flag);
9751
9752 if (key_tree->next_key_part &&
9753 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
9754 key_tree->next_key_part->part == key_tree->part+1)
9755 { // const key as prefix
9756 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
9757 memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
9758 key_tree->min_flag==0 && key_tree->max_flag==0)
9759 {
9760 if (get_quick_keys(param,quick,key,key_tree->next_key_part,
9761 tmp_min_key, min_key_flag | key_tree->min_flag,
9762 tmp_max_key, max_key_flag | key_tree->max_flag))
9763 return 1;
9764 goto end; // Ugly, but efficient
9765 }
9766 {
9767 uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
9768 if (!tmp_min_flag)
9769 min_part+= key_tree->next_key_part->store_min_key(key,
9770 &tmp_min_key,
9771 &tmp_min_flag,
9772 MAX_KEY);
9773 if (!tmp_max_flag)
9774 max_part+= key_tree->next_key_part->store_max_key(key,
9775 &tmp_max_key,
9776 &tmp_max_flag,
9777 MAX_KEY);
9778 flag=tmp_min_flag | tmp_max_flag;
9779 }
9780 }
9781 else
9782 {
9783 flag = (key_tree->min_flag & GEOM_FLAG) ?
9784 key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
9785 }
9786
9787 /*
9788 Ensure that some part of min_key and max_key are used. If not,
9789 regard this as no lower/upper range
9790 */
9791 if ((flag & GEOM_FLAG) == 0)
9792 {
9793 if (tmp_min_key != param->min_key)
9794 flag&= ~NO_MIN_RANGE;
9795 else
9796 flag|= NO_MIN_RANGE;
9797 if (tmp_max_key != param->max_key)
9798 flag&= ~NO_MAX_RANGE;
9799 else
9800 flag|= NO_MAX_RANGE;
9801 }
9802 if (flag == 0)
9803 {
9804 uint length= (uint) (tmp_min_key - param->min_key);
9805 if (length == (uint) (tmp_max_key - param->max_key) &&
9806 !memcmp(param->min_key,param->max_key,length))
9807 {
9808 const KEY *table_key=quick->head->key_info+quick->index;
9809 flag=EQ_RANGE;
9810 /*
9811 Note that keys which are extended with PK parts have no
9812 HA_NOSAME flag. So we can use user_defined_key_parts.
9813 */
9814 if ((table_key->flags & HA_NOSAME) &&
9815 key_tree->part == table_key->user_defined_key_parts - 1)
9816 {
9817 if ((table_key->flags & HA_NULL_PART_KEY) &&
9818 null_part_in_key(key,
9819 param->min_key,
9820 (uint) (tmp_min_key - param->min_key)))
9821 flag|= NULL_RANGE;
9822 else
9823 flag|= UNIQUE_RANGE;
9824 }
9825 }
9826 }
9827
9828 /* Get range for retrieving rows in QUICK_SELECT::get_next */
9829 if (!(range= new QUICK_RANGE(param->min_key,
9830 (uint) (tmp_min_key - param->min_key),
9831 min_part >=0 ? make_keypart_map(min_part) : 0,
9832 param->max_key,
9833 (uint) (tmp_max_key - param->max_key),
9834 max_part >=0 ? make_keypart_map(max_part) : 0,
9835 flag)))
9836 return 1; // out of memory
9837
9838 set_if_bigger(quick->max_used_key_length, range->min_length);
9839 set_if_bigger(quick->max_used_key_length, range->max_length);
9840 set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
9841 if (insert_dynamic(&quick->ranges, &range))
9842 return 1;
9843
9844 end:
9845 if (key_tree->right != &null_element)
9846 return get_quick_keys(param,quick,key,key_tree->right,
9847 min_key,min_key_flag,
9848 max_key,max_key_flag);
9849 return 0;
9850 }
9851
9852 /*
9853 Return 1 if there is only one range and this uses the whole unique key
9854 */
9855
unique_key_range()9856 bool QUICK_RANGE_SELECT::unique_key_range()
9857 {
9858 if (ranges.elements == 1)
9859 {
9860 QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
9861 if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
9862 {
9863 KEY *key=head->key_info+index;
9864 return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
9865 }
9866 }
9867 return 0;
9868 }
9869
9870
9871
9872 /*
9873 Return TRUE if any part of the key is NULL
9874
9875 SYNOPSIS
9876 null_part_in_key()
9877 key_part Array of key parts (index description)
9878 key Key values tuple
9879 length Length of key values tuple in bytes.
9880
9881 RETURN
9882 TRUE The tuple has at least one "keypartX is NULL"
9883 FALSE Otherwise
9884 */
9885
null_part_in_key(KEY_PART * key_part,const uchar * key,uint length)9886 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
9887 {
9888 for (const uchar *end=key+length ;
9889 key < end;
9890 key+= key_part++->store_length)
9891 {
9892 if (key_part->null_bit && *key)
9893 return 1;
9894 }
9895 return 0;
9896 }
9897
9898
is_keys_used(const MY_BITMAP * fields)9899 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
9900 {
9901 return is_key_used(head, index, fields);
9902 }
9903
is_keys_used(const MY_BITMAP * fields)9904 bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
9905 {
9906 QUICK_RANGE_SELECT *quick;
9907 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9908 while ((quick= it++))
9909 {
9910 if (is_key_used(head, quick->index, fields))
9911 return 1;
9912 }
9913 return 0;
9914 }
9915
is_keys_used(const MY_BITMAP * fields)9916 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
9917 {
9918 QUICK_RANGE_SELECT *quick;
9919 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9920 while ((quick= it++))
9921 {
9922 if (is_key_used(head, quick->index, fields))
9923 return 1;
9924 }
9925 return 0;
9926 }
9927
is_keys_used(const MY_BITMAP * fields)9928 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
9929 {
9930 QUICK_SELECT_I *quick;
9931 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
9932 while ((quick= it++))
9933 {
9934 if (quick->is_keys_used(fields))
9935 return 1;
9936 }
9937 return 0;
9938 }
9939
9940
get_ft_select(THD * thd,TABLE * table,uint key)9941 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
9942 {
9943 bool create_err= FALSE;
9944 FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
9945 if (create_err)
9946 {
9947 delete fts;
9948 return NULL;
9949 }
9950 else
9951 return fts;
9952 }
9953
9954 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
9955 static bool
key_has_nulls(const KEY * key_info,const uchar * key,uint key_len)9956 key_has_nulls(const KEY* key_info, const uchar *key, uint key_len)
9957 {
9958 KEY_PART_INFO *curr_part, *end_part;
9959 const uchar* end_ptr= key + key_len;
9960 curr_part= key_info->key_part;
9961 end_part= curr_part + key_info->user_defined_key_parts;
9962
9963 for (; curr_part != end_part && key < end_ptr; curr_part++)
9964 {
9965 if (curr_part->null_bit && *key)
9966 return TRUE;
9967
9968 key += curr_part->store_length;
9969 }
9970 return FALSE;
9971 }
9972 #endif
9973
9974 /*
9975 Create quick select from ref/ref_or_null scan.
9976
9977 SYNOPSIS
9978 get_quick_select_for_ref()
9979 thd Thread handle
9980 table Table to access
9981 ref ref[_or_null] scan parameters
9982 records Estimate of number of records (needed only to construct
9983 quick select)
9984 NOTES
9985 This allocates things in a new memory root, as this may be called many
9986 times during a query.
9987
9988 RETURN
9989 Quick select that retrieves the same rows as passed ref scan
9990 NULL on error.
9991 */
9992
get_quick_select_for_ref(THD * thd,TABLE * table,TABLE_REF * ref,ha_rows records)9993 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
9994 TABLE_REF *ref, ha_rows records)
9995 {
9996 MEM_ROOT *old_root, *alloc;
9997 QUICK_RANGE_SELECT *quick;
9998 KEY *key_info = &table->key_info[ref->key];
9999 KEY_PART *key_part;
10000 QUICK_RANGE *range;
10001 uint part;
10002 bool create_err= FALSE;
10003 Cost_estimate cost;
10004
10005 old_root= thd->mem_root;
10006 /* The following call may change thd->mem_root */
10007 quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
10008 /* save mem_root set by QUICK_RANGE_SELECT constructor */
10009 alloc= thd->mem_root;
10010 /*
10011 return back default mem_root (thd->mem_root) changed by
10012 QUICK_RANGE_SELECT constructor
10013 */
10014 thd->mem_root= old_root;
10015
10016 if (!quick || create_err)
10017 return 0; /* no ranges found */
10018 if (quick->init())
10019 goto err;
10020 quick->records= records;
10021
10022 if ((cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error) ||
10023 !(range= new(alloc) QUICK_RANGE()))
10024 goto err; // out of memory
10025
10026 range->min_key= range->max_key= ref->key_buff;
10027 range->min_length= range->max_length= ref->key_length;
10028 range->min_keypart_map= range->max_keypart_map=
10029 make_prev_keypart_map(ref->key_parts);
10030 range->flag= (ref->key_length == key_info->key_length ? EQ_RANGE : 0);
10031
10032 if (!(quick->key_parts=key_part=(KEY_PART *)
10033 alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
10034 goto err;
10035
10036 for (part=0 ; part < ref->key_parts ;part++,key_part++)
10037 {
10038 key_part->part=part;
10039 key_part->field= key_info->key_part[part].field;
10040 key_part->length= key_info->key_part[part].length;
10041 key_part->store_length= key_info->key_part[part].store_length;
10042 key_part->null_bit= key_info->key_part[part].null_bit;
10043 key_part->flag= (uint8) key_info->key_part[part].key_part_flag;
10044 }
10045 if (insert_dynamic(&quick->ranges, &range))
10046 goto err;
10047
10048 /*
10049 Add a NULL range if REF_OR_NULL optimization is used.
10050 For example:
10051 if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
10052 and have ref->null_ref_key set. Will create a new NULL range here.
10053 */
10054 if (ref->null_ref_key)
10055 {
10056 QUICK_RANGE *null_range;
10057
10058 *ref->null_ref_key= 1; // Set null byte then create a range
10059 if (!(null_range= new (alloc)
10060 QUICK_RANGE(ref->key_buff, ref->key_length,
10061 make_prev_keypart_map(ref->key_parts),
10062 ref->key_buff, ref->key_length,
10063 make_prev_keypart_map(ref->key_parts), EQ_RANGE)))
10064 goto err;
10065 *ref->null_ref_key= 0; // Clear null byte
10066 if (insert_dynamic(&quick->ranges, &null_range))
10067 goto err;
10068 }
10069
10070 /* Call multi_range_read_info() to get the MRR flags and buffer size */
10071 quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
10072 (table->key_read ? HA_MRR_INDEX_ONLY : 0);
10073 if (thd->lex->sql_command != SQLCOM_SELECT)
10074 quick->mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
10075 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
10076 if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
10077 ref->key_length))
10078 quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
10079 #endif
10080
10081 quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
10082 if (table->file->multi_range_read_info(quick->index, 1, records,
10083 &quick->mrr_buf_size,
10084 &quick->mrr_flags, &cost))
10085 goto err;
10086
10087 return quick;
10088 err:
10089 delete quick;
10090 return 0;
10091 }
10092
10093
10094 /*
10095 Perform key scans for all used indexes (except CPK), get rowids and merge
10096 them into an ordered non-recurrent sequence of rowids.
10097
10098 The merge/duplicate removal is performed using Unique class. We put all
10099 rowids into Unique, get the sorted sequence and destroy the Unique.
10100
10101 If table has a clustered primary key that covers all rows (TRUE for bdb
10102 and innodb currently) and one of the index_merge scans is a scan on PK,
10103 then rows that will be retrieved by PK scan are not put into Unique and
10104 primary key scan is not performed here, it is performed later separately.
10105
10106 RETURN
10107 0 OK
10108 other error
10109 */
10110
read_keys_and_merge()10111 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
10112 {
10113 List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
10114 QUICK_RANGE_SELECT* cur_quick;
10115 int result;
10116 handler *file= head->file;
10117 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
10118
10119 /* We're going to just read rowids. */
10120 head->set_keyread(TRUE);
10121 head->prepare_for_position();
10122
10123 cur_quick_it.rewind();
10124 cur_quick= cur_quick_it++;
10125 DBUG_ASSERT(cur_quick != 0);
10126
10127 DBUG_EXECUTE_IF("simulate_bug13919180",
10128 {
10129 my_error(ER_UNKNOWN_ERROR, MYF(0));
10130 DBUG_RETURN(1);
10131 });
10132 /*
10133 We reuse the same instance of handler so we need to call both init and
10134 reset here.
10135 */
10136 if (cur_quick->init() || cur_quick->reset())
10137 DBUG_RETURN(1);
10138
10139 if (unique == NULL)
10140 {
10141 DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_ABORT(); );
10142 DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
10143 DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );
10144
10145 unique= new Unique(refpos_order_cmp, (void *)file,
10146 file->ref_length,
10147 thd->variables.sortbuff_size);
10148 }
10149 else
10150 {
10151 unique->reset();
10152 filesort_free_buffers(head, false);
10153 }
10154
10155 DBUG_ASSERT(file->ref_length == unique->get_size());
10156 DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
10157
10158 if (!unique)
10159 DBUG_RETURN(1);
10160 for (;;)
10161 {
10162 while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
10163 {
10164 cur_quick->range_end();
10165 cur_quick= cur_quick_it++;
10166 if (!cur_quick)
10167 break;
10168
10169 if (cur_quick->file->inited)
10170 cur_quick->file->ha_index_end();
10171 if (cur_quick->init() || cur_quick->reset())
10172 DBUG_RETURN(1);
10173 }
10174
10175 if (result)
10176 {
10177 if (result != HA_ERR_END_OF_FILE)
10178 {
10179 cur_quick->range_end();
10180 DBUG_RETURN(result);
10181 }
10182 break;
10183 }
10184
10185 if (thd->killed)
10186 DBUG_RETURN(1);
10187
10188 /* skip row if it will be retrieved by clustered PK scan */
10189 if (pk_quick_select && pk_quick_select->row_in_ranges())
10190 continue;
10191
10192 cur_quick->file->position(cur_quick->record);
10193 result= unique->unique_add((char*)cur_quick->file->ref);
10194 if (result)
10195 DBUG_RETURN(1);
10196 }
10197
10198 /*
10199 Ok all rowids are in the Unique now. The next call will initialize
10200 head->sort structure so it can be used to iterate through the rowids
10201 sequence.
10202 */
10203 result= unique->get(head);
10204 doing_pk_scan= FALSE;
10205 /* index_merge currently doesn't support "using index" at all */
10206 head->set_keyread(FALSE);
10207 if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1, TRUE))
10208 DBUG_RETURN(1);
10209 DBUG_RETURN(result);
10210 }
10211
10212
10213 /*
10214 Get next row for index_merge.
10215 NOTES
10216 The rows are read from
10217 1. rowids stored in Unique.
10218 2. QUICK_RANGE_SELECT with clustered primary key (if any).
10219 The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
10220 */
10221
get_next()10222 int QUICK_INDEX_MERGE_SELECT::get_next()
10223 {
10224 int result;
10225 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
10226
10227 if (doing_pk_scan)
10228 DBUG_RETURN(pk_quick_select->get_next());
10229
10230 if ((result= read_record.read_record(&read_record)) == -1)
10231 {
10232 result= HA_ERR_END_OF_FILE;
10233 end_read_record(&read_record);
10234 free_io_cache(head);
10235 /* All rows from Unique have been retrieved, do a clustered PK scan */
10236 if (pk_quick_select)
10237 {
10238 doing_pk_scan= TRUE;
10239 if ((result= pk_quick_select->init()) ||
10240 (result= pk_quick_select->reset()))
10241 DBUG_RETURN(result);
10242 DBUG_RETURN(pk_quick_select->get_next());
10243 }
10244 }
10245
10246 DBUG_RETURN(result);
10247 }
10248
10249
10250 /*
10251 Retrieve next record.
10252 SYNOPSIS
10253 QUICK_ROR_INTERSECT_SELECT::get_next()
10254
10255 NOTES
10256 Invariant on enter/exit: all intersected selects have retrieved all index
10257 records with rowid <= some_rowid_val and no intersected select has
10258 retrieved any index records with rowid > some_rowid_val.
10259 We start fresh and loop until we have retrieved the same rowid in each of
10260 the key scans or we got an error.
10261
10262 If a Clustered PK scan is present, it is used only to check if row
10263 satisfies its condition (and never used for row retrieval).
10264
10265 Locking: to ensure that exclusive locks are only set on records that
10266 are included in the final result we must release the lock
10267 on all rows we read but do not include in the final result. This
10268 must be done on each index that reads the record and the lock
10269 must be released using the same handler (the same quick object) as
10270 used when reading the record.
10271
10272 RETURN
10273 0 - Ok
10274 other - Error code if any error occurred.
10275 */
10276
get_next()10277 int QUICK_ROR_INTERSECT_SELECT::get_next()
10278 {
10279 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
10280 QUICK_RANGE_SELECT* quick;
10281
10282 /* quick that reads the given rowid first. This is needed in order
10283 to be able to unlock the row using the same handler object that locked
10284 it */
10285 QUICK_RANGE_SELECT* quick_with_last_rowid;
10286
10287 int error, cmp;
10288 uint last_rowid_count=0;
10289 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");
10290
10291 do
10292 {
10293 /* Get a rowid for first quick and save it as a 'candidate' */
10294 quick= quick_it++;
10295 error= quick->get_next();
10296 if (cpk_quick)
10297 {
10298 while (!error && !cpk_quick->row_in_ranges())
10299 {
10300 quick->file->unlock_row(); /* row not in range; unlock */
10301 error= quick->get_next();
10302 }
10303 }
10304 if (error)
10305 DBUG_RETURN(error);
10306
10307 quick->file->position(quick->record);
10308 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
10309 last_rowid_count= 1;
10310 quick_with_last_rowid= quick;
10311
10312 while (last_rowid_count < quick_selects.elements)
10313 {
10314 if (!(quick= quick_it++))
10315 {
10316 quick_it.rewind();
10317 quick= quick_it++;
10318 }
10319
10320 do
10321 {
10322 DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
10323 DBUG_SET("+d,innodb_report_deadlock"););
10324 if ((error= quick->get_next()))
10325 {
10326 /* On certain errors like deadlock, trx might be rolled back.*/
10327 if (!current_thd->transaction_rollback_request)
10328 quick_with_last_rowid->file->unlock_row();
10329 DBUG_RETURN(error);
10330 }
10331 quick->file->position(quick->record);
10332 cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
10333 if (cmp < 0)
10334 {
10335 /* This row is being skipped. Release lock on it. */
10336 quick->file->unlock_row();
10337 }
10338 } while (cmp < 0);
10339
10340 /* Ok, current select 'caught up' and returned ref >= cur_ref */
10341 if (cmp > 0)
10342 {
10343 /* Found a row with ref > cur_ref. Make it a new 'candidate' */
10344 if (cpk_quick)
10345 {
10346 while (!cpk_quick->row_in_ranges())
10347 {
10348 quick->file->unlock_row(); /* row not in range; unlock */
10349 if ((error= quick->get_next()))
10350 {
10351 /* On certain errors like deadlock, trx might be rolled back.*/
10352 if (!current_thd->transaction_rollback_request)
10353 quick_with_last_rowid->file->unlock_row();
10354 DBUG_RETURN(error);
10355 }
10356 }
10357 quick->file->position(quick->record);
10358 }
10359 memcpy(last_rowid, quick->file->ref, head->file->ref_length);
10360 quick_with_last_rowid->file->unlock_row();
10361 last_rowid_count= 1;
10362 quick_with_last_rowid= quick;
10363 }
10364 else
10365 {
10366 /* current 'candidate' row confirmed by this select */
10367 last_rowid_count++;
10368 }
10369 }
10370
10371 /* We get here if we got the same row ref in all scans. */
10372 if (need_to_fetch_row)
10373 error= head->file->ha_rnd_pos(head->record[0], last_rowid);
10374 } while (error == HA_ERR_RECORD_DELETED);
10375 DBUG_RETURN(error);
10376 }
10377
10378
10379 /*
10380 Retrieve next record.
10381 SYNOPSIS
10382 QUICK_ROR_UNION_SELECT::get_next()
10383
10384 NOTES
10385 Enter/exit invariant:
10386 For each quick select in the queue a {key,rowid} tuple has been
10387 retrieved but the corresponding row hasn't been passed to output.
10388
10389 RETURN
10390 0 - Ok
10391 other - Error code if any error occurred.
10392 */
10393
get_next()10394 int QUICK_ROR_UNION_SELECT::get_next()
10395 {
10396 int error, dup_row;
10397 QUICK_SELECT_I *quick;
10398 uchar *tmp;
10399 DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
10400
10401 do
10402 {
10403 do
10404 {
10405 if (!queue.elements)
10406 DBUG_RETURN(HA_ERR_END_OF_FILE);
10407 /* Ok, we have a queue with >= 1 scans */
10408
10409 quick= (QUICK_SELECT_I*)queue_top(&queue);
10410 memcpy(cur_rowid, quick->last_rowid, rowid_length);
10411
10412 /* put into queue rowid from the same stream as top element */
10413 if ((error= quick->get_next()))
10414 {
10415 if (error != HA_ERR_END_OF_FILE)
10416 DBUG_RETURN(error);
10417 queue_remove(&queue, 0);
10418 }
10419 else
10420 {
10421 quick->save_last_pos();
10422 queue_replaced(&queue);
10423 }
10424
10425 if (!have_prev_rowid)
10426 {
10427 /* No rows have been returned yet */
10428 dup_row= FALSE;
10429 have_prev_rowid= TRUE;
10430 }
10431 else
10432 dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
10433 } while (dup_row);
10434
10435 tmp= cur_rowid;
10436 cur_rowid= prev_rowid;
10437 prev_rowid= tmp;
10438
10439 error= head->file->ha_rnd_pos(quick->record, prev_rowid);
10440 } while (error == HA_ERR_RECORD_DELETED);
10441 DBUG_RETURN(error);
10442 }
10443
10444
reset()10445 int QUICK_RANGE_SELECT::reset()
10446 {
10447 uint buf_size;
10448 uchar *mrange_buff;
10449 int error;
10450 HANDLER_BUFFER empty_buf;
10451 DBUG_ENTER("QUICK_RANGE_SELECT::reset");
10452 last_range= NULL;
10453 cur_range= (QUICK_RANGE**) ranges.buffer;
10454
10455 /* set keyread to TRUE if index is covering */
10456 if(!head->no_keyread && head->covering_keys.is_set(index))
10457 head->set_keyread(true);
10458 else
10459 head->set_keyread(false);
10460
10461 if (!file->inited)
10462 {
10463 if (in_ror_merged_scan)
10464 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
10465 const bool sorted= (mrr_flags & HA_MRR_SORTED);
10466 DBUG_EXECUTE_IF("bug14365043_2",
10467 DBUG_SET("+d,ha_index_init_fail"););
10468 if ((error= file->ha_index_init(index, sorted)))
10469 {
10470 file->print_error(error, MYF(0));
10471 DBUG_RETURN(error);
10472 }
10473 }
10474
10475 /* Allocate buffer if we need one but haven't allocated it yet */
10476 if (mrr_buf_size && !mrr_buf_desc)
10477 {
10478 buf_size= mrr_buf_size;
10479 while (buf_size && !my_multi_malloc(MYF(MY_WME),
10480 &mrr_buf_desc, sizeof(*mrr_buf_desc),
10481 &mrange_buff, buf_size,
10482 NullS))
10483 {
10484 /* Try to shrink the buffers until both are 0. */
10485 buf_size/= 2;
10486 }
10487 if (!mrr_buf_desc)
10488 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
10489
10490 /* Initialize the handler buffer. */
10491 mrr_buf_desc->buffer= mrange_buff;
10492 mrr_buf_desc->buffer_end= mrange_buff + buf_size;
10493 mrr_buf_desc->end_of_used_area= mrange_buff;
10494 #ifdef HAVE_purify
10495 /*
10496 We need this until ndb will use the buffer efficiently
10497 (Now ndb stores complete row in here, instead of only the used fields
10498 which gives us valgrind warnings in compare_record[])
10499 */
10500 memset(mrange_buff, 0, buf_size);
10501 #endif
10502 }
10503
10504 if (!mrr_buf_desc)
10505 empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
10506
10507 RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
10508 error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
10509 mrr_flags, mrr_buf_desc? mrr_buf_desc:
10510 &empty_buf);
10511 DBUG_RETURN(error);
10512 }
10513
10514
10515 /*
10516 Range sequence interface implementation for array<QUICK_RANGE>: initialize
10517
10518 SYNOPSIS
10519 quick_range_seq_init()
10520 init_param Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
10521 n_ranges Number of ranges in the sequence (ignored)
10522 flags MRR flags (currently not used)
10523
10524 RETURN
10525 Opaque value to be passed to quick_range_seq_next
10526 */
10527
quick_range_seq_init(void * init_param,uint n_ranges,uint flags)10528 range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
10529 {
10530 QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
10531 quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer;
10532 quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer;
10533 quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur +
10534 quick->ranges.elements;
10535 return &quick->qr_traversal_ctx;
10536 }
10537
10538
10539 /*
10540 Range sequence interface implementation for array<QUICK_RANGE>: get next
10541
10542 SYNOPSIS
10543 quick_range_seq_next()
10544 rseq Value returned from quick_range_seq_init
10545 range OUT Store information about the range here
10546
10547 RETURN
10548 0 Ok
10549 1 No more ranges in the sequence
10550 */
10551
quick_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)10552 uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
10553 {
10554 QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
10555
10556 if (ctx->cur == ctx->last)
10557 return 1; /* no more ranges */
10558
10559 QUICK_RANGE *cur= *(ctx->cur);
10560 key_range *start_key= &range->start_key;
10561 key_range *end_key= &range->end_key;
10562
10563 start_key->key= cur->min_key;
10564 start_key->length= cur->min_length;
10565 start_key->keypart_map= cur->min_keypart_map;
10566 start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
10567 (cur->flag & EQ_RANGE) ?
10568 HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
10569 end_key->key= cur->max_key;
10570 end_key->length= cur->max_length;
10571 end_key->keypart_map= cur->max_keypart_map;
10572 /*
10573 We use HA_READ_AFTER_KEY here because if we are reading on a key
10574 prefix. We want to find all keys with this prefix.
10575 */
10576 end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
10577 HA_READ_AFTER_KEY);
10578 range->range_flag= cur->flag;
10579 ctx->cur++;
10580 return 0;
10581 }
10582
10583
10584 /*
10585 MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
10586
10587 SYNOPSIS
10588 mrr_persistent_flag_storage()
10589 seq Range sequence being traversed
10590 idx Number of range
10591
10592 DESCRIPTION
10593 MRR/NDB implementation needs to store some bits for each range. This
10594 function returns a reference to the "range_flag" associated with the
10595 range number idx.
10596
10597 This function should be removed when we get a proper MRR/NDB
10598 implementation.
10599
10600 RETURN
10601 Reference to range_flag associated with range number #idx
10602 */
10603
mrr_persistent_flag_storage(range_seq_t seq,uint idx)10604 uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
10605 {
10606 QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
10607 return ctx->first[idx]->flag;
10608 }
10609
10610
10611 /*
10612 MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
10613
10614 SYNOPSIS
10615 mrr_get_ptr_by_idx()
10616 seq Range sequence bening traversed
10617 idx Number of the range
10618
10619 DESCRIPTION
10620 An extension of MRR range sequence interface needed by NDB: return the
10621 data associated with the given range.
10622
10623 A proper MRR interface implementer is supposed to store and return
10624 range-associated data. NDB stores number of the range instead. So this
10625 is a helper function that translates range number to range associated
10626 data.
10627
10628 This function does nothing, as currrently there is only one user of the
10629 MRR interface - the quick range select code, and this user doesn't need
10630 to use range-associated data.
10631
10632 RETURN
10633 Reference to range-associated data
10634 */
10635
mrr_get_ptr_by_idx(range_seq_t seq,uint idx)10636 char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
10637 {
10638 static char *dummy;
10639 return dummy;
10640 }
10641
10642
10643 /*
10644 Get next possible record using quick-struct.
10645
10646 SYNOPSIS
10647 QUICK_RANGE_SELECT::get_next()
10648
10649 NOTES
10650 Record is read into table->record[0]
10651
10652 RETURN
10653 0 Found row
10654 HA_ERR_END_OF_FILE No (more) rows in range
10655 # Error code
10656 */
10657
get_next()10658 int QUICK_RANGE_SELECT::get_next()
10659 {
10660 char *dummy;
10661 MY_BITMAP * const save_read_set= head->read_set;
10662 MY_BITMAP * const save_write_set= head->write_set;
10663 DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
10664
10665 if (in_ror_merged_scan)
10666 {
10667 /*
10668 We don't need to signal the bitmap change as the bitmap is always the
10669 same for this head->file
10670 */
10671 head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
10672 }
10673
10674 int result= file->multi_range_read_next(&dummy);
10675
10676 if (in_ror_merged_scan)
10677 {
10678 /* Restore bitmaps set on entry */
10679 head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
10680 }
10681 DBUG_RETURN(result);
10682 }
10683
10684
10685 /*
10686 Get the next record with a different prefix.
10687
10688 @param prefix_length length of cur_prefix
10689 @param group_key_parts The number of key parts in the group prefix
10690 @param cur_prefix prefix of a key to be searched for
10691
10692 Each subsequent call to the method retrieves the first record that has a
10693 prefix with length prefix_length and which is different from cur_prefix,
10694 such that the record with the new prefix is within the ranges described by
10695 this->ranges. The record found is stored into the buffer pointed by
10696 this->record. The method is useful for GROUP-BY queries with range
10697 conditions to discover the prefix of the next group that satisfies the range
10698 conditions.
10699
10700 @todo
10701
10702 This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
10703 methods should be unified into a more general one to reduce code
10704 duplication.
10705
10706 @retval 0 on success
10707 @retval HA_ERR_END_OF_FILE if returned all keys
10708 @retval other if some error occurred
10709 */
10710
get_next_prefix(uint prefix_length,uint group_key_parts,uchar * cur_prefix)10711 int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
10712 uint group_key_parts,
10713 uchar *cur_prefix)
10714 {
10715 DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
10716 const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);
10717
10718 for (;;)
10719 {
10720 int result;
10721 if (last_range)
10722 {
10723 /* Read the next record in the same range with prefix after cur_prefix. */
10724 DBUG_ASSERT(cur_prefix != NULL);
10725 result= file->ha_index_read_map(record, cur_prefix, keypart_map,
10726 HA_READ_AFTER_KEY);
10727 if (result || last_range->max_keypart_map == 0)
10728 DBUG_RETURN(result);
10729
10730 key_range previous_endpoint;
10731 last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
10732 if (file->compare_key(&previous_endpoint) <= 0)
10733 DBUG_RETURN(0);
10734 }
10735
10736 uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
10737 if (count == 0)
10738 {
10739 /* Ranges have already been used up before. None is left for read. */
10740 last_range= 0;
10741 DBUG_RETURN(HA_ERR_END_OF_FILE);
10742 }
10743 last_range= *(cur_range++);
10744
10745 key_range start_key, end_key;
10746 last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
10747 last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
10748
10749 const bool sorted= (mrr_flags & HA_MRR_SORTED);
10750 result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
10751 last_range->max_keypart_map ? &end_key : 0,
10752 MY_TEST(last_range->flag & EQ_RANGE),
10753 sorted);
10754 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
10755 last_range= 0; // Stop searching
10756
10757 if (result != HA_ERR_END_OF_FILE)
10758 DBUG_RETURN(result);
10759 last_range= 0; // No matching rows; go to next range
10760 }
10761 }
10762
10763
10764 /* Get next for geometrical indexes */
10765
get_next()10766 int QUICK_RANGE_SELECT_GEOM::get_next()
10767 {
10768 DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
10769
10770 for (;;)
10771 {
10772 int result;
10773 if (last_range)
10774 {
10775 // Already read through key
10776 result= file->ha_index_next_same(record, last_range->min_key,
10777 last_range->min_length);
10778 if (result != HA_ERR_END_OF_FILE)
10779 DBUG_RETURN(result);
10780 }
10781
10782 uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
10783 if (count == 0)
10784 {
10785 /* Ranges have already been used up before. None is left for read. */
10786 last_range= 0;
10787 DBUG_RETURN(HA_ERR_END_OF_FILE);
10788 }
10789 last_range= *(cur_range++);
10790
10791 result= file->ha_index_read_map(record, last_range->min_key,
10792 last_range->min_keypart_map,
10793 (ha_rkey_function)(last_range->flag ^
10794 GEOM_FLAG));
10795 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
10796 DBUG_RETURN(result);
10797 last_range= 0; // Not found, to next range
10798 }
10799 }
10800
10801
10802 /*
10803 Check if current row will be retrieved by this QUICK_RANGE_SELECT
10804
10805 NOTES
10806 It is assumed that currently a scan is being done on another index
10807 which reads all necessary parts of the index that is scanned by this
10808 quick select.
10809 The implementation does a binary search on sorted array of disjoint
10810 ranges, without taking size of range into account.
10811
10812 This function is used to filter out clustered PK scan rows in
10813 index_merge quick select.
10814
10815 RETURN
10816 TRUE if current row will be retrieved by this quick select
10817 FALSE if not
10818 */
10819
row_in_ranges()10820 bool QUICK_RANGE_SELECT::row_in_ranges()
10821 {
10822 QUICK_RANGE *res;
10823 uint min= 0;
10824 uint max= ranges.elements - 1;
10825 uint mid= (max + min)/2;
10826
10827 while (min != max)
10828 {
10829 if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
10830 {
10831 /* current row value > mid->max */
10832 min= mid + 1;
10833 }
10834 else
10835 max= mid;
10836 mid= (min + max) / 2;
10837 }
10838 res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
10839 return (!cmp_next(res) && !cmp_prev(res));
10840 }
10841
10842 /*
10843 This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
10844 get_next() interface, but we have to hold a pointer to the original
10845 QUICK_RANGE_SELECT because its data are used all over the place. What
10846 should be done is to factor out the data that is needed into a base
10847 class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
10848 which handle the ranges and implement the get_next() function. But
10849 for now, this seems to work right at least.
10850 */
10851
QUICK_SELECT_DESC(QUICK_RANGE_SELECT * q,uint used_key_parts_arg,bool * error)10852 QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
10853 uint used_key_parts_arg,
10854 bool *error)
10855 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
10856 used_key_parts (used_key_parts_arg)
10857 {
10858 QUICK_RANGE *r;
10859 /*
10860 Use default MRR implementation for reverse scans. No table engine
10861 currently can do an MRR scan with output in reverse index order.
10862 */
10863 mrr_buf_desc= NULL;
10864 mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
10865 mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
10866 mrr_buf_size= 0;
10867
10868
10869 QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
10870 QUICK_RANGE **end_range= pr + ranges.elements;
10871 for (; pr!=end_range; pr++)
10872 rev_ranges.push_front(*pr);
10873
10874 /* Remove EQ_RANGE flag for keys that are not using the full key */
10875 for (r = rev_it++; r; r = rev_it++)
10876 {
10877 if ((r->flag & EQ_RANGE) &&
10878 head->key_info[index].key_length != r->max_length)
10879 r->flag&= ~EQ_RANGE;
10880 }
10881 rev_it.rewind();
10882 q->dont_free=1; // Don't free shared mem
10883 }
10884
10885
get_next()10886 int QUICK_SELECT_DESC::get_next()
10887 {
10888 DBUG_ENTER("QUICK_SELECT_DESC::get_next");
10889
10890 /* The max key is handled as follows:
10891 * - if there is NO_MAX_RANGE, start at the end and move backwards
10892 * - if it is an EQ_RANGE (which means that max key covers the entire
10893 * key) and the query does not use any hidden key fields that are
10894 * not considered when the range optimzier sets EQ_RANGE (e.g. the
10895 * primary key added by InnoDB), then go directly to the key and
10896 * read through it (sorting backwards is same as sorting forwards).
10897 * - if it is NEAR_MAX, go to the key or next, step back once, and
10898 * move backwards
10899 * - otherwise (not NEAR_MAX == include the key), go after the key,
10900 * step back once, and move backwards
10901 */
10902
10903 for (;;)
10904 {
10905 int result;
10906 if (last_range)
10907 { // Already read through key
10908 result = ((last_range->flag & EQ_RANGE &&
10909 used_key_parts <=
10910 head->key_info[index].user_defined_key_parts) ?
10911 file->ha_index_next_same(record, last_range->min_key,
10912 last_range->min_length) :
10913 file->ha_index_prev(record));
10914 if (!result)
10915 {
10916 if (cmp_prev(*rev_it.ref()) == 0)
10917 DBUG_RETURN(0);
10918 }
10919 else if (result != HA_ERR_END_OF_FILE)
10920 DBUG_RETURN(result);
10921 }
10922
10923 if (!(last_range= rev_it++))
10924 DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used
10925
10926 // Case where we can avoid descending scan, see comment above
10927 const bool eqrange_all_keyparts= (last_range->flag & EQ_RANGE) &&
10928 (used_key_parts <= head->key_info[index].user_defined_key_parts);
10929
10930 /*
10931 If we have pushed an index condition (ICP) and this quick select
10932 will use ha_index_prev() to read data, we need to let the
10933 handler know where to end the scan in order to avoid that the
10934 ICP implemention continues to read past the range boundary.
10935
10936 An addition for MyRocks:
10937 MyRocks needs to know both start of the range and end of the range
10938 in order to use its bloom filters. This is useful regardless of whether
10939 ICP is usable (e.g. it is used for index-only scans which do not use
10940 ICP). Because of that, we remove the following:
10941 // // if (file->pushed_idx_cond)
10942 */
10943 {
10944 if (!eqrange_all_keyparts)
10945 {
10946 key_range min_range;
10947 last_range->make_min_endpoint(&min_range);
10948 if(min_range.length > 0)
10949 file->set_end_range(&min_range, handler::RANGE_SCAN_DESC);
10950 else
10951 file->set_end_range(NULL, handler::RANGE_SCAN_DESC);
10952 }
10953 else
10954 {
10955 /*
10956 Will use ha_index_next_same() for reading records. In case we have
10957 set the end range for an earlier range, this need to be cleared.
10958 */
10959 file->set_end_range(NULL, handler::RANGE_SCAN_ASC);
10960 }
10961 }
10962
10963 key_range prepare_range_start;
10964 key_range prepare_range_end;
10965
10966 last_range->make_min_endpoint(&prepare_range_start);
10967 last_range->make_max_endpoint(&prepare_range_end);
10968 result = file->prepare_range_scan((last_range->flag & NO_MIN_RANGE)
10969 ? NULL : &prepare_range_start,
10970 (last_range->flag & NO_MAX_RANGE)
10971 ? NULL : &prepare_range_end);
10972 if (result)
10973 DBUG_RETURN(result);
10974
10975 if (last_range->flag & NO_MAX_RANGE) // Read last record
10976 {
10977 int local_error;
10978 if ((local_error= file->ha_index_last(record)))
10979 {
10980 /*
10981 HA_ERR_END_OF_FILE is returned both when the table is empty and when
10982 there are no qualifying records in the range (when using ICP).
10983 Interpret this return value as "no qualifying rows in the range" to
10984 avoid loss of records. If the error code truly meant "empty table"
10985 the next iteration of the loop will exit.
10986 */
10987 if (local_error != HA_ERR_END_OF_FILE)
10988 DBUG_RETURN(local_error);
10989 last_range= NULL; // Go to next range
10990 continue;
10991 }
10992
10993 if (cmp_prev(last_range) == 0)
10994 DBUG_RETURN(0);
10995 last_range= 0; // No match; go to next range
10996 continue;
10997 }
10998
10999 if (eqrange_all_keyparts)
11000
11001 {
11002 result= file->ha_index_read_map(record, last_range->max_key,
11003 last_range->max_keypart_map,
11004 HA_READ_KEY_EXACT);
11005 }
11006 else
11007 {
11008 DBUG_ASSERT(last_range->flag & NEAR_MAX ||
11009 (last_range->flag & EQ_RANGE &&
11010 used_key_parts >
11011 head->key_info[index].user_defined_key_parts) ||
11012 range_reads_after_key(last_range));
11013 result= file->ha_index_read_map(record, last_range->max_key,
11014 last_range->max_keypart_map,
11015 ((last_range->flag & NEAR_MAX) ?
11016 HA_READ_BEFORE_KEY :
11017 HA_READ_PREFIX_LAST_OR_PREV));
11018 }
11019 if (result)
11020 {
11021 if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
11022 DBUG_RETURN(result);
11023 last_range= 0; // Not found, to next range
11024 continue;
11025 }
11026 if (cmp_prev(last_range) == 0)
11027 {
11028 if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
11029 last_range= 0; // Stop searching
11030 DBUG_RETURN(0); // Found key is in range
11031 }
11032 last_range= 0; // To next range
11033 }
11034 }
11035
11036
11037 /**
11038 Create a compatible quick select with the result ordered in an opposite way
11039
11040 @param used_key_parts_arg Number of used key parts
11041
11042 @retval NULL in case of errors (OOM etc)
11043 @retval pointer to a newly created QUICK_SELECT_DESC if success
11044 */
11045
make_reverse(uint used_key_parts_arg)11046 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
11047 {
11048 bool error= FALSE;
11049 QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg,
11050 &error);
11051 if (new_quick == NULL || error)
11052 {
11053 delete new_quick;
11054 return NULL;
11055 }
11056 return new_quick;
11057 }
11058
11059
11060 /*
11061 Compare if found key is over max-value
11062 Returns 0 if key <= range->max_key
11063 TODO: Figure out why can't this function be as simple as cmp_prev().
11064 */
11065
cmp_next(QUICK_RANGE * range_arg)11066 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
11067 {
11068 if (range_arg->flag & NO_MAX_RANGE)
11069 return 0; /* key can't be to large */
11070
11071 KEY_PART *key_part=key_parts;
11072 uint store_length;
11073
11074 for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
11075 key < end;
11076 key+= store_length, key_part++)
11077 {
11078 int cmp;
11079 store_length= key_part->store_length;
11080 if (key_part->null_bit)
11081 {
11082 if (*key)
11083 {
11084 if (!key_part->field->is_null())
11085 return 1;
11086 continue;
11087 }
11088 else if (key_part->field->is_null())
11089 return 0;
11090 key++; // Skip null byte
11091 store_length--;
11092 }
11093 if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
11094 return 0;
11095 if (cmp > 0)
11096 return 1;
11097 }
11098 return (range_arg->flag & NEAR_MAX) ? 1 : 0; // Exact match
11099 }
11100
11101
11102 /*
11103 Returns 0 if found key is inside range (found key >= range->min_key).
11104 */
11105
cmp_prev(QUICK_RANGE * range_arg)11106 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
11107 {
11108 int cmp;
11109 if (range_arg->flag & NO_MIN_RANGE)
11110 return 0; /* key can't be to small */
11111
11112 cmp= key_cmp(key_part_info, range_arg->min_key,
11113 range_arg->min_length);
11114 if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
11115 return 0;
11116 return 1; // outside of range
11117 }
11118
11119
11120 /*
11121 * TRUE if this range will require using HA_READ_AFTER_KEY
11122 See comment in get_next() about this
11123 */
11124
range_reads_after_key(QUICK_RANGE * range_arg)11125 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
11126 {
11127 return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
11128 !(range_arg->flag & EQ_RANGE) ||
11129 head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
11130 }
11131
11132
add_info_string(String * str)11133 void QUICK_RANGE_SELECT::add_info_string(String *str)
11134 {
11135 KEY *key_info= head->key_info + index;
11136 str->append(key_info->name);
11137 }
11138
add_info_string(String * str)11139 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
11140 {
11141 QUICK_RANGE_SELECT *quick;
11142 bool first= TRUE;
11143 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11144 str->append(STRING_WITH_LEN("sort_union("));
11145 while ((quick= it++))
11146 {
11147 if (!first)
11148 str->append(',');
11149 else
11150 first= FALSE;
11151 quick->add_info_string(str);
11152 }
11153 if (pk_quick_select)
11154 {
11155 str->append(',');
11156 pk_quick_select->add_info_string(str);
11157 }
11158 str->append(')');
11159 }
11160
add_info_string(String * str)11161 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
11162 {
11163 bool first= TRUE;
11164 QUICK_RANGE_SELECT *quick;
11165 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11166 str->append(STRING_WITH_LEN("intersect("));
11167 while ((quick= it++))
11168 {
11169 KEY *key_info= head->key_info + quick->index;
11170 if (!first)
11171 str->append(',');
11172 else
11173 first= FALSE;
11174 str->append(key_info->name);
11175 }
11176 if (cpk_quick)
11177 {
11178 KEY *key_info= head->key_info + cpk_quick->index;
11179 str->append(',');
11180 str->append(key_info->name);
11181 }
11182 str->append(')');
11183 }
11184
add_info_string(String * str)11185 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
11186 {
11187 bool first= TRUE;
11188 QUICK_SELECT_I *quick;
11189 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11190 str->append(STRING_WITH_LEN("union("));
11191 while ((quick= it++))
11192 {
11193 if (!first)
11194 str->append(',');
11195 else
11196 first= FALSE;
11197 quick->add_info_string(str);
11198 }
11199 str->append(')');
11200 }
11201
11202
add_keys_and_lengths(String * key_names,String * used_lengths)11203 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
11204 String *used_lengths)
11205 {
11206 char buf[64];
11207 uint length;
11208 KEY *key_info= head->key_info + index;
11209 key_names->append(key_info->name);
11210 length= longlong2str(max_used_key_length, buf, 10) - buf;
11211 used_lengths->append(buf, length);
11212 }
11213
add_keys_and_lengths(String * key_names,String * used_lengths)11214 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
11215 String *used_lengths)
11216 {
11217 char buf[64];
11218 uint length;
11219 bool first= TRUE;
11220 QUICK_RANGE_SELECT *quick;
11221
11222 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11223 while ((quick= it++))
11224 {
11225 if (first)
11226 first= FALSE;
11227 else
11228 {
11229 key_names->append(',');
11230 used_lengths->append(',');
11231 }
11232
11233 KEY *key_info= head->key_info + quick->index;
11234 key_names->append(key_info->name);
11235 length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11236 used_lengths->append(buf, length);
11237 }
11238 if (pk_quick_select)
11239 {
11240 KEY *key_info= head->key_info + pk_quick_select->index;
11241 key_names->append(',');
11242 key_names->append(key_info->name);
11243 length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
11244 used_lengths->append(',');
11245 used_lengths->append(buf, length);
11246 }
11247 }
11248
add_keys_and_lengths(String * key_names,String * used_lengths)11249 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
11250 String *used_lengths)
11251 {
11252 char buf[64];
11253 uint length;
11254 bool first= TRUE;
11255 QUICK_RANGE_SELECT *quick;
11256 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11257 while ((quick= it++))
11258 {
11259 KEY *key_info= head->key_info + quick->index;
11260 if (first)
11261 first= FALSE;
11262 else
11263 {
11264 key_names->append(',');
11265 used_lengths->append(',');
11266 }
11267 key_names->append(key_info->name);
11268 length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11269 used_lengths->append(buf, length);
11270 }
11271
11272 if (cpk_quick)
11273 {
11274 KEY *key_info= head->key_info + cpk_quick->index;
11275 key_names->append(',');
11276 key_names->append(key_info->name);
11277 length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
11278 used_lengths->append(',');
11279 used_lengths->append(buf, length);
11280 }
11281 }
11282
add_keys_and_lengths(String * key_names,String * used_lengths)11283 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
11284 String *used_lengths)
11285 {
11286 bool first= TRUE;
11287 QUICK_SELECT_I *quick;
11288 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11289 while ((quick= it++))
11290 {
11291 if (first)
11292 first= FALSE;
11293 else
11294 {
11295 used_lengths->append(',');
11296 key_names->append(',');
11297 }
11298 quick->add_keys_and_lengths(key_names, used_lengths);
11299 }
11300 }
11301
11302
11303 /*******************************************************************************
11304 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
11305 *******************************************************************************/
11306
11307 static inline uint get_field_keypart(KEY *index, Field *field);
11308 static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
11309 PARAM *param, uint *param_idx);
11310 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
11311 SEL_ARG **cur_range);
11312 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
11313 KEY_PART_INFO *first_non_group_part,
11314 KEY_PART_INFO *min_max_arg_part,
11315 KEY_PART_INFO *last_part, THD *thd,
11316 uchar *key_infix, uint *key_infix_len,
11317 KEY_PART_INFO **first_non_infix_part);
11318 static bool
11319 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
11320 Field::imagetype image_type);
11321
11322 static void
11323 cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
11324 uint group_key_parts, SEL_TREE *range_tree,
11325 SEL_ARG *index_tree, ha_rows quick_prefix_records,
11326 bool have_min, bool have_max,
11327 double *read_cost, ha_rows *records);
11328
11329
11330 /**
11331 Test if this access method is applicable to a GROUP query with MIN/MAX
11332 functions, and if so, construct a new TRP object.
11333
11334 DESCRIPTION
11335 Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
11336 Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
11337 following conditions:
11338 A) Table T has at least one compound index I of the form:
11339 I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
11340 B) Query conditions:
11341 B0. Q is over a single table T.
11342 B1. The attributes referenced by Q are a subset of the attributes of I.
11343 B2. All attributes QA in Q can be divided into 3 overlapping groups:
11344 - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
11345 referenced by any number of MIN and/or MAX functions if present.
11346 - WA = {W_1, ..., W_p} - from the WHERE clause
11347 - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
11348 = SA - if Q is a DISTINCT query (based on the
11349 equivalence of DISTINCT and GROUP queries.
11350 - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
11351 GROUP BY and not referenced by MIN/MAX functions.
11352 with the following properties specified below.
11353 B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
11354 applicable.
11355
11356 SA1. There is at most one attribute in SA referenced by any number of
11357 MIN and/or MAX functions which, which if present, is denoted as C.
11358 SA2. The position of the C attribute in the index is after the last A_k.
11359 SA3. The attribute C can be referenced in the WHERE clause only in
11360 predicates of the forms:
11361 - (C {< | <= | > | >= | =} const)
11362 - (const {< | <= | > | >= | =} C)
11363 - (C between const_i and const_j)
11364 - C IS NULL
11365 - C IS NOT NULL
11366 - C != const
11367 SA4. If Q has a GROUP BY clause, there are no other aggregate functions
11368 except MIN and MAX. For queries with DISTINCT, aggregate functions
11369 are allowed.
11370 SA5. The select list in DISTINCT queries should not contain expressions.
11371 SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
11372 for AGG_FUNC(DISTINCT ...) optimization because cursor position is
11373 never stored after a unique key lookup in the clustered index and
11374 furhter index_next/prev calls can not be used. So loose index scan
11375 optimization can not be used in this case.
11376 SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
11377 access method is not used.
11378 For above queries MIN/MAX() aggregation has to be done at
11379 nested_loops_join (end_send_group). But with current design MIN/MAX()
11380 is always set as part of loose index scan. Because of this mismatch
11381 MIN() and MAX() values will be set incorrectly. For such queries to
11382 work we need a new interface for loose index scan. This new interface
11383 should only fetch records with min and max values and let
11384 end_send_group to do aggregation. Until then do not use
11385 loose_index_scan.
11386 GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
11387 G_i = A_j => i = j.
11388 GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
11389 forms a prefix of I. This permutation is used as the GROUP clause
11390 when the DISTINCT query is converted to a GROUP query.
11391 GA3. The attributes in GA may participate in arbitrary predicates, divided
11392 into two groups:
11393 - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
11394 attributes of a prefix of GA
11395 - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
11396 of GA. Since P is applied to only GROUP attributes it filters some
11397 groups, and thus can be applied after the grouping.
11398 GA4. There are no expressions among G_i, just direct column references.
11399 NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
11400 and the MIN/MAX attribute C, then NGA must consist of exactly the
11401 index attributes that constitute the gap. As a result there is a
11402 permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
11403 in the index.
11404 NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
11405 equality conditions for all NG_i of the form (NG_i = const) or
11406 (const = NG_i), such that each NG_i is referenced in exactly one
11407 conjunct. Informally, the predicates provide constants to fill the
11408 gap in the index.
11409 NGA3.If BA <> {}, there can only be one range. TODO: This is a code
11410 limitation and is not strictly needed. See BUG#15947433
11411 WA1. There are no other attributes in the WHERE clause except the ones
11412 referenced in predicates RNG, PA, PC, EQ defined above. Therefore
11413 WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
11414 above tests. By transitivity then it also follows that each WA_i
11415 participates in the index I (if this was already tested for GA, NGA
11416 and C).
11417 WA2. If there is a predicate on C, then it must be in conjunction
11418 to all predicates on all earlier keyparts in I.
11419
11420 C) Overall query form:
11421 SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
11422 FROM T
11423 WHERE [RNG(A_1,...,A_p ; where p <= k)]
11424 [AND EQ(B_1,...,B_m)]
11425 [AND PC(C)]
11426 [AND PA(A_i1,...,A_iq)]
11427 GROUP BY A_1,...,A_k
11428 [HAVING PH(A_1, ..., B_1,..., C)]
11429 where EXPR(...) is an arbitrary expression over some or all SELECT fields,
11430 or:
11431 SELECT DISTINCT A_i1,...,A_ik
11432 FROM T
11433 WHERE [RNG(A_1,...,A_p ; where p <= k)]
11434 [AND PA(A_i1,...,A_iq)];
11435
11436 NOTES
11437 If the current query satisfies the conditions above, and if
11438 (mem_root! = NULL), then the function constructs and returns a new TRP
11439 object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
11440 If (mem_root == NULL), then the function only tests whether the current
11441 query satisfies the conditions above, and, if so, sets
11442 is_applicable = TRUE.
11443
11444 Queries with DISTINCT for which index access can be used are transformed
11445 into equivalent group-by queries of the form:
11446
11447 SELECT A_1,...,A_k FROM T
11448 WHERE [RNG(A_1,...,A_p ; where p <= k)]
11449 [AND PA(A_i1,...,A_iq)]
11450 GROUP BY A_1,...,A_k;
11451
11452 The group-by list is a permutation of the select attributes, according
11453 to their order in the index.
11454
11455 TODO
11456 - What happens if the query groups by the MIN/MAX field, and there is no
11457 other field as in: "select min(a) from t1 group by a" ?
11458 - We assume that the general correctness of the GROUP-BY query was checked
11459 before this point. Is this correct, or do we have to check it completely?
11460 - Lift the limitation in condition (B3), that is, make this access method
11461 applicable to ROLLUP queries.
11462
11463 @param param Parameter from test_quick_select
11464 @param sel_tree Range tree generated by get_mm_tree
11465 @param read_time Best read time so far (=table/index scan time)
11466 @return table read plan
11467 @retval NULL Loose index scan not applicable or mem_root == NULL
11468 @retval !NULL Loose index scan table read plan
11469 */
11470
11471 static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM * param,SEL_TREE * tree,double read_time)11472 get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
11473 {
11474 THD *thd= param->thd;
11475 JOIN *join= thd->lex->current_select->join;
11476 TABLE *table= param->table;
11477 bool have_min= FALSE; /* TRUE if there is a MIN function. */
11478 bool have_max= FALSE; /* TRUE if there is a MAX function. */
11479 Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
11480 KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
11481 uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
11482 KEY *index_info= NULL; /* The index chosen for data access. */
11483 uint index= 0; /* The id of the chosen index. */
11484 uint group_key_parts= 0; // Number of index key parts in the group prefix.
11485 uint used_key_parts= 0; /* Number of index key parts used for access. */
11486 uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
11487 uint key_infix_len= 0; /* Length of key_infix. */
11488 TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
11489 uint key_part_nr;
11490 ORDER *tmp_group;
11491 Item *item;
11492 Item_field *item_field;
11493 bool is_agg_distinct;
11494 List<Item_field> agg_distinct_flds;
11495 /* Cost-related variables for the best index so far. */
11496 double best_read_cost= DBL_MAX;
11497 ha_rows best_records= 0;
11498 SEL_ARG *best_index_tree= NULL;
11499 ha_rows best_quick_prefix_records= 0;
11500 uint best_param_idx= 0;
11501 List_iterator<Item> select_items_it;
11502 Opt_trace_context * const trace= ¶m->thd->opt_trace;
11503
11504 DBUG_ENTER("get_best_group_min_max");
11505
11506 Opt_trace_object trace_group(trace, "group_index_range",
11507 Opt_trace_context::RANGE_OPTIMIZER);
11508 const char* cause= NULL;
11509
11510 /* Perform few 'cheap' tests whether this access method is applicable. */
11511 if (!join)
11512 cause= "no_join";
11513 else if (join->primary_tables != 1) /* Query must reference one table. */
11514 cause= "not_single_table";
11515 else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
11516 cause= "rollup";
11517 else if (table->s->keys == 0) /* There are no indexes to use. */
11518 cause= "no_index";
11519 else if (param->order_direction == ORDER::ORDER_DESC)
11520 cause= "cannot_do_reverse_ordering";
11521 if (cause != NULL)
11522 {
11523 trace_group.add("chosen", false).add_alnum("cause", cause);
11524 DBUG_RETURN(NULL);
11525 }
11526
11527 /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
11528 is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
11529
11530 if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
11531 (!join->select_distinct) &&
11532 !is_agg_distinct)
11533 {
11534 trace_group.add("chosen", false).
11535 add_alnum("cause", "not_group_by_or_distinct");
11536 DBUG_RETURN(NULL);
11537 }
11538 /* Analyze the query in more detail. */
11539
11540 if (join->sum_funcs[0])
11541 {
11542 Item_sum *min_max_item;
11543 Item_sum **func_ptr= join->sum_funcs;
11544 while ((min_max_item= *(func_ptr++)))
11545 {
11546 if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
11547 have_min= TRUE;
11548 else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
11549 have_max= TRUE;
11550 else if (is_agg_distinct &&
11551 (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
11552 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
11553 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
11554 continue;
11555 else
11556 {
11557 trace_group.add("chosen", false).
11558 add_alnum("cause", "not_applicable_aggregate_function");
11559 DBUG_RETURN(NULL);
11560 }
11561
11562 /* The argument of MIN/MAX. */
11563 Item *expr= min_max_item->get_arg(0)->real_item();
11564 if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
11565 {
11566 if (! min_max_arg_item)
11567 min_max_arg_item= (Item_field*) expr;
11568 else if (! min_max_arg_item->eq(expr, 1))
11569 DBUG_RETURN(NULL);
11570 }
11571 else
11572 DBUG_RETURN(NULL);
11573 }
11574 }
11575
11576 /* Check (SA7). */
11577 if (is_agg_distinct && (have_max || have_min))
11578 {
11579 trace_group.add("chosen", false).
11580 add_alnum("cause", "have_both_agg_distinct_and_min_max");
11581 DBUG_RETURN(NULL);
11582 }
11583
11584 select_items_it= List_iterator<Item>(join->fields_list);
11585 /* Check (SA5). */
11586 if (join->select_distinct)
11587 {
11588 trace_group.add("distinct_query", true);
11589 while ((item= select_items_it++))
11590 {
11591 if (item->real_item()->type() != Item::FIELD_ITEM)
11592 DBUG_RETURN(NULL);
11593 }
11594 }
11595
11596 /* Check (GA4) - that there are no expressions among the group attributes. */
11597 for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
11598 {
11599 if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
11600 {
11601 trace_group.add("chosen", false).
11602 add_alnum("cause", "group_field_is_expression");
11603 DBUG_RETURN(NULL);
11604 }
11605 }
11606
11607 /*
11608 Check that table has at least one compound index such that the conditions
11609 (GA1,GA2) are all TRUE. If there is more than one such index, select the
11610 first one. Here we set the variables: group_prefix_len and index_info.
11611 */
11612
11613 const uint pk= param->table->s->primary_key;
11614 KEY *cur_index_info= table->key_info;
11615 KEY *cur_index_info_end= cur_index_info + table->s->keys;
11616 SEL_ARG *cur_index_tree= NULL;
11617 ha_rows cur_quick_prefix_records= 0;
11618 uint cur_param_idx= MAX_KEY;
11619 Opt_trace_array trace_indices(trace, "potential_group_range_indices");
11620 for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
11621 cur_index_info++, cur_index++)
11622 {
11623 Opt_trace_object trace_idx(trace);
11624 trace_idx.add_utf8("index", cur_index_info->name);
11625 KEY_PART_INFO *cur_part;
11626 KEY_PART_INFO *end_part; /* Last part for loops. */
11627 /* Last index part. */
11628 KEY_PART_INFO *last_part;
11629 KEY_PART_INFO *first_non_group_part;
11630 KEY_PART_INFO *first_non_infix_part;
11631 uint key_infix_parts;
11632 uint cur_group_key_parts= 0;
11633 uint cur_group_prefix_len= 0;
11634 double cur_read_cost;
11635 ha_rows cur_records;
11636 key_map used_key_parts_map;
11637 uint max_key_part= 0;
11638 uint cur_key_infix_len= 0;
11639 uchar cur_key_infix[MAX_KEY_LENGTH];
11640 uint cur_used_key_parts;
11641
11642 /* Check (B1) - if current index is covering. */
11643 if (!table->covering_keys.is_set(cur_index))
11644 {
11645 cause= "not_covering";
11646 goto next_index;
11647 }
11648
11649 /*
11650 If the current storage manager is such that it appends the primary key to
11651 each index, then the above condition is insufficient to check if the
11652 index is covering. In such cases it may happen that some fields are
11653 covered by the PK index, but not by the current index. Since we can't
11654 use the concatenation of both indexes for index lookup, such an index
11655 does not qualify as covering in our case. If this is the case, below
11656 we check that all query fields are indeed covered by 'cur_index'.
11657 */
11658 if (pk < MAX_KEY && cur_index != pk &&
11659 (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
11660 {
11661 /* For each table field */
11662 for (uint i= 0; i < table->s->fields; i++)
11663 {
11664 Field *cur_field= table->field[i];
11665 /*
11666 If the field is used in the current query ensure that it's
11667 part of 'cur_index'
11668 */
11669 if (bitmap_is_set(table->read_set, cur_field->field_index) &&
11670 !cur_field->is_part_of_actual_key(thd, cur_index, cur_index_info))
11671 {
11672 cause= "not_covering";
11673 goto next_index; // Field was not part of key
11674 }
11675 }
11676 }
11677 trace_idx.add("covering", true);
11678
11679 /*
11680 Check (GA1) for GROUP BY queries.
11681 */
11682 if (join->group_list)
11683 {
11684 cur_part= cur_index_info->key_part;
11685 end_part= cur_part + actual_key_parts(cur_index_info);
11686 /* Iterate in parallel over the GROUP list and the index parts. */
11687 for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
11688 tmp_group= tmp_group->next, cur_part++)
11689 {
11690 /*
11691 TODO:
11692 tmp_group::item is an array of Item, is it OK to consider only the
11693 first Item? If so, then why? What is the array for?
11694 */
11695 /* Above we already checked that all group items are fields. */
11696 DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
11697 Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
11698 if (group_field->field->eq(cur_part->field))
11699 {
11700 cur_group_prefix_len+= cur_part->store_length;
11701 ++cur_group_key_parts;
11702 max_key_part= cur_part - cur_index_info->key_part + 1;
11703 used_key_parts_map.set_bit(max_key_part);
11704 }
11705 else
11706 {
11707 cause= "group_attribute_not_prefix_in_index";
11708 goto next_index;
11709 }
11710 }
11711 }
11712
11713 /*
11714 Check (GA2) if this is a DISTINCT query.
11715 If GA2, then Store a new ORDER object in group_fields_array at the
11716 position of the key part of item_field->field. Thus we get the ORDER
11717 objects for each field ordered as the corresponding key parts.
11718 Later group_fields_array of ORDER objects is used to convert the query
11719 to a GROUP query.
11720 */
11721 if ((!join->group_list && join->select_distinct) ||
11722 is_agg_distinct)
11723 {
11724 if (!is_agg_distinct)
11725 {
11726 select_items_it.rewind();
11727 }
11728
11729 List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
11730 while (NULL !=
11731 (item= (is_agg_distinct ?
11732 (Item *) agg_distinct_flds_it++ : select_items_it++)))
11733 {
11734 /* (SA5) already checked above. */
11735 item_field= (Item_field*) item->real_item();
11736 DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
11737
11738 /* not doing loose index scan for derived tables */
11739 if (!item_field->field)
11740 {
11741 cause= "derived_table";
11742 goto next_index;
11743 }
11744
11745 /* Find the order of the key part in the index. */
11746 key_part_nr= get_field_keypart(cur_index_info, item_field->field);
11747 /*
11748 Check if this attribute was already present in the select list.
11749 If it was present, then its corresponding key part was alredy used.
11750 */
11751 if (used_key_parts_map.is_set(key_part_nr))
11752 continue;
11753 if (key_part_nr < 1 ||
11754 (!is_agg_distinct && key_part_nr > join->fields_list.elements))
11755 {
11756 cause= "select_attribute_not_prefix_in_index";
11757 goto next_index;
11758 }
11759 cur_part= cur_index_info->key_part + key_part_nr - 1;
11760 cur_group_prefix_len+= cur_part->store_length;
11761 used_key_parts_map.set_bit(key_part_nr);
11762 ++cur_group_key_parts;
11763 max_key_part= max(max_key_part,key_part_nr);
11764 }
11765 /*
11766 Check that used key parts forms a prefix of the index.
11767 To check this we compare bits in all_parts and cur_parts.
11768 all_parts have all bits set from 0 to (max_key_part-1).
11769 cur_parts have bits set for only used keyparts.
11770 */
11771 ulonglong all_parts, cur_parts;
11772 all_parts= (1ULL << max_key_part) - 1;
11773 cur_parts= used_key_parts_map.to_ulonglong() >> 1;
11774 if (all_parts != cur_parts)
11775 goto next_index;
11776 }
11777
11778 /* Check (SA2). */
11779 if (min_max_arg_item)
11780 {
11781 key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
11782 if (key_part_nr <= cur_group_key_parts)
11783 {
11784 cause= "aggregate_column_not_suffix_in_idx";
11785 goto next_index;
11786 }
11787 min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
11788 }
11789
11790 /* Check (SA6) if clustered key is used. */
11791 if (is_agg_distinct && cur_index == table->s->primary_key &&
11792 table->file->primary_key_is_clustered())
11793 {
11794 cause= "primary_key_is_clustered";
11795 goto next_index;
11796 }
11797
11798 /*
11799 Check (NGA1, NGA2) and extract a sequence of constants to be used as part
11800 of all search keys.
11801 */
11802
11803 /*
11804 If there is MIN/MAX, each keypart between the last group part and the
11805 MIN/MAX part must participate in one equality with constants, and all
11806 keyparts after the MIN/MAX part must not be referenced in the query.
11807
11808 If there is no MIN/MAX, the keyparts after the last group part can be
11809 referenced only in equalities with constants, and the referenced keyparts
11810 must form a sequence without any gaps that starts immediately after the
11811 last group keypart.
11812 */
11813 last_part= cur_index_info->key_part + actual_key_parts(cur_index_info);
11814 first_non_group_part=
11815 (cur_group_key_parts < actual_key_parts(cur_index_info)) ?
11816 cur_index_info->key_part + cur_group_key_parts :
11817 NULL;
11818 first_non_infix_part= min_max_arg_part ?
11819 (min_max_arg_part < last_part) ?
11820 min_max_arg_part :
11821 NULL :
11822 NULL;
11823 if (first_non_group_part &&
11824 (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
11825 {
11826 if (tree)
11827 {
11828 uint dummy;
11829 SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11830 &dummy);
11831 if (!get_constant_key_infix(cur_index_info, index_range_tree,
11832 first_non_group_part, min_max_arg_part,
11833 last_part, thd, cur_key_infix,
11834 &cur_key_infix_len,
11835 &first_non_infix_part))
11836 {
11837 cause= "nonconst_equality_gap_attribute";
11838 goto next_index;
11839 }
11840 }
11841 else if (min_max_arg_part &&
11842 (min_max_arg_part - first_non_group_part > 0))
11843 {
11844 /*
11845 There is a gap but no range tree, thus no predicates at all for the
11846 non-group keyparts.
11847 */
11848 cause= "no_nongroup_keypart_predicate";
11849 goto next_index;
11850 }
11851 else if (first_non_group_part && join->conds)
11852 {
11853 /*
11854 If there is no MIN/MAX function in the query, but some index
11855 key part is referenced in the WHERE clause, then this index
11856 cannot be used because the WHERE condition over the keypart's
11857 field cannot be 'pushed' to the index (because there is no
11858 range 'tree'), and the WHERE clause must be evaluated before
11859 GROUP BY/DISTINCT.
11860 */
11861 /*
11862 Store the first and last keyparts that need to be analyzed
11863 into one array that can be passed as parameter.
11864 */
11865 KEY_PART_INFO *key_part_range[2];
11866 key_part_range[0]= first_non_group_part;
11867 key_part_range[1]= last_part;
11868
11869 /* Check if cur_part is referenced in the WHERE clause. */
11870 if (join->conds->walk(&Item::find_item_in_field_list_processor, 1,
11871 (uchar*) key_part_range))
11872 {
11873 cause= "keypart_reference_from_where_clause";
11874 goto next_index;
11875 }
11876 }
11877 }
11878
11879 /*
11880 Test (WA1) partially - that no other keypart after the last infix part is
11881 referenced in the query.
11882 */
11883 if (first_non_infix_part)
11884 {
11885 cur_part= first_non_infix_part +
11886 (min_max_arg_part && (min_max_arg_part < last_part));
11887 for (; cur_part != last_part; cur_part++)
11888 {
11889 if (bitmap_is_set(table->read_set, cur_part->field->field_index))
11890 {
11891 cause= "keypart_after_infix_in_query";
11892 goto next_index;
11893 }
11894 }
11895 }
11896
11897 /**
11898 Test WA2:If there are conditions on a column C participating in
11899 MIN/MAX, those conditions must be conjunctions to all earlier
11900 keyparts. Otherwise, Loose Index Scan cannot be used.
11901 */
11902 if (tree && min_max_arg_item)
11903 {
11904 uint dummy;
11905 SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11906 &dummy);
11907 SEL_ARG *cur_range= NULL;
11908 if (get_sel_arg_for_keypart(min_max_arg_part->field,
11909 index_range_tree, &cur_range) ||
11910 (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
11911 {
11912 cause= "minmax_keypart_in_disjunctive_query";
11913 goto next_index;
11914 }
11915 }
11916
11917 /* If we got to this point, cur_index_info passes the test. */
11918 key_infix_parts= cur_key_infix_len ? (uint)
11919 (first_non_infix_part - first_non_group_part) : 0;
11920 cur_used_key_parts= cur_group_key_parts + key_infix_parts;
11921
11922 /* Compute the cost of using this index. */
11923 if (tree)
11924 {
11925 /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
11926 cur_index_tree= get_index_range_tree(cur_index, tree, param,
11927 &cur_param_idx);
11928 /* Check if this range tree can be used for prefix retrieval. */
11929 Cost_estimate dummy_cost;
11930 uint mrr_flags= HA_MRR_SORTED;
11931 uint mrr_bufsize=0;
11932 cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
11933 FALSE /*don't care*/,
11934 cur_index_tree, TRUE,
11935 &mrr_flags, &mrr_bufsize,
11936 &dummy_cost);
11937 #ifdef OPTIMIZER_TRACE
11938 if (unlikely(cur_index_tree && trace->is_started()))
11939 {
11940 trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics);
11941 Opt_trace_array trace_range(trace, "ranges");
11942
11943 const KEY_PART_INFO *key_part= cur_index_info->key_part;
11944
11945 String range_info;
11946 range_info.set_charset(system_charset_info);
11947 append_range_all_keyparts(&trace_range, NULL, &range_info,
11948 cur_index_tree, key_part);
11949 }
11950 #endif
11951 }
11952 cost_group_min_max(table, cur_index_info, cur_used_key_parts,
11953 cur_group_key_parts, tree, cur_index_tree,
11954 cur_quick_prefix_records, have_min, have_max,
11955 &cur_read_cost, &cur_records);
11956 /*
11957 If cur_read_cost is lower than best_read_cost use cur_index.
11958 Do not compare doubles directly because they may have different
11959 representations (64 vs. 80 bits).
11960 */
11961 trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
11962 if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
11963 {
11964 index_info= cur_index_info;
11965 index= cur_index;
11966 best_read_cost= cur_read_cost;
11967 best_records= cur_records;
11968 best_index_tree= cur_index_tree;
11969 best_quick_prefix_records= cur_quick_prefix_records;
11970 best_param_idx= cur_param_idx;
11971 group_key_parts= cur_group_key_parts;
11972 group_prefix_len= cur_group_prefix_len;
11973 key_infix_len= cur_key_infix_len;
11974 if (key_infix_len)
11975 memcpy (key_infix, cur_key_infix, sizeof (key_infix));
11976 used_key_parts= cur_used_key_parts;
11977 }
11978
11979 next_index:
11980 if (cause)
11981 {
11982 trace_idx.add("usable", false).add_alnum("cause", cause);
11983 cause= NULL;
11984 }
11985 }
11986 trace_indices.end();
11987
11988 if (!index_info) /* No usable index found. */
11989 DBUG_RETURN(NULL);
11990
11991 /* Check (SA3) for the where clause. */
11992 if (join->conds && min_max_arg_item &&
11993 !check_group_min_max_predicates(join->conds, min_max_arg_item,
11994 (index_info->flags & HA_SPATIAL) ?
11995 Field::itMBR : Field::itRAW))
11996 {
11997 trace_group.add("usable", false).
11998 add_alnum("cause", "unsupported_predicate_on_agg_attribute");
11999 DBUG_RETURN(NULL);
12000 }
12001
12002 /* The query passes all tests, so construct a new TRP object. */
12003 read_plan= new (param->mem_root)
12004 TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
12005 min_max_arg_part,
12006 group_prefix_len, used_key_parts,
12007 group_key_parts, index_info, index,
12008 key_infix_len,
12009 (key_infix_len > 0) ? key_infix : NULL,
12010 tree, best_index_tree, best_param_idx,
12011 best_quick_prefix_records);
12012 if (read_plan)
12013 {
12014 if (tree && read_plan->quick_prefix_records == 0)
12015 DBUG_RETURN(NULL);
12016
12017 read_plan->read_cost= best_read_cost;
12018 read_plan->records= best_records;
12019 if (read_time < best_read_cost && is_agg_distinct)
12020 {
12021 trace_group.add("index_scan", true);
12022 read_plan->read_cost= 0;
12023 read_plan->use_index_scan();
12024 }
12025
12026 DBUG_PRINT("info",
12027 ("Returning group min/max plan: cost: %g, records: %lu",
12028 read_plan->read_cost, (ulong) read_plan->records));
12029 }
12030
12031 DBUG_RETURN(read_plan);
12032 }
12033
12034
12035 /*
12036 Check that the MIN/MAX attribute participates only in range predicates
12037 with constants.
12038
12039 SYNOPSIS
12040 check_group_min_max_predicates()
12041 cond tree (or subtree) describing all or part of the WHERE
12042 clause being analyzed
12043 min_max_arg_item the field referenced by the MIN/MAX function(s)
12044 min_max_arg_part the keypart of the MIN/MAX argument if any
12045
12046 DESCRIPTION
12047 The function walks recursively over the cond tree representing a WHERE
12048 clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
12049 aggregate function, it is referenced only by one of the following
12050 predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.
12051
12052 RETURN
12053 TRUE if cond passes the test
12054 FALSE o/w
12055 */
12056
12057 static bool
check_group_min_max_predicates(Item * cond,Item_field * min_max_arg_item,Field::imagetype image_type)12058 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
12059 Field::imagetype image_type)
12060 {
12061 DBUG_ENTER("check_group_min_max_predicates");
12062 DBUG_ASSERT(cond && min_max_arg_item);
12063
12064 cond= cond->real_item();
12065 Item::Type cond_type= cond->type();
12066 if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
12067 {
12068 DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
12069 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
12070 Item *and_or_arg;
12071 while ((and_or_arg= li++))
12072 {
12073 if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
12074 image_type))
12075 DBUG_RETURN(FALSE);
12076 }
12077 DBUG_RETURN(TRUE);
12078 }
12079
12080 /*
12081 TODO:
12082 This is a very crude fix to handle sub-selects in the WHERE clause
12083 (Item_subselect objects). With the test below we rule out from the
12084 optimization all queries with subselects in the WHERE clause. What has to
12085 be done, is that here we should analyze whether the subselect references
12086 the MIN/MAX argument field, and disallow the optimization only if this is
12087 so.
12088 */
12089 if (cond_type == Item::SUBSELECT_ITEM)
12090 DBUG_RETURN(FALSE);
12091
12092 /*
12093 Condition of the form 'field' is equivalent to 'field <> 0' and thus
12094 satisfies the SA3 condition.
12095 */
12096 if (cond_type == Item::FIELD_ITEM)
12097 {
12098 DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
12099 DBUG_RETURN(TRUE);
12100 }
12101
12102 /* We presume that at this point there are no other Items than functions. */
12103 DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
12104
12105 /* Test if cond references only group-by or non-group fields. */
12106 Item_func *pred= (Item_func*) cond;
12107 Item *cur_arg;
12108 DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
12109 for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
12110 {
12111 Item **arguments= pred->arguments();
12112 cur_arg= arguments[arg_idx]->real_item();
12113 DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
12114 if (cur_arg->type() == Item::FIELD_ITEM)
12115 {
12116 if (min_max_arg_item->eq(cur_arg, 1))
12117 {
12118 /*
12119 If pred references the MIN/MAX argument, check whether pred is a range
12120 condition that compares the MIN/MAX argument with a constant.
12121 */
12122 Item_func::Functype pred_type= pred->functype();
12123 if (pred_type != Item_func::EQUAL_FUNC &&
12124 pred_type != Item_func::LT_FUNC &&
12125 pred_type != Item_func::LE_FUNC &&
12126 pred_type != Item_func::GT_FUNC &&
12127 pred_type != Item_func::GE_FUNC &&
12128 pred_type != Item_func::BETWEEN &&
12129 pred_type != Item_func::ISNULL_FUNC &&
12130 pred_type != Item_func::ISNOTNULL_FUNC &&
12131 pred_type != Item_func::EQ_FUNC &&
12132 pred_type != Item_func::NE_FUNC)
12133 DBUG_RETURN(FALSE);
12134
12135 /* Check that pred compares min_max_arg_item with a constant. */
12136 Item *args[3];
12137 memset(args, 0, 3 * sizeof(Item*));
12138 bool inv;
12139 /* Test if this is a comparison of a field and a constant. */
12140 if (!simple_pred(pred, args, &inv))
12141 DBUG_RETURN(FALSE);
12142
12143 /* Check for compatible string comparisons - similar to get_mm_leaf. */
12144 if (args[0] && args[1] && !args[2] && // this is a binary function
12145 min_max_arg_item->result_type() == STRING_RESULT &&
12146 /*
12147 Don't use an index when comparing strings of different collations.
12148 */
12149 ((args[1]->result_type() == STRING_RESULT &&
12150 image_type == Field::itRAW &&
12151 min_max_arg_item->field->charset() != pred->compare_collation())
12152 ||
12153 /*
12154 We can't always use indexes when comparing a string index to a
12155 number.
12156 */
12157 (args[1]->result_type() != STRING_RESULT &&
12158 min_max_arg_item->field->cmp_type() != args[1]->result_type())))
12159 DBUG_RETURN(FALSE);
12160 }
12161 }
12162 else if (cur_arg->type() == Item::FUNC_ITEM)
12163 {
12164 if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
12165 image_type))
12166 DBUG_RETURN(FALSE);
12167 }
12168 else if (cur_arg->const_item())
12169 {
12170 /*
12171 For predicates of the form "const OP expr" we also have to check 'expr'
12172 to make a decision.
12173 */
12174 continue;
12175 }
12176 else
12177 DBUG_RETURN(FALSE);
12178 }
12179
12180 DBUG_RETURN(TRUE);
12181 }
12182
12183
12184 /*
12185 Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
12186 any. 'tree' must be a unique conjunction to ALL predicates in earlier
12187 keyparts of 'keypart_tree'.
12188
12189 E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
12190 covers 'field', all these conditions satisfies the requirement:
12191
12192 1. "(kp1=2 OR kp1=3) AND kp2=10" => returns "kp2=10"
12193 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)" => returns "kp2=10"
12194 3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12195 => returns "kp2=10 OR kp2=11"
12196
12197 whereas these do not
12198 1. "(kp1=2 AND kp2=10) OR kp1=3"
12199 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
12200 3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12201
12202 This function effectively tests requirement WA2. In combination with
12203 a test that the returned tree has no more than one range it is also
12204 a test of NGA3.
12205
12206 @param[in] field The field we want the SEL_ARG tree for
12207 @param[in] keypart_tree Root node of the SEL_ARG* tree for the index
12208 @param[out] cur_range The SEL_ARG tree, if any, for the keypart
12209 covering field 'keypart_field'
12210 @retval true 'keypart_tree' contained a predicate for 'field' that
12211 is not conjunction to all predicates on earlier keyparts
12212 @retval false otherwise
12213 */
12214
12215 static bool
get_sel_arg_for_keypart(Field * field,SEL_ARG * keypart_tree,SEL_ARG ** cur_range)12216 get_sel_arg_for_keypart(Field *field,
12217 SEL_ARG *keypart_tree,
12218 SEL_ARG **cur_range)
12219 {
12220 if (keypart_tree == NULL)
12221 return false;
12222 if (keypart_tree->type != SEL_ARG::KEY_RANGE)
12223 {
12224 /*
12225 A range predicate not usable by Loose Index Scan is found.
12226 Predicates for keypart 'keypart_tree->part' and later keyparts
12227 cannot be used.
12228 */
12229 *cur_range= keypart_tree;
12230 return false;
12231 }
12232 if (keypart_tree->field->eq(field))
12233 {
12234 *cur_range= keypart_tree;
12235 return false;
12236 }
12237
12238 SEL_ARG *tree_first_range= NULL;
12239 SEL_ARG *first_kp= keypart_tree->first();
12240
12241 for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
12242 {
12243 SEL_ARG *curr_tree= NULL;
12244 if (cur_kp->next_key_part)
12245 {
12246 if (get_sel_arg_for_keypart(field,
12247 cur_kp->next_key_part,
12248 &curr_tree))
12249 return true;
12250 }
12251 /**
12252 Check if the SEL_ARG tree for 'field' is identical for all ranges in
12253 'keypart_tree
12254 */
12255 if (cur_kp == first_kp)
12256 tree_first_range= curr_tree;
12257 else if (!all_same(tree_first_range, curr_tree))
12258 return true;
12259 }
12260 *cur_range= tree_first_range;
12261 return false;
12262 }
12263
12264 /*
12265 Extract a sequence of constants from a conjunction of equality predicates.
12266
12267 SYNOPSIS
12268 get_constant_key_infix()
12269 index_info [in] Descriptor of the chosen index.
12270 index_range_tree [in] Range tree for the chosen index
12271 first_non_group_part [in] First index part after group attribute parts
12272 min_max_arg_part [in] The keypart of the MIN/MAX argument if any
12273 last_part [in] Last keypart of the index
12274 thd [in] Current thread
12275 key_infix [out] Infix of constants to be used for index lookup
12276 key_infix_len [out] Lenghth of the infix
12277 first_non_infix_part [out] The first keypart after the infix (if any)
12278
12279 DESCRIPTION
12280 Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
12281 for each keypart field NGF_i not in GROUP-BY, check that there is a
12282 constant equality predicate among conds with the form (NGF_i = const_ci) or
12283 (const_ci = NGF_i).
12284 Thus all the NGF_i attributes must fill the 'gap' between the last group-by
12285 attribute and the MIN/MAX attribute in the index (if present). Also ensure
12286 that there is only a single range on NGF_i (NGA3). If these
12287 conditions hold, copy each constant from its corresponding predicate into
12288 key_infix, in the order its NG_i attribute appears in the index, and update
12289 key_infix_len with the total length of the key parts in key_infix.
12290
12291 RETURN
12292 TRUE if the index passes the test
12293 FALSE o/w
12294 */
12295 static bool
get_constant_key_infix(KEY * index_info,SEL_ARG * index_range_tree,KEY_PART_INFO * first_non_group_part,KEY_PART_INFO * min_max_arg_part,KEY_PART_INFO * last_part,THD * thd,uchar * key_infix,uint * key_infix_len,KEY_PART_INFO ** first_non_infix_part)12296 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
12297 KEY_PART_INFO *first_non_group_part,
12298 KEY_PART_INFO *min_max_arg_part,
12299 KEY_PART_INFO *last_part, THD *thd,
12300 uchar *key_infix, uint *key_infix_len,
12301 KEY_PART_INFO **first_non_infix_part)
12302 {
12303 SEL_ARG *cur_range;
12304 KEY_PART_INFO *cur_part;
12305 /* End part for the first loop below. */
12306 KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
12307
12308 *key_infix_len= 0;
12309 uchar *key_ptr= key_infix;
12310 for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
12311 {
12312 cur_range= NULL;
12313 /*
12314 Check NGA3:
12315 1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
12316 checks for a unique conjunction of this tree with all the predicates
12317 on the earlier keyparts in the index.
12318 2. Check for multiple ranges on the found keypart tree.
12319
12320 We assume that index_range_tree points to the leftmost keypart in
12321 the index.
12322 */
12323 if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
12324 &cur_range))
12325 return false;
12326
12327 if (cur_range && cur_range->elements > 1)
12328 return false;
12329
12330 if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
12331 {
12332 if (min_max_arg_part)
12333 return false; /* The current keypart has no range predicates at all. */
12334 else
12335 {
12336 *first_non_infix_part= cur_part;
12337 return true;
12338 }
12339 }
12340
12341 if ((cur_range->min_flag & NO_MIN_RANGE) ||
12342 (cur_range->max_flag & NO_MAX_RANGE) ||
12343 (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
12344 return false;
12345
12346 uint field_length= cur_part->store_length;
12347 if (cur_range->maybe_null &&
12348 cur_range->min_value[0] && cur_range->max_value[0])
12349 {
12350 /*
12351 cur_range specifies 'IS NULL'. In this case the argument points
12352 to a "null value" (a copy of is_null_string) that we do not
12353 memcmp(), or memcpy to a field.
12354 */
12355 DBUG_ASSERT (field_length > 0);
12356 *key_ptr= 1;
12357 key_ptr+= field_length;
12358 *key_infix_len+= field_length;
12359 }
12360 else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
12361 { /* cur_range specifies an equality condition. */
12362 memcpy(key_ptr, cur_range->min_value, field_length);
12363 key_ptr+= field_length;
12364 *key_infix_len+= field_length;
12365 }
12366 else
12367 return false;
12368 }
12369
12370 if (!min_max_arg_part && (cur_part == last_part))
12371 *first_non_infix_part= last_part;
12372
12373 return TRUE;
12374 }
12375
12376
12377 /*
12378 Find the key part referenced by a field.
12379
12380 SYNOPSIS
12381 get_field_keypart()
12382 index descriptor of an index
12383 field field that possibly references some key part in index
12384
12385 NOTES
12386 The return value can be used to get a KEY_PART_INFO pointer by
12387 part= index->key_part + get_field_keypart(...) - 1;
12388
12389 RETURN
12390 Positive number which is the consecutive number of the key part, or
12391 0 if field does not reference any index field.
12392 */
12393
12394 static inline uint
get_field_keypart(KEY * index,Field * field)12395 get_field_keypart(KEY *index, Field *field)
12396 {
12397 KEY_PART_INFO *part, *end;
12398
12399 for (part= index->key_part, end= part + actual_key_parts(index) ;
12400 part < end; part++)
12401 {
12402 if (field->eq(part->field))
12403 return part - index->key_part + 1;
12404 }
12405 return 0;
12406 }
12407
12408
12409 /*
12410 Find the SEL_ARG sub-tree that corresponds to the chosen index.
12411
12412 SYNOPSIS
12413 get_index_range_tree()
12414 index [in] The ID of the index being looked for
12415 range_tree[in] Tree of ranges being searched
12416 param [in] PARAM from SQL_SELECT::test_quick_select
12417 param_idx [out] Index in the array PARAM::key that corresponds to 'index'
12418
12419 DESCRIPTION
12420
12421 A SEL_TREE contains range trees for all usable indexes. This procedure
12422 finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
12423 ordered in the same way as the members of PARAM::key, thus we first find
12424 the corresponding index in the array PARAM::key. This index is returned
12425 through the variable param_idx, to be used later as argument of
12426 check_quick_select().
12427
12428 RETURN
12429 Pointer to the SEL_ARG subtree that corresponds to index.
12430 */
12431
get_index_range_tree(uint index,SEL_TREE * range_tree,PARAM * param,uint * param_idx)12432 SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
12433 uint *param_idx)
12434 {
12435 uint idx= 0; /* Index nr in param->key_parts */
12436 while (idx < param->keys)
12437 {
12438 if (index == param->real_keynr[idx])
12439 break;
12440 idx++;
12441 }
12442 *param_idx= idx;
12443 return(range_tree->keys[idx]);
12444 }
12445
12446
12447 /*
12448 Compute the cost of a quick_group_min_max_select for a particular index.
12449
12450 SYNOPSIS
12451 cost_group_min_max()
12452 table [in] The table being accessed
12453 index_info [in] The index used to access the table
12454 used_key_parts [in] Number of key parts used to access the index
12455 group_key_parts [in] Number of index key parts in the group prefix
12456 range_tree [in] Tree of ranges for all indexes
12457 index_tree [in] The range tree for the current index
12458 quick_prefix_records [in] Number of records retrieved by the internally
12459 used quick range select if any
12460 have_min [in] True if there is a MIN function
12461 have_max [in] True if there is a MAX function
12462 read_cost [out] The cost to retrieve rows via this quick select
12463 records [out] The number of rows retrieved
12464
12465 DESCRIPTION
12466 This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
12467 the number of rows returned.
12468
12469 NOTES
12470 The cost computation distinguishes several cases:
12471 1) No equality predicates over non-group attributes (thus no key_infix).
12472 If groups are bigger than blocks on the average, then we assume that it
12473 is very unlikely that block ends are aligned with group ends, thus even
12474 if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
12475 keys, except for the first MIN and the last MAX keys, will be in the
12476 same block. If groups are smaller than blocks, then we are going to
12477 read all blocks.
12478 2) There are equality predicates over non-group attributes.
12479 In this case the group prefix is extended by additional constants, and
12480 as a result the min/max values are inside sub-groups of the original
12481 groups. The number of blocks that will be read depends on whether the
12482 ends of these sub-groups will be contained in the same or in different
12483 blocks. We compute the probability for the two ends of a subgroup to be
12484 in two different blocks as the ratio of:
12485 - the number of positions of the left-end of a subgroup inside a group,
12486 such that the right end of the subgroup is past the end of the buffer
12487 containing the left-end, and
12488 - the total number of possible positions for the left-end of the
12489 subgroup, which is the number of keys in the containing group.
12490 We assume it is very unlikely that two ends of subsequent subgroups are
12491 in the same block.
12492 3) The are range predicates over the group attributes.
12493 Then some groups may be filtered by the range predicates. We use the
12494 selectivity of the range predicates to decide how many groups will be
12495 filtered.
12496
12497 TODO
12498 - Take into account the optional range predicates over the MIN/MAX
12499 argument.
12500 - Check if we have a PK index and we use all cols - then each key is a
12501 group, and it will be better to use an index scan.
12502
12503 RETURN
12504 None
12505 */
12506
cost_group_min_max(TABLE * table,KEY * index_info,uint used_key_parts,uint group_key_parts,SEL_TREE * range_tree,SEL_ARG * index_tree,ha_rows quick_prefix_records,bool have_min,bool have_max,double * read_cost,ha_rows * records)12507 void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
12508 uint group_key_parts, SEL_TREE *range_tree,
12509 SEL_ARG *index_tree, ha_rows quick_prefix_records,
12510 bool have_min, bool have_max,
12511 double *read_cost, ha_rows *records)
12512 {
12513 ha_rows table_records;
12514 uint num_groups;
12515 uint num_blocks;
12516 uint keys_per_block;
12517 uint keys_per_group;
12518 uint keys_per_subgroup; /* Average number of keys in sub-groups */
12519 /* formed by a key infix. */
12520 double p_overlap; /* Probability that a sub-group overlaps two blocks. */
12521 double quick_prefix_selectivity;
12522 double io_cost;
12523 DBUG_ENTER("cost_group_min_max");
12524
12525 table_records= table->file->stats.records;
12526 keys_per_block= (table->file->stats.block_size / 2 /
12527 (index_info->key_length + table->file->ref_length)
12528 + 1);
12529 num_blocks= (uint)(table_records / keys_per_block) + 1;
12530
12531 /* Compute the number of keys in a group. */
12532 keys_per_group= index_info->rec_per_key[group_key_parts - 1];
12533 if (keys_per_group == 0) /* If there is no statistics try to guess */
12534 /* each group contains 10% of all records */
12535 keys_per_group= (uint)(table_records / 10) + 1;
12536 num_groups= (uint)(table_records / keys_per_group) + 1;
12537
12538 /* Apply the selectivity of the quick select for group prefixes. */
12539 if (range_tree && (quick_prefix_records != HA_POS_ERROR))
12540 {
12541 quick_prefix_selectivity= (double) quick_prefix_records /
12542 (double) table_records;
12543 num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
12544 set_if_bigger(num_groups, 1);
12545 }
12546
12547 if (used_key_parts > group_key_parts)
12548 { /*
12549 Compute the probability that two ends of a subgroup are inside
12550 different blocks.
12551 */
12552 keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1];
12553 if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
12554 p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
12555 else
12556 {
12557 double blocks_per_group= (double) num_blocks / (double) num_groups;
12558 p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
12559 p_overlap= min(p_overlap, 1.0);
12560 }
12561 io_cost= min<double>(num_groups * (1 + p_overlap), num_blocks);
12562 }
12563 else
12564 io_cost= (keys_per_group > keys_per_block) ?
12565 (have_min && have_max) ? (double) (num_groups + 1) :
12566 (double) num_groups :
12567 (double) num_blocks;
12568
12569 /*
12570 CPU cost must be comparable to that of an index scan as computed
12571 in SQL_SELECT::test_quick_select(). When the groups are small,
12572 e.g. for a unique index, using index scan will be cheaper since it
12573 reads the next record without having to re-position to it on every
12574 group. To make the CPU cost reflect this, we estimate the CPU cost
12575 as the sum of:
12576 1. Cost for evaluating the condition (similarly as for index scan).
12577 2. Cost for navigating the index structure (assuming a b-tree).
12578 Note: We only add the cost for one comparision per block. For a
12579 b-tree the number of comparisons will be larger.
12580 TODO: This cost should be provided by the storage engine.
12581 */
12582 const double tree_traversal_cost=
12583 ceil(log(static_cast<double>(table_records))/
12584 log(static_cast<double>(keys_per_block))) * ROWID_COMPARE_COST;
12585
12586 const double cpu_cost= num_groups * (tree_traversal_cost + ROW_EVALUATE_COST);
12587
12588 *read_cost= io_cost + cpu_cost;
12589 *records= num_groups;
12590
12591 DBUG_PRINT("info",
12592 ("table rows: %lu keys/block: %u keys/group: %u result rows: %lu blocks: %u",
12593 (ulong)table_records, keys_per_block, keys_per_group,
12594 (ulong) *records, num_blocks));
12595 DBUG_VOID_RETURN;
12596 }
12597
12598
12599 /*
12600 Construct a new quick select object for queries with group by with min/max.
12601
12602 SYNOPSIS
12603 TRP_GROUP_MIN_MAX::make_quick()
12604 param Parameter from test_quick_select
12605 retrieve_full_rows ignored
12606 parent_alloc Memory pool to use, if any.
12607
12608 NOTES
12609 Make_quick ignores the retrieve_full_rows parameter because
12610 QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
12611 The other parameter are ignored as well because all necessary
12612 data to create the QUICK object is computed at this TRP creation
12613 time.
12614
12615 RETURN
12616 New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
12617 NULL otherwise.
12618 */
12619
12620 QUICK_SELECT_I *
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)12621 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
12622 MEM_ROOT *parent_alloc)
12623 {
12624 QUICK_GROUP_MIN_MAX_SELECT *quick;
12625 DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
12626
12627 quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
12628 param->thd->lex->current_select->join,
12629 have_min, have_max,
12630 have_agg_distinct, min_max_arg_part,
12631 group_prefix_len, group_key_parts,
12632 used_key_parts, index_info, index,
12633 read_cost, records, key_infix_len,
12634 key_infix, parent_alloc, is_index_scan);
12635 if (!quick)
12636 DBUG_RETURN(NULL);
12637
12638 if (quick->init())
12639 {
12640 delete quick;
12641 DBUG_RETURN(NULL);
12642 }
12643
12644 if (range_tree)
12645 {
12646 DBUG_ASSERT(quick_prefix_records > 0);
12647 if (quick_prefix_records == HA_POS_ERROR)
12648 quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
12649 else
12650 {
12651 /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
12652 quick->quick_prefix_select= get_quick_select(param, param_idx,
12653 index_tree,
12654 HA_MRR_SORTED,
12655 0,
12656 &quick->alloc);
12657 if (!quick->quick_prefix_select)
12658 {
12659 delete quick;
12660 DBUG_RETURN(NULL);
12661 }
12662 }
12663 /*
12664 Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
12665 attribute, and create an array of QUICK_RANGES to be used by the
12666 new quick select.
12667 */
12668 if (min_max_arg_part)
12669 {
12670 SEL_ARG *min_max_range= index_tree;
12671 while (min_max_range) /* Find the tree for the MIN/MAX key part. */
12672 {
12673 if (min_max_range->field->eq(min_max_arg_part->field))
12674 break;
12675 min_max_range= min_max_range->next_key_part;
12676 }
12677 /* Scroll to the leftmost interval for the MIN/MAX argument. */
12678 while (min_max_range && min_max_range->prev)
12679 min_max_range= min_max_range->prev;
12680 /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
12681 while (min_max_range)
12682 {
12683 if (quick->add_range(min_max_range))
12684 {
12685 delete quick;
12686 quick= NULL;
12687 DBUG_RETURN(NULL);
12688 }
12689 min_max_range= min_max_range->next;
12690 }
12691 }
12692 }
12693 else
12694 quick->quick_prefix_select= NULL;
12695
12696 quick->update_key_stat();
12697 quick->adjust_prefix_ranges();
12698
12699 DBUG_RETURN(quick);
12700 }
12701
12702
12703 /*
12704 Construct new quick select for group queries with min/max.
12705
12706 SYNOPSIS
12707 QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
12708 table The table being accessed
12709 join Descriptor of the current query
12710 have_min TRUE if the query selects a MIN function
12711 have_max TRUE if the query selects a MAX function
12712 min_max_arg_part The only argument field of all MIN/MAX functions
12713 group_prefix_len Length of all key parts in the group prefix
12714 prefix_key_parts All key parts in the group prefix
12715 index_info The index chosen for data access
12716 use_index The id of index_info
12717 read_cost Cost of this access method
12718 records Number of records returned
12719 key_infix_len Length of the key infix appended to the group prefix
12720 key_infix Infix of constants from equality predicates
12721 parent_alloc Memory pool for this and quick_prefix_select data
12722 is_index_scan get the next different key not by jumping on it via
12723 index read, but by scanning until the end of the
12724 rows with equal key value.
12725
12726 RETURN
12727 None
12728 */
12729
12730 QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE * table,JOIN * join_arg,bool have_min_arg,bool have_max_arg,bool have_agg_distinct_arg,KEY_PART_INFO * min_max_arg_part_arg,uint group_prefix_len_arg,uint group_key_parts_arg,uint used_key_parts_arg,KEY * index_info_arg,uint use_index,double read_cost_arg,ha_rows records_arg,uint key_infix_len_arg,uchar * key_infix_arg,MEM_ROOT * parent_alloc,bool is_index_scan_arg)12731 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
12732 bool have_max_arg, bool have_agg_distinct_arg,
12733 KEY_PART_INFO *min_max_arg_part_arg,
12734 uint group_prefix_len_arg, uint group_key_parts_arg,
12735 uint used_key_parts_arg, KEY *index_info_arg,
12736 uint use_index, double read_cost_arg,
12737 ha_rows records_arg, uint key_infix_len_arg,
12738 uchar *key_infix_arg, MEM_ROOT *parent_alloc,
12739 bool is_index_scan_arg)
12740 :join(join_arg), index_info(index_info_arg),
12741 group_prefix_len(group_prefix_len_arg),
12742 group_key_parts(group_key_parts_arg), have_min(have_min_arg),
12743 have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
12744 seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
12745 key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
12746 min_functions_it(NULL), max_functions_it(NULL),
12747 is_index_scan(is_index_scan_arg)
12748 {
12749 head= table;
12750 index= use_index;
12751 record= head->record[0];
12752 tmp_record= head->record[1];
12753 read_time= read_cost_arg;
12754 records= records_arg;
12755 used_key_parts= used_key_parts_arg;
12756 real_key_parts= used_key_parts_arg;
12757 real_prefix_len= group_prefix_len + key_infix_len;
12758 group_prefix= NULL;
12759 min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
12760
12761 /*
12762 We can't have parent_alloc set as the init function can't handle this case
12763 yet.
12764 */
12765 DBUG_ASSERT(!parent_alloc);
12766 if (!parent_alloc)
12767 {
12768 init_sql_alloc(&alloc, join->thd->variables.range_alloc_block_size, 0);
12769 join->thd->mem_root= &alloc;
12770 }
12771 else
12772 memset(&alloc, 0, sizeof(MEM_ROOT)); // ensure that it's not used
12773 }
12774
12775
12776 /*
12777 Do post-constructor initialization.
12778
12779 SYNOPSIS
12780 QUICK_GROUP_MIN_MAX_SELECT::init()
12781
12782 DESCRIPTION
12783 The method performs initialization that cannot be done in the constructor
12784 such as memory allocations that may fail. It allocates memory for the
12785 group prefix and inifix buffers, and for the lists of MIN/MAX item to be
12786 updated during execution.
12787
12788 RETURN
12789 0 OK
12790 other Error code
12791 */
12792
init()12793 int QUICK_GROUP_MIN_MAX_SELECT::init()
12794 {
12795 if (group_prefix) /* Already initialized. */
12796 return 0;
12797
12798 if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len)))
12799 return 1;
12800 /*
12801 We may use group_prefix to store keys with all select fields, so allocate
12802 enough space for it.
12803 */
12804 if (!(group_prefix= (uchar*) alloc_root(&alloc,
12805 real_prefix_len + min_max_arg_len)))
12806 return 1;
12807
12808 if (key_infix_len > 0)
12809 {
12810 /*
12811 The memory location pointed to by key_infix will be deleted soon, so
12812 allocate a new buffer and copy the key_infix into it.
12813 */
12814 uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
12815 if (!tmp_key_infix)
12816 return 1;
12817 memcpy(tmp_key_infix, this->key_infix, key_infix_len);
12818 this->key_infix= tmp_key_infix;
12819 }
12820
12821 if (min_max_arg_part)
12822 {
12823 if (my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
12824 return 1;
12825
12826 if (have_min)
12827 {
12828 if (!(min_functions= new List<Item_sum>))
12829 return 1;
12830 }
12831 else
12832 min_functions= NULL;
12833 if (have_max)
12834 {
12835 if (!(max_functions= new List<Item_sum>))
12836 return 1;
12837 }
12838 else
12839 max_functions= NULL;
12840
12841 Item_sum *min_max_item;
12842 Item_sum **func_ptr= join->sum_funcs;
12843 while ((min_max_item= *(func_ptr++)))
12844 {
12845 if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
12846 min_functions->push_back(min_max_item);
12847 else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
12848 max_functions->push_back(min_max_item);
12849 }
12850
12851 if (have_min)
12852 {
12853 if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
12854 return 1;
12855 }
12856
12857 if (have_max)
12858 {
12859 if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
12860 return 1;
12861 }
12862 }
12863 else
12864 min_max_ranges.elements= 0;
12865
12866 return 0;
12867 }
12868
12869
~QUICK_GROUP_MIN_MAX_SELECT()12870 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
12871 {
12872 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
12873 if (head->file->inited)
12874 /*
12875 We may have used this object for index access during
12876 create_sort_index() and then switched to rnd access for the rest
12877 of execution. Since we don't do cleanup until now, we must call
12878 ha_*_end() for whatever is the current access method.
12879 */
12880 head->file->ha_index_or_rnd_end();
12881 if (min_max_arg_part)
12882 delete_dynamic(&min_max_ranges);
12883 free_root(&alloc,MYF(0));
12884 delete min_functions_it;
12885 delete max_functions_it;
12886 delete quick_prefix_select;
12887 DBUG_VOID_RETURN;
12888 }
12889
12890
12891 /*
12892 Eventually create and add a new quick range object.
12893
12894 SYNOPSIS
12895 QUICK_GROUP_MIN_MAX_SELECT::add_range()
12896 sel_range Range object from which a
12897
12898 NOTES
12899 Construct a new QUICK_RANGE object from a SEL_ARG object, and
12900 add it to the array min_max_ranges. If sel_arg is an infinite
12901 range, e.g. (x < 5 or x > 4), then skip it and do not construct
12902 a quick range.
12903
12904 RETURN
12905 FALSE on success
12906 TRUE otherwise
12907 */
12908
add_range(SEL_ARG * sel_range)12909 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
12910 {
12911 QUICK_RANGE *range;
12912 uint range_flag= sel_range->min_flag | sel_range->max_flag;
12913
12914 /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
12915 if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
12916 return FALSE;
12917
12918 if (!(sel_range->min_flag & NO_MIN_RANGE) &&
12919 !(sel_range->max_flag & NO_MAX_RANGE))
12920 {
12921 if (sel_range->maybe_null &&
12922 sel_range->min_value[0] && sel_range->max_value[0])
12923 range_flag|= NULL_RANGE; /* IS NULL condition */
12924 /*
12925 Do not perform comparison if one of the argiment is NULL value.
12926 */
12927 else if (!sel_range->min_value[0] &&
12928 !sel_range->max_value[0] &&
12929 memcmp(sel_range->min_value, sel_range->max_value,
12930 min_max_arg_len) == 0)
12931 range_flag|= EQ_RANGE; /* equality condition */
12932 }
12933 range= new QUICK_RANGE(sel_range->min_value, min_max_arg_len,
12934 make_keypart_map(sel_range->part),
12935 sel_range->max_value, min_max_arg_len,
12936 make_keypart_map(sel_range->part),
12937 range_flag);
12938 if (!range)
12939 return TRUE;
12940 if (insert_dynamic(&min_max_ranges, &range))
12941 return TRUE;
12942 return FALSE;
12943 }
12944
12945
12946 /*
12947 Opens the ranges if there are more conditions in quick_prefix_select than
12948 the ones used for jumping through the prefixes.
12949
12950 SYNOPSIS
12951 QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
12952
12953 NOTES
12954 quick_prefix_select is made over the conditions on the whole key.
12955 It defines a number of ranges of length x.
12956 However when jumping through the prefixes we use only the the first
12957 few most significant keyparts in the range key. However if there
12958 are more keyparts to follow the ones we are using we must make the
12959 condition on the key inclusive (because x < "ab" means
12960 x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b').
12961 To achive the above we must turn off the NEAR_MIN/NEAR_MAX
12962 */
adjust_prefix_ranges()12963 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
12964 {
12965 if (quick_prefix_select &&
12966 group_prefix_len < quick_prefix_select->max_used_key_length)
12967 {
12968 DYNAMIC_ARRAY *arr;
12969 uint inx;
12970
12971 for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
12972 {
12973 QUICK_RANGE *range;
12974
12975 get_dynamic(arr, (uchar*)&range, inx);
12976 range->flag &= ~(NEAR_MIN | NEAR_MAX);
12977 }
12978 }
12979 }
12980
12981
12982 /*
12983 Determine the total number and length of the keys that will be used for
12984 index lookup.
12985
12986 SYNOPSIS
12987 QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
12988
12989 DESCRIPTION
12990 The total length of the keys used for index lookup depends on whether
12991 there are any predicates referencing the min/max argument, and/or if
12992 the min/max argument field can be NULL.
12993 This function does an optimistic analysis whether the search key might
12994 be extended by a constant for the min/max keypart. It is 'optimistic'
12995 because during actual execution it may happen that a particular range
12996 is skipped, and then a shorter key will be used. However this is data
12997 dependent and can't be easily estimated here.
12998
12999 RETURN
13000 None
13001 */
13002
update_key_stat()13003 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13004 {
13005 max_used_key_length= real_prefix_len;
13006 if (min_max_ranges.elements > 0)
13007 {
13008 QUICK_RANGE *cur_range;
13009 if (have_min)
13010 { /* Check if the right-most range has a lower boundary. */
13011 get_dynamic(&min_max_ranges, (uchar*)&cur_range,
13012 min_max_ranges.elements - 1);
13013 if (!(cur_range->flag & NO_MIN_RANGE))
13014 {
13015 max_used_key_length+= min_max_arg_len;
13016 used_key_parts++;
13017 return;
13018 }
13019 }
13020 if (have_max)
13021 { /* Check if the left-most range has an upper boundary. */
13022 get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
13023 if (!(cur_range->flag & NO_MAX_RANGE))
13024 {
13025 max_used_key_length+= min_max_arg_len;
13026 used_key_parts++;
13027 return;
13028 }
13029 }
13030 }
13031 else if (have_min && min_max_arg_part &&
13032 min_max_arg_part->field->real_maybe_null())
13033 {
13034 /*
13035 If a MIN/MAX argument value is NULL, we can quickly determine
13036 that we're in the beginning of the next group, because NULLs
13037 are always < any other value. This allows us to quickly
13038 determine the end of the current group and jump to the next
13039 group (see next_min()) and thus effectively increases the
13040 usable key length.
13041 */
13042 max_used_key_length+= min_max_arg_len;
13043 used_key_parts++;
13044 }
13045 }
13046
13047
13048 /*
13049 Initialize a quick group min/max select for key retrieval.
13050
13051 SYNOPSIS
13052 QUICK_GROUP_MIN_MAX_SELECT::reset()
13053
13054 DESCRIPTION
13055 Initialize the index chosen for access and find and store the prefix
13056 of the last group. The method is expensive since it performs disk access.
13057
13058 RETURN
13059 0 OK
13060 other Error code
13061 */
13062
reset(void)13063 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
13064 {
13065 int result;
13066 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
13067
13068 seen_first_key= false;
13069 head->set_keyread(TRUE); /* We need only the key attributes */
13070 /*
13071 Request ordered index access as usage of ::index_last(),
13072 ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
13073 */
13074 if ((result= head->file->ha_index_init(index, true)))
13075 {
13076 head->file->print_error(result, MYF(0));
13077 DBUG_RETURN(result);
13078 }
13079 if (quick_prefix_select && quick_prefix_select->reset())
13080 DBUG_RETURN(1);
13081
13082 result= head->file->ha_index_last(record);
13083 if (result != 0)
13084 {
13085 if (result == HA_ERR_END_OF_FILE)
13086 DBUG_RETURN(0);
13087 else
13088 DBUG_RETURN(result);
13089 }
13090
13091 /* Save the prefix of the last group. */
13092 key_copy(last_prefix, record, index_info, group_prefix_len);
13093
13094 DBUG_RETURN(0);
13095 }
13096
13097
13098
13099 /*
13100 Get the next key containing the MIN and/or MAX key for the next group.
13101
13102 SYNOPSIS
13103 QUICK_GROUP_MIN_MAX_SELECT::get_next()
13104
13105 DESCRIPTION
13106 The method finds the next subsequent group of records that satisfies the
13107 query conditions and finds the keys that contain the MIN/MAX values for
13108 the key part referenced by the MIN/MAX function(s). Once a group and its
13109 MIN/MAX values are found, store these values in the Item_sum objects for
13110 the MIN/MAX functions. The rest of the values in the result row are stored
13111 in the Item_field::result_field of each select field. If the query does
13112 not contain MIN and/or MAX functions, then the function only finds the
13113 group prefix, which is a query answer itself.
13114
13115 NOTES
13116 If both MIN and MAX are computed, then we use the fact that if there is
13117 no MIN key, there can't be a MAX key as well, so we can skip looking
13118 for a MAX key in this case.
13119
13120 RETURN
13121 0 on success
13122 HA_ERR_END_OF_FILE if returned all keys
13123 other if some error occurred
13124 */
13125
get_next()13126 int QUICK_GROUP_MIN_MAX_SELECT::get_next()
13127 {
13128 int min_res= 0;
13129 int max_res= 0;
13130 #ifdef HPUX11
13131 /*
13132 volatile is required by a bug in the HP compiler due to which the
13133 last test of result fails.
13134 */
13135 volatile int result;
13136 #else
13137 int result;
13138 #endif
13139 int is_last_prefix= 0;
13140
13141 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");
13142
13143 /*
13144 Loop until a group is found that satisfies all query conditions or the last
13145 group is reached.
13146 */
13147 do
13148 {
13149 result= next_prefix();
13150 /*
13151 Check if this is the last group prefix. Notice that at this point
13152 this->record contains the current prefix in record format.
13153 */
13154 if (!result)
13155 {
13156 is_last_prefix= key_cmp(index_info->key_part, last_prefix,
13157 group_prefix_len);
13158 DBUG_ASSERT(is_last_prefix <= 0);
13159 }
13160 else
13161 {
13162 if (result == HA_ERR_KEY_NOT_FOUND)
13163 continue;
13164 break;
13165 }
13166
13167 if (have_min)
13168 {
13169 min_res= next_min();
13170 if (min_res == 0)
13171 update_min_result();
13172 }
13173 /* If there is no MIN in the group, there is no MAX either. */
13174 if ((have_max && !have_min) ||
13175 (have_max && have_min && (min_res == 0)))
13176 {
13177 max_res= next_max();
13178 if (max_res == 0)
13179 update_max_result();
13180 /* If a MIN was found, a MAX must have been found as well. */
13181 DBUG_ASSERT((have_max && !have_min) ||
13182 (have_max && have_min && (max_res == 0)));
13183 }
13184 /*
13185 If this is just a GROUP BY or DISTINCT without MIN or MAX and there
13186 are equality predicates for the key parts after the group, find the
13187 first sub-group with the extended prefix.
13188 */
13189 if (!have_min && !have_max && key_infix_len > 0)
13190 result= head->file->ha_index_read_map(record, group_prefix,
13191 make_prev_keypart_map(real_key_parts),
13192 HA_READ_KEY_EXACT);
13193
13194 result= have_min ? min_res : have_max ? max_res : result;
13195 } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13196 is_last_prefix != 0);
13197
13198 if (result == HA_ERR_KEY_NOT_FOUND)
13199 result= HA_ERR_END_OF_FILE;
13200
13201 DBUG_RETURN(result);
13202 }
13203
13204
13205 /*
13206 Retrieve the minimal key in the next group.
13207
13208 SYNOPSIS
13209 QUICK_GROUP_MIN_MAX_SELECT::next_min()
13210
13211 DESCRIPTION
13212 Find the minimal key within this group such that the key satisfies the query
13213 conditions and NULL semantics. The found key is loaded into this->record.
13214
13215 IMPLEMENTATION
13216 Depending on the values of min_max_ranges.elements, key_infix_len, and
13217 whether there is a NULL in the MIN field, this function may directly
13218 return without any data access. In this case we use the key loaded into
13219 this->record by the call to this->next_prefix() just before this call.
13220
13221 RETURN
13222 0 on success
13223 HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
13224 HA_ERR_END_OF_FILE - "" -
13225 other if some error occurred
13226 */
13227
next_min()13228 int QUICK_GROUP_MIN_MAX_SELECT::next_min()
13229 {
13230 int result= 0;
13231 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");
13232
13233 /* Find the MIN key using the eventually extended group prefix. */
13234 if (min_max_ranges.elements > 0)
13235 {
13236 if ((result= next_min_in_range()))
13237 DBUG_RETURN(result);
13238 }
13239 else
13240 {
13241 /* Apply the constant equality conditions to the non-group select fields */
13242 if (key_infix_len > 0)
13243 {
13244 if ((result= head->file->ha_index_read_map(record, group_prefix,
13245 make_prev_keypart_map(real_key_parts),
13246 HA_READ_KEY_EXACT)))
13247 DBUG_RETURN(result);
13248 }
13249
13250 /*
13251 If the min/max argument field is NULL, skip subsequent rows in the same
13252 group with NULL in it. Notice that:
13253 - if the first row in a group doesn't have a NULL in the field, no row
13254 in the same group has (because NULL < any other value),
13255 - min_max_arg_part->field->ptr points to some place in 'record'.
13256 */
13257 if (min_max_arg_part && min_max_arg_part->field->is_null())
13258 {
13259 uchar key_buf[MAX_KEY_LENGTH];
13260
13261 /* Find the first subsequent record without NULL in the MIN/MAX field. */
13262 key_copy(key_buf, record, index_info, max_used_key_length);
13263 result= head->file->ha_index_read_map(record, key_buf,
13264 make_keypart_map(real_key_parts),
13265 HA_READ_AFTER_KEY);
13266 /*
13267 Check if the new record belongs to the current group by comparing its
13268 prefix with the group's prefix. If it is from the next group, then the
13269 whole group has NULLs in the MIN/MAX field, so use the first record in
13270 the group as a result.
13271 TODO:
13272 It is possible to reuse this new record as the result candidate for the
13273 next call to next_min(), and to save one lookup in the next call. For
13274 this add a new member 'this->next_group_prefix'.
13275 */
13276 if (!result)
13277 {
13278 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
13279 key_restore(record, key_buf, index_info, 0);
13280 }
13281 else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
13282 result= 0; /* There is a result in any case. */
13283 }
13284 }
13285
13286 /*
13287 If the MIN attribute is non-nullable, this->record already contains the
13288 MIN key in the group, so just return.
13289 */
13290 DBUG_RETURN(result);
13291 }
13292
13293
13294 /*
13295 Retrieve the maximal key in the next group.
13296
13297 SYNOPSIS
13298 QUICK_GROUP_MIN_MAX_SELECT::next_max()
13299
13300 DESCRIPTION
13301 Lookup the maximal key of the group, and store it into this->record.
13302
13303 RETURN
13304 0 on success
13305 HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
13306 HA_ERR_END_OF_FILE - "" -
13307 other if some error occurred
13308 */
13309
next_max()13310 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
13311 {
13312 int result;
13313
13314 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
13315
13316 /* Get the last key in the (possibly extended) group. */
13317 if (min_max_ranges.elements > 0)
13318 result= next_max_in_range();
13319 else
13320 result= head->file->ha_index_read_map(record, group_prefix,
13321 make_prev_keypart_map(real_key_parts),
13322 HA_READ_PREFIX_LAST);
13323 DBUG_RETURN(result);
13324 }
13325
13326
13327 /**
13328 Find the next different key value by skiping all the rows with the same key
13329 value.
13330
13331 Implements a specialized loose index access method for queries
13332 containing aggregate functions with distinct of the form:
13333 SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
13334 This method comes to replace the index scan + Unique class
13335 (distinct selection) for loose index scan that visits all the rows of a
13336 covering index instead of jumping in the begining of each group.
13337 TODO: Placeholder function. To be replaced by a handler API call
13338
13339 @param is_index_scan hint to use index scan instead of random index read
13340 to find the next different value.
13341 @param file table handler
13342 @param key_part group key to compare
13343 @param record row data
13344 @param group_prefix current key prefix data
13345 @param group_prefix_len length of the current key prefix data
13346 @param group_key_parts number of the current key prefix columns
13347 @return status
13348 @retval 0 success
13349 @retval !0 failure
13350 */
13351
index_next_different(bool is_index_scan,handler * file,KEY_PART_INFO * key_part,uchar * record,const uchar * group_prefix,uint group_prefix_len,uint group_key_parts)13352 static int index_next_different (bool is_index_scan, handler *file,
13353 KEY_PART_INFO *key_part, uchar * record,
13354 const uchar * group_prefix,
13355 uint group_prefix_len,
13356 uint group_key_parts)
13357 {
13358 if (is_index_scan)
13359 {
13360 int result= 0;
13361
13362 while (!key_cmp (key_part, group_prefix, group_prefix_len))
13363 {
13364 result= file->ha_index_next(record);
13365 if (result)
13366 return(result);
13367 }
13368 return result;
13369 }
13370 else
13371 return file->ha_index_read_map(record, group_prefix,
13372 make_prev_keypart_map(group_key_parts),
13373 HA_READ_AFTER_KEY);
13374 }
13375
13376
13377 /*
13378 Determine the prefix of the next group.
13379
13380 SYNOPSIS
13381 QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13382
13383 DESCRIPTION
13384 Determine the prefix of the next group that satisfies the query conditions.
13385 If there is a range condition referencing the group attributes, use a
13386 QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
13387 condition. If there is a key infix of constants, append this infix
13388 immediately after the group attributes. The possibly extended prefix is
13389 stored in this->group_prefix. The first key of the found group is stored in
13390 this->record, on which relies this->next_min().
13391
13392 RETURN
13393 0 on success
13394 HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
13395 HA_ERR_END_OF_FILE if there are no more keys
13396 other if some error occurred
13397 */
next_prefix()13398 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13399 {
13400 int result;
13401 DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
13402
13403 if (quick_prefix_select)
13404 {
13405 uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
13406 if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
13407 group_key_parts,
13408 cur_prefix)))
13409 DBUG_RETURN(result);
13410 seen_first_key= TRUE;
13411 }
13412 else
13413 {
13414 if (!seen_first_key)
13415 {
13416 result= head->file->ha_index_first(record);
13417 if (result)
13418 DBUG_RETURN(result);
13419 seen_first_key= TRUE;
13420 }
13421 else
13422 {
13423 /* Load the first key in this group into record. */
13424 result= index_next_different (is_index_scan, head->file,
13425 index_info->key_part,
13426 record, group_prefix, group_prefix_len,
13427 group_key_parts);
13428 if (result)
13429 DBUG_RETURN(result);
13430 }
13431 }
13432
13433 /* Save the prefix of this group for subsequent calls. */
13434 key_copy(group_prefix, record, index_info, group_prefix_len);
13435 /* Append key_infix to group_prefix. */
13436 if (key_infix_len > 0)
13437 memcpy(group_prefix + group_prefix_len,
13438 key_infix, key_infix_len);
13439
13440 DBUG_RETURN(0);
13441 }
13442
13443
13444 /*
13445 Find the minimal key in a group that satisfies some range conditions for the
13446 min/max argument field.
13447
13448 SYNOPSIS
13449 QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
13450
13451 DESCRIPTION
13452 Given the sequence of ranges min_max_ranges, find the minimal key that is
13453 in the left-most possible range. If there is no such key, then the current
13454 group does not have a MIN key that satisfies the WHERE clause. If a key is
13455 found, its value is stored in this->record.
13456
13457 RETURN
13458 0 on success
13459 HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13460 the ranges
13461 HA_ERR_END_OF_FILE - "" -
13462 other if some error
13463 */
13464
next_min_in_range()13465 int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
13466 {
13467 ha_rkey_function find_flag;
13468 key_part_map keypart_map;
13469 QUICK_RANGE *cur_range;
13470 bool found_null= FALSE;
13471 int result= HA_ERR_KEY_NOT_FOUND;
13472
13473 DBUG_ASSERT(min_max_ranges.elements > 0);
13474
13475 for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
13476 { /* Search from the left-most range to the right. */
13477 get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);
13478
13479 /*
13480 If the current value for the min/max argument is bigger than the right
13481 boundary of cur_range, there is no need to check this range.
13482 */
13483 if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
13484 (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
13485 min_max_arg_len) == 1))
13486 continue;
13487
13488 if (cur_range->flag & NO_MIN_RANGE)
13489 {
13490 keypart_map= make_prev_keypart_map(real_key_parts);
13491 find_flag= HA_READ_KEY_EXACT;
13492 }
13493 else
13494 {
13495 /* Extend the search key with the lower boundary for this range. */
13496 memcpy(group_prefix + real_prefix_len, cur_range->min_key,
13497 cur_range->min_length);
13498 keypart_map= make_keypart_map(real_key_parts);
13499 find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
13500 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
13501 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
13502 }
13503
13504 result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
13505 find_flag);
13506 if (result)
13507 {
13508 if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13509 (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
13510 continue; /* Check the next range. */
13511
13512 /*
13513 In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
13514 HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
13515 range, it can't succeed for any other subsequent range.
13516 */
13517 break;
13518 }
13519
13520 /* A key was found. */
13521 if (cur_range->flag & EQ_RANGE)
13522 break; /* No need to perform the checks below for equal keys. */
13523
13524 if (cur_range->flag & NULL_RANGE)
13525 {
13526 /*
13527 Remember this key, and continue looking for a non-NULL key that
13528 satisfies some other condition.
13529 */
13530 memcpy(tmp_record, record, head->s->rec_buff_length);
13531 found_null= TRUE;
13532 continue;
13533 }
13534
13535 /* Check if record belongs to the current group. */
13536 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
13537 {
13538 result= HA_ERR_KEY_NOT_FOUND;
13539 continue;
13540 }
13541
13542 /* If there is an upper limit, check if the found key is in the range. */
13543 if ( !(cur_range->flag & NO_MAX_RANGE) )
13544 {
13545 /* Compose the MAX key for the range. */
13546 uchar *max_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
13547 memcpy(max_key, group_prefix, real_prefix_len);
13548 memcpy(max_key + real_prefix_len, cur_range->max_key,
13549 cur_range->max_length);
13550 /* Compare the found key with max_key. */
13551 int cmp_res= key_cmp(index_info->key_part, max_key,
13552 real_prefix_len + min_max_arg_len);
13553 /*
13554 The key is outside of the range if:
13555 the interval is open and the key is equal to the maximum boundry
13556 or
13557 the key is greater than the maximum
13558 */
13559 if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
13560 cmp_res > 0)
13561 {
13562 result= HA_ERR_KEY_NOT_FOUND;
13563 continue;
13564 }
13565 }
13566 /* If we got to this point, the current key qualifies as MIN. */
13567 DBUG_ASSERT(result == 0);
13568 break;
13569 }
13570 /*
13571 If there was a key with NULL in the MIN/MAX field, and there was no other
13572 key without NULL from the same group that satisfies some other condition,
13573 then use the key with the NULL.
13574 */
13575 if (found_null && result)
13576 {
13577 memcpy(record, tmp_record, head->s->rec_buff_length);
13578 result= 0;
13579 }
13580 return result;
13581 }
13582
13583
13584 /*
13585 Find the maximal key in a group that satisfies some range conditions for the
13586 min/max argument field.
13587
13588 SYNOPSIS
13589 QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
13590
13591 DESCRIPTION
13592 Given the sequence of ranges min_max_ranges, find the maximal key that is
13593 in the right-most possible range. If there is no such key, then the current
13594 group does not have a MAX key that satisfies the WHERE clause. If a key is
13595 found, its value is stored in this->record.
13596
13597 RETURN
13598 0 on success
13599 HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13600 the ranges
13601 HA_ERR_END_OF_FILE - "" -
13602 other if some error
13603 */
13604
next_max_in_range()13605 int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
13606 {
13607 ha_rkey_function find_flag;
13608 key_part_map keypart_map;
13609 QUICK_RANGE *cur_range;
13610 int result;
13611
13612 DBUG_ASSERT(min_max_ranges.elements > 0);
13613
13614 for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
13615 { /* Search from the right-most range to the left. */
13616 get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);
13617
13618 /*
13619 If the current value for the min/max argument is smaller than the left
13620 boundary of cur_range, there is no need to check this range.
13621 */
13622 if (range_idx != min_max_ranges.elements &&
13623 !(cur_range->flag & NO_MIN_RANGE) &&
13624 (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
13625 min_max_arg_len) == -1))
13626 continue;
13627
13628 if (cur_range->flag & NO_MAX_RANGE)
13629 {
13630 keypart_map= make_prev_keypart_map(real_key_parts);
13631 find_flag= HA_READ_PREFIX_LAST;
13632 }
13633 else
13634 {
13635 /* Extend the search key with the upper boundary for this range. */
13636 memcpy(group_prefix + real_prefix_len, cur_range->max_key,
13637 cur_range->max_length);
13638 keypart_map= make_keypart_map(real_key_parts);
13639 find_flag= (cur_range->flag & EQ_RANGE) ?
13640 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
13641 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
13642 }
13643
13644 result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
13645 find_flag);
13646
13647 if (result)
13648 {
13649 if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13650 (cur_range->flag & EQ_RANGE))
13651 continue; /* Check the next range. */
13652
13653 /*
13654 In no key was found with this upper bound, there certainly are no keys
13655 in the ranges to the left.
13656 */
13657 return result;
13658 }
13659 /* A key was found. */
13660 if (cur_range->flag & EQ_RANGE)
13661 return 0; /* No need to perform the checks below for equal keys. */
13662
13663 /* Check if record belongs to the current group. */
13664 if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
13665 continue; // Row not found
13666
13667 /* If there is a lower limit, check if the found key is in the range. */
13668 if ( !(cur_range->flag & NO_MIN_RANGE) )
13669 {
13670 /* Compose the MIN key for the range. */
13671 uchar *min_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
13672 memcpy(min_key, group_prefix, real_prefix_len);
13673 memcpy(min_key + real_prefix_len, cur_range->min_key,
13674 cur_range->min_length);
13675 /* Compare the found key with min_key. */
13676 int cmp_res= key_cmp(index_info->key_part, min_key,
13677 real_prefix_len + min_max_arg_len);
13678 /*
13679 The key is outside of the range if:
13680 the interval is open and the key is equal to the minimum boundry
13681 or
13682 the key is less than the minimum
13683 */
13684 if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
13685 cmp_res < 0)
13686 continue;
13687 }
13688 /* If we got to this point, the current key qualifies as MAX. */
13689 return result;
13690 }
13691 return HA_ERR_KEY_NOT_FOUND;
13692 }
13693
13694
13695 /*
13696 Update all MIN function results with the newly found value.
13697
13698 SYNOPSIS
13699 QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13700
13701 DESCRIPTION
13702 The method iterates through all MIN functions and updates the result value
13703 of each function by calling Item_sum::reset(), which in turn picks the new
13704 result value from this->head->record[0], previously updated by
13705 next_min(). The updated value is stored in a member variable of each of the
13706 Item_sum objects, depending on the value type.
13707
13708 IMPLEMENTATION
13709 The update must be done separately for MIN and MAX, immediately after
13710 next_min() was called and before next_max() is called, because both MIN and
13711 MAX take their result value from the same buffer this->head->record[0]
13712 (i.e. this->record).
13713
13714 RETURN
13715 None
13716 */
13717
update_min_result()13718 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13719 {
13720 Item_sum *min_func;
13721
13722 min_functions_it->rewind();
13723 while ((min_func= (*min_functions_it)++))
13724 min_func->reset_and_add();
13725 }
13726
13727
13728 /*
13729 Update all MAX function results with the newly found value.
13730
13731 SYNOPSIS
13732 QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13733
13734 DESCRIPTION
13735 The method iterates through all MAX functions and updates the result value
13736 of each function by calling Item_sum::reset(), which in turn picks the new
13737 result value from this->head->record[0], previously updated by
13738 next_max(). The updated value is stored in a member variable of each of the
13739 Item_sum objects, depending on the value type.
13740
13741 IMPLEMENTATION
13742 The update must be done separately for MIN and MAX, immediately after
13743 next_max() was called, because both MIN and MAX take their result value
13744 from the same buffer this->head->record[0] (i.e. this->record).
13745
13746 RETURN
13747 None
13748 */
13749
update_max_result()13750 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13751 {
13752 Item_sum *max_func;
13753
13754 max_functions_it->rewind();
13755 while ((max_func= (*max_functions_it)++))
13756 max_func->reset_and_add();
13757 }
13758
13759
13760 /*
13761 Append comma-separated list of keys this quick select uses to key_names;
13762 append comma-separated list of corresponding used lengths to used_lengths.
13763
13764 SYNOPSIS
13765 QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
13766 key_names [out] Names of used indexes
13767 used_lengths [out] Corresponding lengths of the index names
13768
13769 DESCRIPTION
13770 This method is used by select_describe to extract the names of the
13771 indexes used by a quick select.
13772
13773 */
13774
add_keys_and_lengths(String * key_names,String * used_lengths)13775 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
13776 String *used_lengths)
13777 {
13778 char buf[64];
13779 uint length;
13780 key_names->append(index_info->name);
13781 length= longlong2str(max_used_key_length, buf, 10) - buf;
13782 used_lengths->append(buf, length);
13783 }
13784
13785
13786
13787 /**
13788 Traverse the R-B range tree for this and later keyparts to see if
13789 there are at least as many equality ranges as defined by the limit.
13790
13791 @param keypart_root The root of a R-B tree of ranges for a given keypart.
13792 @param count[in,out] The number of equality ranges found so far
13793 @param limit The number of ranges
13794
13795 @retval true if limit > 0 and 'limit' or more equality ranges have been
13796 found in the range R-B trees
13797 @retval false otherwise
13798
13799 */
eq_ranges_exceeds_limit(SEL_ARG * keypart_root,uint * count,uint limit)13800 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count, uint limit)
13801 {
13802 // "Statistics instead of index dives" feature is turned off
13803 if (limit == 0)
13804 return false;
13805
13806 /*
13807 Optimization: if there is at least one equality range, index
13808 statistics will be used when limit is 1. It's safe to return true
13809 even without checking that there is an equality range because if
13810 there are none, index statistics will not be used anyway.
13811 */
13812 if (limit == 1)
13813 return true;
13814
13815 for(SEL_ARG *keypart_range= keypart_root->first();
13816 keypart_range; keypart_range= keypart_range->next)
13817 {
13818 /*
13819 This is an equality range predicate and should be counted if:
13820 1) the range for this keypart does not have a min/max flag
13821 (which indicates <, <= etc), and
13822 2) the lower and upper range boundaries have the same value
13823 (it's not a "x BETWEEN a AND b")
13824
13825 Note, however, that if this is an "x IS NULL" condition we don't
13826 count it because the number of NULL-values is likely to be off
13827 the index statistics we plan to use.
13828 */
13829 if (!keypart_range->min_flag && !keypart_range->max_flag && // 1)
13830 !keypart_range->cmp_max_to_min(keypart_range) && // 2)
13831 !keypart_range->is_null_interval()) // "x IS NULL"
13832 {
13833 /*
13834 Count predicates in the next keypart, but only if that keypart
13835 is the next in the index.
13836 */
13837 if (keypart_range->next_key_part &&
13838 keypart_range->next_key_part->part == keypart_range->part + 1)
13839 eq_ranges_exceeds_limit(keypart_range->next_key_part, count, limit);
13840 else
13841 // We've found a path of equlity predicates down to a keypart leaf
13842 (*count)++;
13843
13844 if (*count >= limit)
13845 return true;
13846 }
13847 }
13848 return false;
13849 }
13850
13851 #ifndef DBUG_OFF
13852
print_sel_tree(PARAM * param,SEL_TREE * tree,key_map * tree_map,const char * msg)13853 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
13854 const char *msg)
13855 {
13856 SEL_ARG **key,**end;
13857 int idx;
13858 char buff[1024];
13859 DBUG_ENTER("print_sel_tree");
13860
13861 String tmp(buff,sizeof(buff),&my_charset_bin);
13862 tmp.length(0);
13863 for (idx= 0,key=tree->keys, end=key+param->keys ;
13864 key != end ;
13865 key++,idx++)
13866 {
13867 if (tree_map->is_set(idx))
13868 {
13869 uint keynr= param->real_keynr[idx];
13870 if (tmp.length())
13871 tmp.append(',');
13872 tmp.append(param->table->key_info[keynr].name);
13873 }
13874 }
13875 if (!tmp.length())
13876 tmp.append(STRING_WITH_LEN("(empty)"));
13877
13878 DBUG_PRINT("info", ("SEL_TREE: %p (%s) scans: %s", tree, msg, tmp.ptr()));
13879 DBUG_VOID_RETURN;
13880 }
13881
13882
print_ror_scans_arr(TABLE * table,const char * msg,struct st_ror_scan_info ** start,struct st_ror_scan_info ** end)13883 static void print_ror_scans_arr(TABLE *table, const char *msg,
13884 struct st_ror_scan_info **start,
13885 struct st_ror_scan_info **end)
13886 {
13887 DBUG_ENTER("print_ror_scans_arr");
13888
13889 char buff[1024];
13890 String tmp(buff,sizeof(buff),&my_charset_bin);
13891 tmp.length(0);
13892 for (;start != end; start++)
13893 {
13894 if (tmp.length())
13895 tmp.append(',');
13896 tmp.append(table->key_info[(*start)->keynr].name);
13897 }
13898 if (!tmp.length())
13899 tmp.append(STRING_WITH_LEN("(empty)"));
13900 DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
13901 fprintf(DBUG_FILE,"ROR key scans (%s): %s", msg, tmp.ptr());
13902
13903 DBUG_VOID_RETURN;
13904 }
13905
13906
13907 #endif /* !DBUG_OFF */
13908
13909 /**
13910 Print a key to a string
13911
13912 @param[out] out String the key is appended to
13913 @param[in] key_part Index components description
13914 @param[in] key Key tuple
13915 @param[in] used_length Key tuple length
13916 */
13917 static void
print_key_value(String * out,const KEY_PART_INFO * key_part,const uchar * key)13918 print_key_value(String *out, const KEY_PART_INFO *key_part, const uchar *key)
13919 {
13920 Field *field= key_part->field;
13921
13922 if (field->flags & BLOB_FLAG)
13923 {
13924 // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
13925 if (field->real_maybe_null() && *key)
13926 out->append(STRING_WITH_LEN("NULL"));
13927 else
13928 out->append(STRING_WITH_LEN("unprintable_blob_value"));
13929 return;
13930 }
13931
13932 char buff[128];
13933 String tmp(buff, sizeof(buff), system_charset_info);
13934 tmp.length(0);
13935
13936 TABLE *table= field->table;
13937 my_bitmap_map *old_sets[2];
13938
13939 dbug_tmp_use_all_columns(table, old_sets, table->read_set,
13940 table->write_set);
13941
13942 uint store_length= key_part->store_length;
13943
13944 if (field->real_maybe_null())
13945 {
13946 /*
13947 Byte 0 of key is the null-byte. If set, key is NULL.
13948 Otherwise, print the key value starting immediately after the
13949 null-byte
13950 */
13951 if (*key)
13952 {
13953 out->append(STRING_WITH_LEN("NULL"));
13954 goto restore_col_map;
13955 }
13956 key++; // Skip null byte
13957 store_length--;
13958 }
13959 field->set_key_image(key, key_part->length);
13960 if (field->type() == MYSQL_TYPE_BIT)
13961 (void) field->val_int_as_str(&tmp, 1); // may change tmp's charset
13962 else
13963 field->val_str(&tmp); // may change tmp's charset
13964 out->append(tmp.ptr(), tmp.length(), tmp.charset());
13965
13966 restore_col_map:
13967 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
13968 }
13969
13970 /**
13971 Append range info for a key part to a string
13972
13973 @param[in,out] out String the range info is appended to
13974 @param[in] key_part Indexed column used in a range select
13975 @param[in] min_key Key tuple describing lower bound of range
13976 @param[in] max_key Key tuple describing upper bound of range
13977 @param[in] flag Key range flags defining what min_key
13978 and max_key represent @see my_base.h
13979 */
append_range(String * out,const KEY_PART_INFO * key_part,const uchar * min_key,const uchar * max_key,const uint flag)13980 void append_range(String *out,
13981 const KEY_PART_INFO *key_part,
13982 const uchar *min_key, const uchar *max_key,
13983 const uint flag)
13984 {
13985 if (out->length() > 0)
13986 out->append(STRING_WITH_LEN(" AND "));
13987
13988 if (!(flag & NO_MIN_RANGE))
13989 {
13990 print_key_value(out, key_part, min_key);
13991 if (flag & NEAR_MIN)
13992 out->append(STRING_WITH_LEN(" < "));
13993 else
13994 out->append(STRING_WITH_LEN(" <= "));
13995 }
13996
13997 out->append(key_part->field->field_name);
13998
13999 if (!(flag & NO_MAX_RANGE))
14000 {
14001 if (flag & NEAR_MAX)
14002 out->append(STRING_WITH_LEN(" < "));
14003 else
14004 out->append(STRING_WITH_LEN(" <= "));
14005 print_key_value(out, key_part, max_key);
14006 }
14007 }
14008
14009 /**
14010 Traverse an R-B tree of range conditions and append all ranges for
14011 this keypart and consecutive keyparts to range_trace (if non-NULL)
14012 or to range_string (if range_trace is NULL). See description of R-B
14013 trees/SEL_ARG for details on how ranges are linked.
14014
14015 @param[in,out] range_trace Optimizer trace array ranges are appended to
14016 @param[in,out] range_string The string where range predicates are
14017 appended when the last keypart has
14018 been reached.
14019 @param[in] range_so_far String containing ranges for keyparts prior
14020 to this keypart.
14021 @param[in] keypart_root The root of the R-B tree containing intervals
14022 for this keypart.
14023 @param[in] key_parts Index components description, used when adding
14024 information to the optimizer trace
14025
14026 @note This function mimics the behavior of sel_arg_range_seq_next()
14027 */
append_range_all_keyparts(Opt_trace_array * range_trace,String * range_string,String * range_so_far,SEL_ARG * keypart_root,const KEY_PART_INFO * key_parts)14028 static void append_range_all_keyparts(Opt_trace_array *range_trace,
14029 String *range_string,
14030 String *range_so_far,
14031 SEL_ARG *keypart_root,
14032 const KEY_PART_INFO *key_parts)
14033 {
14034 DBUG_ASSERT(keypart_root && keypart_root != &null_element);
14035
14036 const bool append_to_trace= (range_trace != NULL);
14037
14038 // Either add info to range_string or to range_trace
14039 DBUG_ASSERT(append_to_trace ? !range_string : (range_string != NULL));
14040
14041 // Navigate to first interval in red-black tree
14042 const KEY_PART_INFO *cur_key_part= key_parts + keypart_root->part;
14043 const SEL_ARG *keypart_range= keypart_root->first();
14044
14045 const uint save_range_so_far_length= range_so_far->length();
14046
14047 while (keypart_range)
14048 {
14049 /*
14050 Skip the rest of condition printing to avoid OOM if appending to
14051 range_string and the string becomes too long. Printing very long
14052 range conditions normally doesn't make sense either.
14053 */
14054 if (!append_to_trace && range_string->length() > 500)
14055 {
14056 range_string->append(STRING_WITH_LEN("..."));
14057 break;
14058 }
14059
14060 // Append the current range predicate to the range String
14061 append_range(range_so_far, cur_key_part,
14062 keypart_range->min_value, keypart_range->max_value,
14063 keypart_range->min_flag | keypart_range->max_flag);
14064
14065 /*
14066 Print range predicates for consecutive keyparts if
14067 1) There are predicates for later keyparts
14068 2) There are no "holes" in the used keyparts (keypartX can only
14069 be used if there is a range predicate on keypartX-1)
14070 3) The current range is an equality range
14071 */
14072 if (keypart_range->next_key_part &&
14073 keypart_range->next_key_part->part == keypart_range->part + 1 &&
14074 keypart_range->is_singlepoint())
14075 {
14076 append_range_all_keyparts(range_trace, range_string, range_so_far,
14077 keypart_range->next_key_part, key_parts);
14078 }
14079 else
14080 {
14081 /*
14082 This is the last keypart with a usable range predicate. Print
14083 full range info to the optimizer trace or to the string
14084 */
14085 if (append_to_trace)
14086 range_trace->add_utf8(range_so_far->ptr(),
14087 range_so_far->length());
14088 else
14089 {
14090 if (range_string->length() == 0)
14091 range_string->append(STRING_WITH_LEN("("));
14092 else
14093 range_string->append(STRING_WITH_LEN(" OR ("));
14094
14095 range_string->append(range_so_far->ptr(), range_so_far->length());
14096 range_string->append(STRING_WITH_LEN(")"));
14097 }
14098 }
14099 keypart_range= keypart_range->next;
14100 /*
14101 Now moving to next range for this keypart, so "reset"
14102 range_so_far to include only range description of earlier
14103 keyparts
14104 */
14105 range_so_far->length(save_range_so_far_length);
14106 }
14107 }
14108
14109 /**
14110 Print the ranges in a SEL_TREE to debug log.
14111
14112 @param tree_name Descriptive name of the tree
14113 @param tree The SEL_TREE that will be printed to debug log
14114 @param param PARAM from SQL_SELECT::test_quick_select
14115 */
dbug_print_tree(const char * tree_name,SEL_TREE * tree,const RANGE_OPT_PARAM * param)14116 static inline void dbug_print_tree(const char *tree_name,
14117 SEL_TREE *tree,
14118 const RANGE_OPT_PARAM *param)
14119 {
14120 #ifndef DBUG_OFF
14121 if (!param->using_real_indexes)
14122 {
14123 DBUG_PRINT("info",
14124 ("sel_tree: "
14125 "%s uses a partitioned index and cannot be printed",
14126 tree_name));
14127 return;
14128 }
14129
14130 if (!tree)
14131 {
14132 DBUG_PRINT("info", ("sel_tree: %s is NULL", tree_name));
14133 return;
14134 }
14135
14136 if (tree->type == SEL_TREE::IMPOSSIBLE)
14137 {
14138 DBUG_PRINT("info", ("sel_tree: %s is IMPOSSIBLE", tree_name));
14139 return;
14140 }
14141
14142 if (tree->type == SEL_TREE::ALWAYS)
14143 {
14144 DBUG_PRINT("info", ("sel_tree: %s is ALWAYS", tree_name));
14145 return;
14146 }
14147
14148 if (tree->type == SEL_TREE::MAYBE)
14149 {
14150 DBUG_PRINT("info", ("sel_tree: %s is MAYBE", tree_name));
14151 return;
14152 }
14153
14154 if (!tree->merges.is_empty())
14155 {
14156 DBUG_PRINT("info",
14157 ("sel_tree: "
14158 "%s contains the following merges", tree_name));
14159
14160 List_iterator<SEL_IMERGE> it(tree->merges);
14161 int i= 0;
14162 for (SEL_IMERGE *el= it++; el; el= it++, i++)
14163 {
14164 for (SEL_TREE** current= el->trees;
14165 current != el->trees_next;
14166 current++)
14167 dbug_print_tree(" merge_tree", *current, param);
14168 }
14169 }
14170
14171 for (uint i= 0; i< param->keys; i++)
14172 {
14173 if (tree->keys[i] == NULL || tree->keys[i] == &null_element)
14174 continue;
14175
14176 uint real_key_nr= param->real_keynr[i];
14177
14178 const KEY &cur_key= param->table->key_info[real_key_nr];
14179 const KEY_PART_INFO *key_part= cur_key.key_part;
14180
14181 /*
14182 String holding the final range description from
14183 append_range_all_keyparts()
14184 */
14185 char buff1[512];
14186 String range_result(buff1, sizeof(buff1), system_charset_info);
14187 range_result.length(0);
14188
14189 /*
14190 Range description up to a certain keypart - used internally in
14191 append_range_all_keyparts()
14192 */
14193 char buff2[128];
14194 String range_so_far(buff2, sizeof(buff2), system_charset_info);
14195 range_so_far.length(0);
14196
14197 append_range_all_keyparts(NULL, &range_result, &range_so_far,
14198 tree->keys[i], key_part);
14199
14200 DBUG_PRINT("info",
14201 ("sel_tree: %s->keys[%d(real_keynr: %d)]: %s",
14202 tree_name, i, real_key_nr, range_result.ptr()));
14203 }
14204 #endif
14205 }
14206
14207 /*****************************************************************************
14208 ** Print a quick range for debugging
14209 ** TODO:
14210 ** This should be changed to use a String to store each row instead
14211 ** of locking the DEBUG stream !
14212 *****************************************************************************/
14213
14214 #ifndef DBUG_OFF
14215
14216 static void
print_multiple_key_values(KEY_PART * key_part,const uchar * key,uint used_length)14217 print_multiple_key_values(KEY_PART *key_part, const uchar *key,
14218 uint used_length)
14219 {
14220 char buff[1024];
14221 const uchar *key_end= key+used_length;
14222 String tmp(buff,sizeof(buff),&my_charset_bin);
14223 uint store_length;
14224 TABLE *table= key_part->field->table;
14225 my_bitmap_map *old_sets[2];
14226
14227 dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
14228
14229 for (; key < key_end; key+=store_length, key_part++)
14230 {
14231 Field *field= key_part->field;
14232 store_length= key_part->store_length;
14233
14234 if (field->real_maybe_null())
14235 {
14236 if (*key)
14237 {
14238 fwrite("NULL",sizeof(char),4,DBUG_FILE);
14239 continue;
14240 }
14241 key++; // Skip null byte
14242 store_length--;
14243 }
14244 field->set_key_image(key, key_part->length);
14245 if (field->type() == MYSQL_TYPE_BIT)
14246 (void) field->val_int_as_str(&tmp, 1);
14247 else
14248 field->val_str(&tmp);
14249 fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
14250 if (key+store_length < key_end)
14251 fputc('/',DBUG_FILE);
14252 }
14253 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14254 }
14255
print_quick(QUICK_SELECT_I * quick,const key_map * needed_reg)14256 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
14257 {
14258 char buf[MAX_KEY/8+1];
14259 TABLE *table;
14260 my_bitmap_map *old_sets[2];
14261 DBUG_ENTER("print_quick");
14262 if (!quick)
14263 DBUG_VOID_RETURN;
14264 DBUG_LOCK_FILE;
14265
14266 table= quick->head;
14267 dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
14268 quick->dbug_dump(0, TRUE);
14269 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14270
14271 fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
14272
14273 DBUG_UNLOCK_FILE;
14274 DBUG_VOID_RETURN;
14275 }
14276
dbug_dump(int indent,bool verbose)14277 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
14278 {
14279 /* purecov: begin inspected */
14280 fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
14281 indent, "", head->key_info[index].name, max_used_key_length);
14282
14283 if (verbose)
14284 {
14285 QUICK_RANGE *range;
14286 QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
14287 QUICK_RANGE **end_range= pr + ranges.elements;
14288 for (; pr != end_range; ++pr)
14289 {
14290 fprintf(DBUG_FILE, "%*s", indent + 2, "");
14291 range= *pr;
14292 if (!(range->flag & NO_MIN_RANGE))
14293 {
14294 print_multiple_key_values(key_parts, range->min_key,
14295 range->min_length);
14296 if (range->flag & NEAR_MIN)
14297 fputs(" < ",DBUG_FILE);
14298 else
14299 fputs(" <= ",DBUG_FILE);
14300 }
14301 fputs("X",DBUG_FILE);
14302
14303 if (!(range->flag & NO_MAX_RANGE))
14304 {
14305 if (range->flag & NEAR_MAX)
14306 fputs(" < ",DBUG_FILE);
14307 else
14308 fputs(" <= ",DBUG_FILE);
14309 print_multiple_key_values(key_parts, range->max_key,
14310 range->max_length);
14311 }
14312 fputs("\n",DBUG_FILE);
14313 }
14314 }
14315 /* purecov: end */
14316 }
14317
dbug_dump(int indent,bool verbose)14318 void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
14319 {
14320 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14321 QUICK_RANGE_SELECT *quick;
14322 fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
14323 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14324 while ((quick= it++))
14325 quick->dbug_dump(indent+2, verbose);
14326 if (pk_quick_select)
14327 {
14328 fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14329 pk_quick_select->dbug_dump(indent+2, verbose);
14330 }
14331 fprintf(DBUG_FILE, "%*s}\n", indent, "");
14332 }
14333
dbug_dump(int indent,bool verbose)14334 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
14335 {
14336 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14337 QUICK_RANGE_SELECT *quick;
14338 fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
14339 indent, "", need_to_fetch_row? "":"non-");
14340 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14341 while ((quick= it++))
14342 quick->dbug_dump(indent+2, verbose);
14343 if (cpk_quick)
14344 {
14345 fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14346 cpk_quick->dbug_dump(indent+2, verbose);
14347 }
14348 fprintf(DBUG_FILE, "%*s}\n", indent, "");
14349 }
14350
dbug_dump(int indent,bool verbose)14351 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
14352 {
14353 List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
14354 QUICK_SELECT_I *quick;
14355 fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
14356 fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14357 while ((quick= it++))
14358 quick->dbug_dump(indent+2, verbose);
14359 fprintf(DBUG_FILE, "%*s}\n", indent, "");
14360 }
14361
14362 /*
14363 Print quick select information to DBUG_FILE.
14364
14365 SYNOPSIS
14366 QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
14367 indent Indentation offset
14368 verbose If TRUE show more detailed output.
14369
14370 DESCRIPTION
14371 Print the contents of this quick select to DBUG_FILE. The method also
14372 calls dbug_dump() for the used quick select if any.
14373
14374 IMPLEMENTATION
14375 Caller is responsible for locking DBUG_FILE before this call and unlocking
14376 it afterwards.
14377
14378 RETURN
14379 None
14380 */
14381
dbug_dump(int indent,bool verbose)14382 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
14383 {
14384 fprintf(DBUG_FILE,
14385 "%*squick_group_min_max_select: index %s (%d), length: %d\n",
14386 indent, "", index_info->name, index, max_used_key_length);
14387 if (key_infix_len > 0)
14388 {
14389 fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
14390 indent, "", key_infix_len);
14391 }
14392 if (quick_prefix_select)
14393 {
14394 fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
14395 quick_prefix_select->dbug_dump(indent + 2, verbose);
14396 }
14397 if (min_max_ranges.elements > 0)
14398 {
14399 fprintf(DBUG_FILE, "%*susing %lu quick_ranges for MIN/MAX:\n",
14400 indent, "", min_max_ranges.elements);
14401 }
14402 }
14403
14404
14405 #endif /* !DBUG_OFF */
14406