1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights
2  * reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 /*
25   TODO:
26   Fix that MAYBE_KEY are stored in the tree so that we can detect use
27   of full hash keys for queries like:
28 
29   select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
30 
31 */
32 
33 /*
34   This file contains:
35 
36   RangeAnalysisModule
37     A module that accepts a condition, index (or partitioning) description,
38     and builds lists of intervals (in index/partitioning space), such that
39     all possible records that match the condition are contained within the
40     intervals.
41     The entry point for the range analysis module is get_mm_tree()
42     (mm=min_max) function.
43 
44     The lists are returned in form of complicated structure of interlinked
45     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
46     See quick_range_seq_next, find_used_partitions for examples of how to walk
47     this structure.
48     All direct "users" of this module are located within this file, too.
49 
50 
51   PartitionPruningModule
52     A module that accepts a partitioned table, condition, and finds which
53     partitions we will need to use in query execution. Search down for
54     "PartitionPruningModule" for description.
55     The module has single entry point - prune_partitions() function.
56 
57 
58   Range/index_merge/groupby-minmax optimizer module
59     A module that accepts a table, condition, and returns
60      - a QUICK_*_SELECT object that can be used to retrieve rows that match
61        the specified condition, or a "no records will match the condition"
62        statement.
63 
64     The module entry points are
65       test_quick_select()
66       get_quick_select_for_ref()
67 
68 
69   Record retrieval code for range/index_merge/groupby-min-max.
70     Implementations of QUICK_*_SELECT classes.
71 
72   KeyTupleFormat
73   ~~~~~~~~~~~~~~
74   The code in this file (and elsewhere) makes operations on key value tuples.
75   Those tuples are stored in the following format:
76 
77   The tuple is a sequence of key part values. The length of key part value
78   depends only on its type (and not depends on the what value is stored)
79 
80     KeyTuple: keypart1-data, keypart2-data, ...
81 
82   The value of each keypart is stored in the following format:
83 
84     keypart_data: [isnull_byte] keypart-value-bytes
85 
86   If a keypart may have a NULL value (key_part->field->real_maybe_null() can
87   be used to check this), then the first byte is a NULL indicator with the
88   following valid values:
89     1  - keypart has NULL value.
90     0  - keypart has non-NULL value.
91 
92   <questionable-statement> If isnull_byte==1 (NULL value), then the following
93   keypart->length bytes must be 0.
94   </questionable-statement>
95 
96   keypart-value-bytes holds the value. Its format depends on the field type.
97   The length of keypart-value-bytes may or may not depend on the value being
98   stored. The default is that length is static and equal to
99   KEY_PART_INFO::length.
100 
101   Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the
102   value:
103 
104      keypart-value-bytes: value_length value_bytes
105 
106   The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
107 
108   See key_copy() and key_restore() for code to move data between index tuple
109   and table record
110 
111   CAUTION: the above description is only sergefp's understanding of the
112            subject and may omit some details.
113 */
114 
115 #include "sql_priv.h"
116 #include "key.h"        // is_key_used, key_copy, key_cmp, key_restore
117 #include "sql_parse.h"                          // check_stack_overrun
118 #include "sql_partition.h"    // get_part_id_func, PARTITION_ITERATOR,
119                               // struct partition_info, NOT_A_PARTITION_ID
120 #include "sql_base.h"         // free_io_cache
121 #include "records.h"          // init_read_record, end_read_record
122 #include <m_ctype.h>
123 #include "sql_select.h"
124 #include "opt_trace.h"
125 #include "filesort.h"         // filesort_free_buffers
126 #include "sql_optimizer.h"    // is_indexed_agg_distinct,field_time_cmp_date
127 
128 using std::min;
129 using std::max;
130 
131 /*
132   Convert double value to #rows. Currently this does floor(), and we
133   might consider using round() instead.
134 */
135 #define double2rows(x) ((ha_rows)(x))
136 
137 static int sel_cmp(Field *f,uchar *a,uchar *b,uint8 a_flag,uint8 b_flag);
138 
139 static uchar is_null_string[2]= {1,0};
140 
141 class RANGE_OPT_PARAM;
142 /*
143   A construction block of the SEL_ARG-graph.
144 
145   The following description only covers graphs of SEL_ARG objects with
146   sel_arg->type==KEY_RANGE:
147 
148   One SEL_ARG object represents an "elementary interval" in form
149 
150       min_value <=?  table.keypartX  <=? max_value
151 
152   The interval is a non-empty interval of any kind: with[out] minimum/maximum
153   bound, [half]open/closed, single-point interval, etc.
154 
155   1. SEL_ARG GRAPH STRUCTURE
156 
157   SEL_ARG objects are linked together in a graph. The meaning of the graph
  is better demonstrated by an example:
159 
160      tree->keys[i]
161       |
162       |             $              $
163       |    part=1   $     part=2   $    part=3
164       |             $              $
165       |  +-------+  $   +-------+  $   +--------+
166       |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
167       |  +-------+  $   +-------+  $   +--------+
168       |      |      $              $       |
169       |      |      $              $   +--------+
170       |      |      $              $   | kp3=12 |
171       |      |      $              $   +--------+
172       |  +-------+  $              $
173       \->| kp1=2 |--$--------------$-+
174          +-------+  $              $ |   +--------+
175              |      $              $  ==>| kp3=11 |
176          +-------+  $              $ |   +--------+
177          | kp1=3 |--$--------------$-+       |
178          +-------+  $              $     +--------+
179              |      $              $     | kp3=14 |
180             ...     $              $     +--------+
181 
182   The entire graph is partitioned into "interval lists".
183 
184   An interval list is a sequence of ordered disjoint intervals over
185   the same key part. SEL_ARG are linked via "next" and "prev" pointers
186   with NULL as sentinel.
187 
    In the example pic, there are 4 interval lists:
    "kp1<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=14".
190     The vertical lines represent SEL_ARG::next/prev pointers.
191 
192   Additionally, all intervals in the list form a red-black (RB) tree,
193   linked via left/right/parent pointers with null_element as sentinel. The
194   red-black tree root SEL_ARG object will be further called "root of the
195   interval list".
196 
197   A red-black tree with 7 SEL_ARGs will look similar to what is shown
198   below. Left/right/parent pointers are shown while next pointers go from a
199   node with number X to the node with number X+1 (and prev in the
200   opposite direction):
201 
202                          Root
203                         +---+
204                         | 4 |
205                         +---+
206                    left/     \ right
207                     __/       \__
208                    /             \
209               +---+               +---+
210               | 2 |               | 6 |
211               +---+               +---+
212         left /     \ right  left /     \ right
213             |       |           |       |
214         +---+       +---+   +---+       +---+
215         | 1 |       | 3 |   | 5 |       | 7 |
216         +---+       +---+   +---+       +---+
217 
218   In this tree,
219     * node1->prev == node7->next == NULL
220     * node1->left == node1->right ==
221       node3->left == ... node7->right == &null_element
222 
223   In an interval list, each member X may have SEL_ARG::next_key_part pointer
224   pointing to the root of another interval list Y. The pointed interval list
225   must cover a key part with greater number (i.e. Y->part > X->part).
226 
227     In the example pic, the next_key_part pointers are represented by
    horizontal lines.
229 
230   2. SEL_ARG GRAPH SEMANTICS
231 
232   It represents a condition in a special form (we don't have a name for it ATM)
233   The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
234 
235   For example, the picture represents the condition in form:
236    (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
237    (kp1=2 AND (kp3=11 OR kp3=14)) OR
238    (kp1=3 AND (kp3=11 OR kp3=14))
239 
240   In red-black tree form:
241 
242                      +-------+                 +--------+
243                      | kp1=2 |.................| kp3=14 |
244                      +-------+                 +--------+
245                       /     \                     /
246              +---------+    +-------+     +--------+
247              | kp1 < 1 |    | kp1=3 |     | kp3=11 |
248              +---------+    +-------+     +--------+
249                  .               .
250             ......               .......
251             .                          .
252         +-------+                  +--------+
253         | kp2=5 |                  | kp3=14 |
254         +-------+                  +--------+
255             .                        /
256             .                   +--------+
257        (root of R-B tree        | kp3=11 |
258         for "kp3={10|12}")      +--------+
259 
260 
261   Where / and \ denote left and right pointers and ... denotes
262   next_key_part pointers to the root of the R-B tree of intervals for
263   consecutive key parts.
264 
265   3. SEL_ARG GRAPH USE
266 
267   Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
  Then walk the SEL_ARG graph and get a list of disjoint ordered key
269   intervals (i.e. intervals in form
270 
271    (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
272 
273   Those intervals can be used to access the index. The uses are in:
274    - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
275                             how many table records are contained within all
276                             intervals.
277    - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
278                             and create QUICK_RANGE_SELECT object that will
279                             read records within these intervals.
280 
281   4. SPACE COMPLEXITY NOTES
282 
283     SEL_ARG graph is a representation of an ordered disjoint sequence of
284     intervals over the ordered set of index tuple values.
285 
286     For multi-part keys, one can construct a WHERE expression such that its
287     list of intervals will be of combinatorial size. Here is an example:
288 
289       (keypart1 IN (1,2, ..., n1)) AND
290       (keypart2 IN (1,2, ..., n2)) AND
291       (keypart3 IN (1,2, ..., n3))
292 
293     For this WHERE clause the list of intervals will have n1*n2*n3 intervals
294     of form
295 
296       (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
297 
298     SEL_ARG graph structure aims to reduce the amount of required space by
299     "sharing" the elementary intervals when possible (the pic at the
300     beginning of this comment has examples of such sharing). The sharing may
301     prevent combinatorial blowup:
302 
303       There are WHERE clauses that have combinatorial-size interval lists but
304       will be represented by a compact SEL_ARG graph.
305       Example:
306         (keypartN IN (1,2, ..., n1)) AND
307         ...
308         (keypart2 IN (1,2, ..., n2)) AND
309         (keypart1 IN (1,2, ..., n3))
310 
311     but not in all cases:
312 
313     - There are WHERE clauses that do have a compact SEL_ARG-graph
314       representation but get_mm_tree() and its callees will construct a
315       graph of combinatorial size.
316       Example:
317         (keypart1 IN (1,2, ..., n1)) AND
318         (keypart2 IN (1,2, ..., n2)) AND
319         ...
320         (keypartN IN (1,2, ..., n3))
321 
322     - There are WHERE clauses for which the minimal possible SEL_ARG graph
323       representation will have combinatorial size.
324       Example:
325         By induction: Let's take any interval on some keypart in the middle:
326 
327            kp15=c0
328 
329         Then let's AND it with this interval 'structure' from preceding and
330         following keyparts:
331 
          ((kp14=c1 AND kp16=c3) OR kp14=c2)   (*)
333 
334         We will obtain this SEL_ARG graph:
335 
336              kp14     $      kp15      $      kp16
337                       $                $
338          +---------+  $   +---------+  $   +---------+
339          | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
340          +---------+  $   +---------+  $   +---------+
341               |       $                $
342          +---------+  $   +---------+  $
343          | kp14=c2 |--$-->| kp15=c0 |  $
344          +---------+  $   +---------+  $
345                       $                $
346 
347        Note that we had to duplicate "kp15=c0" and there was no way to avoid
348        that.
349        The induction step: AND the obtained expression with another "wrapping"
350        expression like (*).
351        When the process ends because of the limit on max. number of keyparts
352        we'll have:
353 
354          WHERE clause length  is O(3*#max_keyparts)
355          SEL_ARG graph size   is O(2^(#max_keyparts/2))
356 
357        (it is also possible to construct a case where instead of 2 in 2^n we
358         have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
359         nodes)
360 
361     We avoid consuming too much memory by setting a limit on the number of
362     SEL_ARG object we can construct during one range analysis invocation.
363 */
364 
365 class SEL_ARG :public Sql_alloc
366 {
367 public:
368   uint8 min_flag,max_flag,maybe_flag;
369   uint8 part;					// Which key part
370   uint8 maybe_null;
371   /*
372     Number of children of this element in the RB-tree, plus 1 for this
373     element itself.
374   */
375   uint16 elements;
376   /*
377     Valid only for elements which are RB-tree roots: Number of times this
378     RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
379     SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
380   */
381   ulong use_count;
382 
383   Field *field;
384   uchar *min_value,*max_value;			// Pointer to range
385 
386   /*
387     eq_tree(), first(), last() etc require that left == right == NULL
388     if the type is MAYBE_KEY. Todo: fix this so SEL_ARGs without R-B
389     children are handled consistently. See related WL#5894.
390    */
391   SEL_ARG *left,*right;   /* R-B tree children */
392   SEL_ARG *next,*prev;    /* Links for bi-directional interval list */
393   SEL_ARG *parent;        /* R-B tree parent */
394   /*
395     R-B tree root of intervals covering keyparts consecutive to this
396     SEL_ARG. See documentation of SEL_ARG GRAPH semantics for details.
397   */
398   SEL_ARG *next_key_part;
399   enum leaf_color { BLACK,RED } color;
400 
401   /**
402     Starting an effort to document this field:
403 
404     IMPOSSIBLE: if the range predicate for this index is always false.
405 
406     ALWAYS: if the range predicate for this index is always true.
407 
408     KEY_RANGE: if there is a range predicate that can be used on this index.
409   */
410   enum Type { IMPOSSIBLE, ALWAYS, MAYBE, MAYBE_KEY, KEY_RANGE } type;
411 
412   enum { MAX_SEL_ARGS = 16000 };
413 
SEL_ARG()414   SEL_ARG() {}
415   SEL_ARG(SEL_ARG &);
416   SEL_ARG(Field *,const uchar *, const uchar *);
417   SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
418 	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
419   /*
420     Used to construct MAYBE_KEY and IMPOSSIBLE SEL_ARGs. left and
421     right is NULL, so this ctor must not be used to create other
422     SEL_ARG types. See todo for left/right pointers.
423   */
SEL_ARG(enum Type type_arg)424   SEL_ARG(enum Type type_arg)
425     :min_flag(0),elements(1),use_count(1),left(NULL),right(NULL),
426      next_key_part(0), color(BLACK), type(type_arg)
427   {
428     DBUG_ASSERT(type_arg == MAYBE_KEY || type_arg == IMPOSSIBLE);
429   }
430   /**
431     returns true if a range predicate is equal. Use all_same()
432     to check for equality of all the predicates on this keypart.
433   */
is_same(const SEL_ARG * arg) const434   inline bool is_same(const SEL_ARG *arg) const
435   {
436     if (type != arg->type || part != arg->part)
437       return false;
438     if (type != KEY_RANGE)
439       return true;
440     return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
441   }
442   /**
443     returns true if all the predicates in the keypart tree are equal
444   */
all_same(const SEL_ARG * arg) const445   bool all_same(const SEL_ARG *arg) const
446   {
447     if (type != arg->type || part != arg->part)
448       return false;
449     if (type != KEY_RANGE)
450       return true;
451     if (arg == this)
452       return true;
453     const SEL_ARG *cmp_arg= arg->first();
454     const SEL_ARG *cur_arg= first();
455     for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
456          cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
457     if (cur_arg || cmp_arg)
458       return false;
459     return true;
460   }
merge_flags(SEL_ARG * arg)461   inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
maybe_smaller()462   inline void maybe_smaller() { maybe_flag=1; }
463   /* Return true iff it's a single-point null interval */
is_null_interval()464   inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
cmp_min_to_min(const SEL_ARG * arg) const465   inline int cmp_min_to_min(const SEL_ARG* arg) const
466   {
467     return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
468   }
cmp_min_to_max(const SEL_ARG * arg) const469   inline int cmp_min_to_max(const SEL_ARG* arg) const
470   {
471     return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
472   }
cmp_max_to_max(const SEL_ARG * arg) const473   inline int cmp_max_to_max(const SEL_ARG* arg) const
474   {
475     return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
476   }
cmp_max_to_min(const SEL_ARG * arg) const477   inline int cmp_max_to_min(const SEL_ARG* arg) const
478   {
479     return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
480   }
clone_and(SEL_ARG * arg)481   SEL_ARG *clone_and(SEL_ARG* arg)
482   {						// Get overlapping range
483     uchar *new_min,*new_max;
484     uint8 flag_min,flag_max;
485     if (cmp_min_to_min(arg) >= 0)
486     {
487       new_min=min_value; flag_min=min_flag;
488     }
489     else
490     {
491       new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
492     }
493     if (cmp_max_to_max(arg) <= 0)
494     {
495       new_max=max_value; flag_max=max_flag;
496     }
497     else
498     {
499       new_max=arg->max_value; flag_max=arg->max_flag;
500     }
501     return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
502 		       MY_TEST(maybe_flag && arg->maybe_flag));
503   }
clone_first(SEL_ARG * arg)504   SEL_ARG *clone_first(SEL_ARG *arg)
505   {						// min <= X < arg->min
506     return new SEL_ARG(field,part, min_value, arg->min_value,
507 		       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
508 		       maybe_flag | arg->maybe_flag);
509   }
clone_last(SEL_ARG * arg)510   SEL_ARG *clone_last(SEL_ARG *arg)
511   {						// min <= X <= key_max
512     return new SEL_ARG(field, part, min_value, arg->max_value,
513 		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
514   }
515   SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);
516 
copy_min(SEL_ARG * arg)517   bool copy_min(SEL_ARG* arg)
518   {						// Get overlapping range
519     if (cmp_min_to_min(arg) > 0)
520     {
521       min_value=arg->min_value; min_flag=arg->min_flag;
522       if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
523 	return 1;				// Full range
524     }
525     maybe_flag|=arg->maybe_flag;
526     return 0;
527   }
copy_max(SEL_ARG * arg)528   bool copy_max(SEL_ARG* arg)
529   {						// Get overlapping range
530     if (cmp_max_to_max(arg) <= 0)
531     {
532       max_value=arg->max_value; max_flag=arg->max_flag;
533       if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
534 	return 1;				// Full range
535     }
536     maybe_flag|=arg->maybe_flag;
537     return 0;
538   }
539 
copy_min_to_min(SEL_ARG * arg)540   void copy_min_to_min(SEL_ARG *arg)
541   {
542     min_value=arg->min_value; min_flag=arg->min_flag;
543   }
copy_min_to_max(SEL_ARG * arg)544   void copy_min_to_max(SEL_ARG *arg)
545   {
546     max_value=arg->min_value;
547     max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
548   }
copy_max_to_min(SEL_ARG * arg)549   void copy_max_to_min(SEL_ARG *arg)
550   {
551     min_value=arg->max_value;
552     min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
553   }
554   /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_min(uint length,uchar ** min_key,uint min_key_flag)555   int store_min(uint length, uchar **min_key,uint min_key_flag)
556   {
557     /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
558     if ((min_flag & GEOM_FLAG) ||
559         (!(min_flag & NO_MIN_RANGE) &&
560 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
561     {
562       if (maybe_null && *min_value)
563       {
564 	**min_key=1;
565 	memset(*min_key+1, 0, length-1);
566       }
567       else
568 	memcpy(*min_key,min_value,length);
569       (*min_key)+= length;
570       return 1;
571     }
572     return 0;
573   }
574   /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_max(uint length,uchar ** max_key,uint max_key_flag)575   int store_max(uint length, uchar **max_key, uint max_key_flag)
576   {
577     if (!(max_flag & NO_MAX_RANGE) &&
578 	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
579     {
580       if (maybe_null && *max_value)
581       {
582 	**max_key=1;
583 	memset(*max_key+1, 0, length-1);
584       }
585       else
586 	memcpy(*max_key,max_value,length);
587       (*max_key)+= length;
588       return 1;
589     }
590     return 0;
591   }
592 
593   /*
594     Returns a number of keypart values appended to the key buffer
595     for min key and max key. This function is used by both Range
596     Analysis and Partition pruning. For partition pruning we have
597     to ensure that we don't store also subpartition fields. Thus
598     we have to stop at the last partition part and not step into
599     the subpartition fields. For Range Analysis we set last_part
600     to MAX_KEY which we should never reach.
601   */
store_min_key(KEY_PART * key,uchar ** range_key,uint * range_key_flag,uint last_part)602   int store_min_key(KEY_PART *key,
603                     uchar **range_key,
604                     uint *range_key_flag,
605                     uint last_part)
606   {
607     SEL_ARG *key_tree= first();
608     uint res= key_tree->store_min(key[key_tree->part].store_length,
609                                   range_key, *range_key_flag);
610     *range_key_flag|= key_tree->min_flag;
611 
612     if (key_tree->next_key_part &&
613 	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
614         key_tree->part != last_part &&
615 	key_tree->next_key_part->part == key_tree->part+1 &&
616 	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
617       res+= key_tree->next_key_part->store_min_key(key,
618                                                    range_key,
619                                                    range_key_flag,
620                                                    last_part);
621     return res;
622   }
623 
624   /* returns a number of keypart values appended to the key buffer */
store_max_key(KEY_PART * key,uchar ** range_key,uint * range_key_flag,uint last_part)625   int store_max_key(KEY_PART *key,
626                     uchar **range_key,
627                     uint *range_key_flag,
628                     uint last_part)
629   {
630     SEL_ARG *key_tree= last();
631     uint res=key_tree->store_max(key[key_tree->part].store_length,
632                                  range_key, *range_key_flag);
633     (*range_key_flag)|= key_tree->max_flag;
634     if (key_tree->next_key_part &&
635 	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
636         key_tree->part != last_part &&
637 	key_tree->next_key_part->part == key_tree->part+1 &&
638 	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
639       res+= key_tree->next_key_part->store_max_key(key,
640                                                    range_key,
641                                                    range_key_flag,
642                                                    last_part);
643     return res;
644   }
645 
646   SEL_ARG *insert(SEL_ARG *key);
647   SEL_ARG *tree_delete(SEL_ARG *key);
648   SEL_ARG *find_range(SEL_ARG *key);
649   SEL_ARG *rb_insert(SEL_ARG *leaf);
650   friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
651 #ifndef DBUG_OFF
652   friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
653   void test_use_count(SEL_ARG *root);
654 #endif
655   SEL_ARG *first();
656   const SEL_ARG *first() const;
657   SEL_ARG *last();
658   void make_root();
simple_key()659   inline bool simple_key()
660   {
661     return !next_key_part && elements == 1;
662   }
increment_use_count(long count)663   void increment_use_count(long count)
664   {
665     if (next_key_part)
666     {
667       next_key_part->use_count+=count;
668       for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
669 	if (pos->next_key_part)
670 	  pos->increment_use_count(count);
671     }
672   }
free_tree()673   void free_tree()
674   {
675     for (SEL_ARG *pos=first(); pos ; pos=pos->next)
676       if (pos->next_key_part)
677       {
678 	pos->next_key_part->use_count--;
679 	pos->next_key_part->free_tree();
680       }
681   }
682 
parent_ptr()683   inline SEL_ARG **parent_ptr()
684   {
685     return parent->left == this ? &parent->left : &parent->right;
686   }
687 
688 
689   /*
690     Check if this SEL_ARG object represents a single-point interval
691 
692     SYNOPSIS
693       is_singlepoint()
694 
695     DESCRIPTION
696       Check if this SEL_ARG object (not tree) represents a single-point
697       interval, i.e. if it represents a "keypart = const" or
698       "keypart IS NULL".
699 
700     RETURN
701       TRUE   This SEL_ARG object represents a singlepoint interval
702       FALSE  Otherwise
703   */
704 
is_singlepoint() const705   bool is_singlepoint() const
706   {
707     /*
708       Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
709       flags, and the same for right edge.
710     */
711     if (min_flag || max_flag)
712       return FALSE;
713     uchar *min_val= min_value;
714     uchar *max_val= max_value;
715 
716     if (maybe_null)
717     {
718       /* First byte is a NULL value indicator */
719       if (*min_val != *max_val)
720         return FALSE;
721 
722       if (*min_val)
723         return TRUE; /* This "x IS NULL" */
724       min_val++;
725       max_val++;
726     }
727     return !field->key_cmp(min_val, max_val);
728   }
729   SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
730 };
731 
732 /**
733   Helper function to compare two SEL_ARG's.
734 */
all_same(const SEL_ARG * sa1,const SEL_ARG * sa2)735 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
736 {
737   if (sa1 == NULL && sa2 == NULL)
738     return true;
739   if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
740     return false;
741   return sa1->all_same(sa2);
742 }
743 
744 class SEL_IMERGE;
745 
746 
747 class SEL_TREE :public Sql_alloc
748 {
749 public:
750   /**
751     Starting an effort to document this field:
752 
753     IMPOSSIBLE: if keys[i]->type == SEL_ARG::IMPOSSIBLE for some i,
754       then type == SEL_TREE::IMPOSSIBLE. Rationale: if the predicate for
755       one of the indexes is always false, then the full predicate is also
756       always false.
757 
758     ALWAYS: if either (keys[i]->type == SEL_ARG::ALWAYS) or
759       (keys[i] == NULL) for all i, then type == SEL_TREE::ALWAYS.
760       Rationale: the range access method will not be able to filter
761       out any rows when there are no range predicates that can be used
762       to filter on any index.
763 
764     KEY: There are range predicates that can be used on at least one
765       index.
766 
767     KEY_SMALLER: There are range predicates that can be used on at
768       least one index. In addition, there are predicates that cannot
769       be directly utilized by range access on key parts in the same
770       index. These unused predicates makes it probable that the row
771       estimate for range access on this index is too pessimistic.
772   */
773   enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
SEL_TREE(enum Type type_arg)774   SEL_TREE(enum Type type_arg) :type(type_arg) {}
SEL_TREE()775   SEL_TREE() :type(KEY)
776   {
777     memset(keys, 0, sizeof(keys));
778   }
779   SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
780   /*
781     Possible ways to read rows using a single index because the
782     conditions of the query consists of single-index conjunctions:
783 
784        (ranges_for_idx_1) AND (ranges_for_idx_2) AND ...
785 
786     The SEL_ARG graph for each non-NULL element in keys[] may consist
787     of many single-index ranges (disjunctions), so ranges_for_idx_1
788     may e.g. be:
789 
790        "idx_field1 = 1 OR (idx_field1 > 5 AND idx_field2 = 10)"
791 
792     assuming that index1 is a composite index covering
793     (idx_field1,...,idx_field2,..)
794 
795     Index merge intersection intersects ranges on SEL_ARGs from two or
796     more indexes.
797 
798     Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
799     keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
800     merit in range analyzer functions (e.g. get_mm_parts) returning a
801     pointer to such SEL_TREE instead of NULL)
802   */
803   SEL_ARG *keys[MAX_KEY];
804   key_map keys_map;        /* bitmask of non-NULL elements in keys */
805 
806   /*
807     Possible ways to read rows using Index merge (sort) union.
808 
809     Each element in 'merges' consists of multi-index disjunctions,
810     which means that Index merge (sort) union must be applied to read
811     rows. The nodes in the 'merges' list forms a conjunction of such
812     multi-index disjunctions.
813 
814     The list is non-empty only if type==KEY.
815   */
816   List<SEL_IMERGE> merges;
817 
818   /* The members below are filled/used only after get_mm_tree is done */
819   key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
820   uint    n_ror_scans;     /* number of set bits in ror_scans_map */
821 
822   struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
823   struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
824   /* Note that #records for each key scan is stored in table->quick_rows */
825 };
826 
/*
  Base class for the parameter block passed around during range analysis.
  Describes the table, the (possibly fake) index definitions to analyze,
  and the memory roots and scratch buffers the analysis may use.
*/
class RANGE_OPT_PARAM
{
public:
  THD	*thd;   /* Current thread handle */
  TABLE *table; /* Table being analyzed */
  Item *cond;   /* Used inside get_mm_tree(). */
  table_map prev_tables;
  table_map read_tables;
  table_map current_table; /* Bit of the table being analyzed */

  /* Array of parts of all keys for which range analysis is performed */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;
  MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
  MEM_ROOT *old_root; /* Memory that will last until the query end */
  /*
    Number of indexes used in range analysis (In SEL_TREE::keys only first
    #keys elements are not empty)
  */
  uint keys;

  /*
    If true, the index descriptions describe real indexes (and it is ok to
    call field->optimize_range(real_keynr[...], ...)).
    Otherwise index description describes fake indexes, like a partitioning
    expression.
  */
  bool using_real_indexes;

  /*
    Aggressively remove "scans" that do not have conditions on first
    keyparts. Such scans are usable when doing partition pruning but not
    regular range optimization.
  */
  bool remove_jump_scans;

  /*
    used_key_no -> table_key_no translation table. Only makes sense if
    using_real_indexes==TRUE
  */
  uint real_keynr[MAX_KEY];

  /*
    Used to store 'current key tuples', in both range analysis and
    partitioning (list) analysis
  */
  uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];

  /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
  uint alloced_sel_args;
  /* If true, force use of the default MRR implementation */
  bool force_default_mrr;
  /**
    Whether index statistics or index dives should be used when
    estimating the number of rows in an equality range. If true, index
    statistics is used for these indexes.
  */
  bool use_index_statistics;

  /*
    True when range analysis must stop: the statement hit a (fatal) error
    or the analysis itself has allocated too many SEL_ARG objects.
  */
  bool statement_should_be_aborted() const
  {
    return
      thd->is_fatal_error ||
      thd->is_error() ||
      alloced_sel_args > SEL_ARG::MAX_SEL_ARGS;
  }

};
895 
/*
  RANGE_OPT_PARAM extended with the per-scan state used while costing and
  building actual range / index_merge / ROR access plans.
*/
class PARAM : public RANGE_OPT_PARAM
{
public:
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
  longlong baseflag;
  uint max_key_part;
  /* Number of ranges in the last checked tree->key */
  uint range_count;

  bool quick;				// Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* TRUE if last checked tree->key can be used for ROR-scan */
  bool is_ror_scan;
  /*
    Number of ranges in the last checked tree->key.
    NOTE(review): comment duplicates range_count above — confirm which
    consumers read which member.
  */
  uint n_ranges;

  /*
     The sort order the range access method must be able
     to provide. Three-value logic: asc/desc/don't care
  */
  ORDER::enum_order order_direction;
};
927 
928 class TABLE_READ_PLAN;
929   class TRP_RANGE;
930   class TRP_ROR_INTERSECT;
931   class TRP_ROR_UNION;
932   class TRP_INDEX_MERGE;
933   class TRP_GROUP_MIN_MAX;
934 
935 struct st_ror_scan_info;
936 
937 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,
938                                Item_func *cond_func,Field *field,
939                                Item_func::Functype type,Item *value,
940                                Item_result cmp_type);
941 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,Item *cond_func,Field *field,
942 			    KEY_PART *key_part,
943 			    Item_func::Functype type,Item *value);
944 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond);
945 
946 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts);
947 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
948                                   SEL_ARG *tree, bool update_tbl_stats,
949                                   uint *mrr_flags, uint *bufsize,
950                                   Cost_estimate *cost);
951 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
952                                      SEL_ARG *key_tree, uint mrr_flags,
953                                      uint mrr_buf_size, MEM_ROOT *alloc);
954 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
955                                        bool index_read_must_be_used,
956                                        bool update_tbl_stats,
957                                        double read_time);
958 static
959 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
960                                           double read_time);
961 static
962 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
963                                          double read_time);
964 static
965 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
966                                           double read_time);
967 #ifndef DBUG_OFF
968 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
969                            const char *msg);
970 static void print_ror_scans_arr(TABLE *table, const char *msg,
971                                 struct st_ror_scan_info **start,
972                                 struct st_ror_scan_info **end);
973 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
974 #endif
975 
976 static void append_range_all_keyparts(Opt_trace_array *range_trace,
977                                       String *range_string,
978                                       String *range_so_far,
979                                       SEL_ARG *keypart_root,
980                                       const KEY_PART_INFO *key_parts);
981 static inline void dbug_print_tree(const char *tree_name,
982                                    SEL_TREE *tree,
983                                    const RANGE_OPT_PARAM *param);
984 
985 void append_range(String *out,
986                   const KEY_PART_INFO *key_parts,
987                   const uchar *min_key, const uchar *max_key,
988                   const uint flag);
989 
990 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
991 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
992 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
993 static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
994 static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
995                         uint clone_flag);
996 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
997 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
998                     SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
999                     uchar *max_key,uint max_key_flag);
1000 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
1001 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count,
1002                                     uint limit);
1003 
1004 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
1005 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
1006                              uint length);
1007 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
1008 
1009 
1010 /*
1011   SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
1012   a condition in the following form:
1013    (t_1||t_2||...||t_N) && (next)
1014 
1015   where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
1016   (t_i,t_j) contains SEL_ARGS for the same index.
1017 
1018   SEL_TREE contained in SEL_IMERGE always has merges=NULL.
1019 
1020   This class relies on memory manager to do the cleanup.
1021 */
1022 
class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};  /* inline capacity before heap growth */
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  /* Start with the preallocated array, holding no trees yet. */
  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  /* Deep copy; on OOM leaves an empty imerge (trees == trees_next). */
  SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
};
1044 
1045 
1046 /*
1047   Add SEL_TREE to this index_merge without any checks,
1048 
1049   NOTES
1050     This function implements the following:
1051       (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
1052 
1053   RETURN
1054      0 - OK
1055     -1 - Out of memory.
1056 */
1057 
or_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree)1058 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
1059 {
1060   if (trees_next == trees_end)
1061   {
1062     const int realloc_ratio= 2;		/* Double size for next round */
1063     uint old_elements= (trees_end - trees);
1064     uint old_size= sizeof(SEL_TREE**) * old_elements;
1065     uint new_size= old_size * realloc_ratio;
1066     SEL_TREE **new_trees;
1067     if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
1068       return -1;
1069     memcpy(new_trees, trees, old_size);
1070     trees=      new_trees;
1071     trees_next= trees + old_elements;
1072     trees_end=  trees + old_elements * realloc_ratio;
1073   }
1074   *(trees_next++)= tree;
1075   return 0;
1076 }
1077 
1078 
1079 /*
1080   Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
1081   combining new_tree with one of the trees in this SEL_IMERGE if they both
1082   have SEL_ARGs for the same key.
1083 
1084   SYNOPSIS
1085     or_sel_tree_with_checks()
1086       param    PARAM from SQL_SELECT::test_quick_select
1087       new_tree SEL_TREE with type KEY or KEY_SMALLER.
1088 
1089   NOTES
1090     This does the following:
1091     (t_1||...||t_k)||new_tree =
1092      either
1093        = (t_1||...||t_k||new_tree)
1094      or
1095        = (t_1||....||(t_j|| new_tree)||...||t_k),
1096 
1097      where t_i, y are SEL_TREEs.
1098     new_tree is combined with the first t_j it has a SEL_ARG on common
1099     key with. As a consequence of this, choice of keys to do index_merge
1100     read may depend on the order of conditions in WHERE part of the query.
1101 
1102   RETURN
1103     0  OK
1104     1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
1105        and (*this) should be discarded.
1106    -1  An error occurred.
1107 */
1108 
or_sel_tree_with_checks(RANGE_OPT_PARAM * param,SEL_TREE * new_tree)1109 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
1110 {
1111   for (SEL_TREE** tree = trees;
1112        tree != trees_next;
1113        tree++)
1114   {
1115     if (sel_trees_can_be_ored(*tree, new_tree, param))
1116     {
1117       *tree = tree_or(param, *tree, new_tree);
1118       if (!*tree)
1119         return 1;
1120       if (((*tree)->type == SEL_TREE::MAYBE) ||
1121           ((*tree)->type == SEL_TREE::ALWAYS))
1122         return 1;
1123       /* SEL_TREE::IMPOSSIBLE is impossible here */
1124       return 0;
1125     }
1126   }
1127 
1128   /* New tree cannot be combined with any of existing trees. */
1129   return or_sel_tree(param, new_tree);
1130 }
1131 
1132 
1133 /*
1134   Perform OR operation on this index_merge and supplied index_merge list.
1135 
1136   RETURN
1137     0 - OK
1138     1 - One of conditions in result is always TRUE and this SEL_IMERGE
1139         should be discarded.
1140    -1 - An error occurred
1141 */
1142 
or_sel_imerge_with_checks(RANGE_OPT_PARAM * param,SEL_IMERGE * imerge)1143 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
1144 {
1145   for (SEL_TREE** tree= imerge->trees;
1146        tree != imerge->trees_next;
1147        tree++)
1148   {
1149     if (or_sel_tree_with_checks(param, *tree))
1150       return 1;
1151   }
1152   return 0;
1153 }
1154 
1155 
SEL_TREE(SEL_TREE * arg,RANGE_OPT_PARAM * param)1156 SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
1157 {
1158   keys_map= arg->keys_map;
1159   type= arg->type;
1160   for (uint idx= 0; idx < MAX_KEY; idx++)
1161   {
1162     if ((keys[idx]= arg->keys[idx]))
1163     {
1164       keys[idx]->use_count++;
1165       keys[idx]->increment_use_count(1);
1166     }
1167   }
1168 
1169   List_iterator<SEL_IMERGE> it(arg->merges);
1170   for (SEL_IMERGE *el= it++; el; el= it++)
1171   {
1172     SEL_IMERGE *merge= new SEL_IMERGE(el, param);
1173     if (!merge || merge->trees == merge->trees_next)
1174     {
1175       merges.empty();
1176       return;
1177     }
1178     merges.push_back (merge);
1179   }
1180 }
1181 
1182 
SEL_IMERGE(SEL_IMERGE * arg,RANGE_OPT_PARAM * param)1183 SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
1184 {
1185   uint elements= (arg->trees_end - arg->trees);
1186   if (elements > PREALLOCED_TREES)
1187   {
1188     uint size= elements * sizeof (SEL_TREE **);
1189     if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
1190       goto mem_err;
1191   }
1192   else
1193     trees= &trees_prealloced[0];
1194 
1195   trees_next= trees;
1196   trees_end= trees + elements;
1197 
1198   for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end;
1199        tree++, arg_tree++)
1200   {
1201     if (!(*tree= new SEL_TREE(*arg_tree, param)))
1202       goto mem_err;
1203   }
1204 
1205   return;
1206 
1207 mem_err:
1208   trees= &trees_prealloced[0];
1209   trees_next= trees;
1210   trees_end= trees;
1211 }
1212 
1213 
1214 /*
1215   Perform AND operation on two index_merge lists and store result in *im1.
1216 */
1217 
inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  im1->concat(im2);  /* AND of two conjunct lists == list concatenation */
}
1222 
1223 
1224 /*
1225   Perform OR operation on 2 index_merge lists, storing result in first list.
1226 
1227   NOTES
1228     The following conversion is implemented:
1229      (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
1230       => (a_1||b_1).
1231 
1232     i.e. all conjuncts except the first one are currently dropped.
1233     This is done to avoid producing N*K ways to do index_merge.
1234 
1235     If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
1236     and index_merge is discarded (while it is actually possible to try
1237     harder).
1238 
1239     As a consequence of this, choice of keys to do index_merge read may depend
1240     on the order of conditions in WHERE part of the query.
1241 
1242   RETURN
1243     0     OK, result is stored in *im1
1244     other Error, both passed lists are unusable
1245 */
1246 
imerge_list_or_list(RANGE_OPT_PARAM * param,List<SEL_IMERGE> * im1,List<SEL_IMERGE> * im2)1247 int imerge_list_or_list(RANGE_OPT_PARAM *param,
1248                         List<SEL_IMERGE> *im1,
1249                         List<SEL_IMERGE> *im2)
1250 {
1251   SEL_IMERGE *imerge= im1->head();
1252   im1->empty();
1253   im1->push_back(imerge);
1254 
1255   return imerge->or_sel_imerge_with_checks(param, im2->head());
1256 }
1257 
1258 
1259 /*
1260   Perform OR operation on index_merge list and key tree.
1261 
1262   RETURN
1263     false     OK, result is stored in *im1.
1264     true      Error
1265 */
1266 
imerge_list_or_tree(RANGE_OPT_PARAM * param,List<SEL_IMERGE> * im1,SEL_TREE * tree)1267 static bool imerge_list_or_tree(RANGE_OPT_PARAM *param,
1268                                 List<SEL_IMERGE> *im1,
1269                                 SEL_TREE *tree)
1270 {
1271   DBUG_ENTER("imerge_list_or_tree");
1272   SEL_IMERGE *imerge;
1273   List_iterator<SEL_IMERGE> it(*im1);
1274 
1275   uint remaining_trees= im1->elements;
1276   while ((imerge= it++))
1277   {
1278     SEL_TREE *or_tree;
1279     /*
1280       Need to make a copy of 'tree' for all but the last OR operation
1281       because or_sel_tree_with_checks() may change it.
1282     */
1283     if (--remaining_trees == 0)
1284       or_tree= tree;
1285     else
1286     {
1287       or_tree= new SEL_TREE (tree, param);
1288       if (!or_tree)
1289         DBUG_RETURN(true);
1290       if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
1291         DBUG_RETURN(false);
1292     }
1293 
1294     int result_or= imerge->or_sel_tree_with_checks(param, or_tree);
1295     if (result_or == 1)
1296       it.remove();
1297     else if (result_or == -1)
1298       DBUG_RETURN(true);
1299   }
1300   DBUG_ASSERT(remaining_trees == 0);
1301   DBUG_RETURN(im1->is_empty());
1302 }
1303 
1304 
1305 /***************************************************************************
1306 ** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
1307 ***************************************************************************/
1308 
1309 	/* make a select from mysql info
1310 	   Error is set as following:
1311 	   0 = ok
1312 	   1 = Got some error (out of memory?)
1313 	   */
1314 
make_select(TABLE * head,table_map const_tables,table_map read_tables,Item * conds,bool allow_null_cond,int * error)1315 SQL_SELECT *make_select(TABLE *head, table_map const_tables,
1316 			table_map read_tables, Item *conds,
1317                         bool allow_null_cond,
1318                         int *error)
1319 {
1320   SQL_SELECT *select;
1321   DBUG_ENTER("make_select");
1322 
1323   *error=0;
1324 
1325   if (!conds && !allow_null_cond)
1326     DBUG_RETURN(0);
1327   if (!(select= new SQL_SELECT))
1328   {
1329     *error= 1;			// out of memory
1330     DBUG_RETURN(0);		/* purecov: inspected */
1331   }
1332   select->read_tables=read_tables;
1333   select->const_tables=const_tables;
1334   select->head=head;
1335   select->cond=conds;
1336 
1337   if (head->sort.io_cache)
1338   {
1339     select->file= *head->sort.io_cache;
1340     select->records=(ha_rows) (select->file.end_of_file/
1341 			       head->file->ref_length);
1342     my_free(head->sort.io_cache);
1343     head->sort.io_cache=0;
1344   }
1345   DBUG_RETURN(select);
1346 }
1347 
1348 
/* Construct an empty SQL_SELECT: no quick select, no condition. */
SQL_SELECT::SQL_SELECT() :
  quick(0), cond(0), icp_cond(0),
  free_cond(0), traced_before(false)
{
  my_b_clear(&file);  /* no sort-result IO_CACHE attached yet */
}
1355 
1356 
/*
  Release everything owned by this SQL_SELECT: the quick select, the
  condition (only if we own it, i.e. free_cond is set) and the IO_CACHE.
*/
void SQL_SELECT::cleanup()
{
  set_quick(NULL);
  if (free_cond)
  {
    free_cond=0;
    delete cond;
    cond= 0;
  }
  close_cached_file(&file);
  traced_before= false;
}
1369 
1370 
/* Destructor delegates all teardown to cleanup(). */
SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}
1375 
1376 #undef index					// Fix for Unixware 7
1377 
/* Base-class constructor: zero the key usage statistics. */
QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}
1382 
/*
  Construct a quick range select over index 'key_nr' of 'table'.

  Memory: unless no_alloc/parent_alloc say otherwise, a private memroot
  ('alloc') is initialized and — note the side effect — installed as
  thd->mem_root, so subsequent allocations during plan creation land in it.
  On malloc failure for the column bitmap, *create_error is set to 1 and
  the object is left in a state the destructor can still clean up.
*/
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :free_file(0), cur_range(NULL), last_range(0),
   mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
   dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head=  table;
  key_part_info= head->key_info[index].key_part;
  my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.read_rnd_buff_size;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    /* redirect the thread's allocations into our memroot — TODO confirm
       the caller restores thd->mem_root afterwards */
    thd->mem_root= &alloc;
  }
  else
    memset(&alloc, 0, sizeof(alloc));  /* caller's root (or none) is used */
  file= head->file;
  record= head->record[0];

  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
  if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
                                           MYF(MY_WME))))
  {
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}
1424 
1425 
/* Request that the MRR implementation returns rows in key order. */
void QUICK_RANGE_SELECT::need_sorted_output()
{
  mrr_flags |= HA_MRR_SORTED;
}
1430 
1431 
/* Close any open index/rnd scan on the handler; always succeeds. */
int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}
1440 
1441 
/* End the current index/rnd scan on the handler, if one is open. */
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited)
    file->ha_index_or_rnd_end();
}
1447 
1448 
/*
  Destructor. Skips all teardown when dont_free is set (another object
  owns the resources). Closes and deletes the handler only when this
  quick select created its own clone (free_file).
*/
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      if (free_file)
      {
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_lock(current_thd, F_UNLCK);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
    my_free(column_bitmap.bitmap);
  }
  my_free(mrr_buf_desc);  /* safe on NULL */
  DBUG_VOID_RETURN;
}
1474 
1475 
/*
  Construct an (initially empty) index-merge quick select over 'table';
  child range selects are added later via push_quick_back().
*/
QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
                                                   TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
  index= MAX_KEY;  /* no single index backs this quick select */
  head= table;
  memset(static_cast<void*>(&read_record), 0, sizeof(read_record));
  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  DBUG_VOID_RETURN;
}
1487 
/* Nothing to initialize here; the real work happens in reset(). */
int QUICK_INDEX_MERGE_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
  DBUG_RETURN(0);
}
1493 
/* Run the merged key scans and build the unified rowid set. */
int QUICK_INDEX_MERGE_SELECT::reset()
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
  const int retval= read_keys_and_merge();
  DBUG_RETURN(retval);
}
1500 
1501 bool
push_quick_back(QUICK_RANGE_SELECT * quick_sel_range)1502 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
1503 {
1504   /*
1505     Save quick_select that does scan on clustered primary key as it will be
1506     processed separately.
1507   */
1508   if (head->file->primary_key_is_clustered() &&
1509       quick_sel_range->index == head->s->primary_key)
1510     pk_quick_select= quick_sel_range;
1511   else
1512     return quick_selects.push_back(quick_sel_range);
1513   return 0;
1514 }
1515 
/*
  Destructor. Children's 'file' pointers are nulled before deletion so
  their destructors do not touch the handler — presumably the handlers
  are owned/closed elsewhere; verify against QUICK_RANGE_SELECT's dtor.
*/
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
  delete unique;
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_io_cache(head);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1533 
1534 
/*
  Construct a ROR-intersection quick select. last_rowid is allocated on
  the parent's memroot when one is given, otherwise on our own; a failed
  allocation is detected later by init().
*/
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;  /* no single index backs this quick select */
  head= table;
  record= head->record[0];
  if (!parent_alloc)
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  else
    memset(&alloc, 0, sizeof(MEM_ROOT));
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}
1552 
1553 
1554 /*
1555   Do post-constructor initialization.
1556   SYNOPSIS
1557     QUICK_ROR_INTERSECT_SELECT::init()
1558 
1559   RETURN
1560     0      OK
1561     other  Error code
1562 */
1563 
int QUICK_ROR_INTERSECT_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
 /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
}
1570 
1571 
1572 /*
1573   Initialize this quick select to be a ROR-merged scan.
1574 
1575   SYNOPSIS
1576     QUICK_RANGE_SELECT::init_ror_merged_scan()
1577       reuse_handler If TRUE, use head->file, otherwise create a separate
1578                     handler object
1579 
1580   NOTES
1581     This function creates and prepares for subsequent use a separate handler
1582     object if it can't reuse head->file. The reason for this is that during
1583     ROR-merge several key scans are performed simultaneously, and a single
1584     handler is only capable of preserving context of a single key scan.
1585 
1586     In ROR-merge the quick select doing merge does full records retrieval,
1587     merged quick selects read only keys.
1588 
1589   RETURN
1590     0  ROR child scan initialized, ok to use.
1591     1  error
1592 */
1593 
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
{
  handler *save_file= file, *org_file;
  THD *thd;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  mrr_flags|= HA_MRR_SORTED;  /* ROR merge needs rows in rowid order */
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init() || reset())
    {
      DBUG_RETURN(1);
    }
    head->column_bitmaps_set(&column_bitmap, &column_bitmap);
    file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  thd= head->in_use;
  if (!(file= head->file->clone(head->s->normalized_path.str, thd->mem_root)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  head->column_bitmaps_set(&column_bitmap, &column_bitmap);

  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init() || reset())
  {
    /* Undo the lock before discarding the cloned handler. */
    file->ha_external_lock(thd, F_UNLCK);
    file->ha_close();
    goto failure;
  }
  free_file= TRUE;  /* destructor must close and delete the clone */
  last_rowid= file->ref;
  file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);

end:
  /*
    We are only going to read key fields and call position() on 'file'
    The following sets head->tmp_set to only use this key and then updates
    head->read_set and head->write_set to use this bitmap.
    The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
  */
  org_file= head->file;
  head->file= file;
  /* We don't have to set 'head->keyread' here as the 'file' is unique */
  if (!head->no_keyread)
    head->mark_columns_used_by_index(index);
  head->prepare_for_position();
  head->file= org_file;  /* restore the table's own handler */
  bitmap_copy(&column_bitmap, head->read_set);

  /*
    We have prepared a column_bitmap which get_next() will use. To do this we
    used TABLE::read_set/write_set as playground; restore them to their
    original value to not pollute other scans.
  */
  head->column_bitmaps_set(save_read_set, save_write_set);

  DBUG_RETURN(0);

failure:
  head->column_bitmaps_set(save_read_set, save_write_set);
  delete file;       /* safe when file is NULL (clone failed) */
  file= save_file;
  DBUG_RETURN(1);
}
1684 
1685 
1686 /*
1687   Initialize this quick select to be a part of a ROR-merged scan.
1688   SYNOPSIS
1689     QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1690       reuse_handler If TRUE, use head->file, otherwise create separate
1691                     handler object.
1692   RETURN
1693     0     OK
1694     other error code
1695 */
init_ror_merged_scan(bool reuse_handler)1696 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1697 {
1698   int error;
1699   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1700   QUICK_RANGE_SELECT* quick;
1701   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1702 
1703   /* Initialize all merged "children" quick selects */
1704   DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1705   if (!need_to_fetch_row && reuse_handler)
1706   {
1707     quick= quick_it++;
1708     /*
1709       There is no use of this->file. Use it for the first of merged range
1710       selects.
1711     */
1712     int error= quick->init_ror_merged_scan(TRUE);
1713     if (error)
1714       DBUG_RETURN(error);
1715     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1716   }
1717   while ((quick= quick_it++))
1718   {
1719 #ifndef DBUG_OFF
1720     const MY_BITMAP * const save_read_set= quick->head->read_set;
1721     const MY_BITMAP * const save_write_set= quick->head->write_set;
1722 #endif
1723     if ((error= quick->init_ror_merged_scan(FALSE)))
1724       DBUG_RETURN(error);
1725     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1726     // Sets are shared by all members of "quick_selects" so must not change
1727     DBUG_ASSERT(quick->head->read_set == save_read_set);
1728     DBUG_ASSERT(quick->head->write_set == save_write_set);
1729     /* All merged scans share the same record buffer in intersection. */
1730     quick->record= head->record[0];
1731   }
1732 
1733   /* Prepare for ha_rnd_pos calls if needed. */
1734   if (need_to_fetch_row && (error= head->file->ha_rnd_init(false)))
1735   {
1736     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1737     DBUG_RETURN(error);
1738   }
1739   DBUG_RETURN(0);
1740 }
1741 
1742 
1743 /*
1744   Initialize quick select for row retrieval.
1745   SYNOPSIS
1746     reset()
1747   RETURN
1748     0      OK
1749     other  Error code
1750 */
1751 
reset()1752 int QUICK_ROR_INTERSECT_SELECT::reset()
1753 {
1754   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1755   if (!scans_inited && init_ror_merged_scan(TRUE))
1756     DBUG_RETURN(1);
1757   scans_inited= TRUE;
1758   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
1759   QUICK_RANGE_SELECT *quick;
1760   while ((quick= it++))
1761     quick->reset();
1762   DBUG_RETURN(0);
1763 }
1764 
1765 
1766 /*
1767   Add a merged quick select to this ROR-intersection quick select.
1768 
1769   SYNOPSIS
1770     QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1771       quick Quick select to be added. The quick select must return
1772             rows in rowid order.
1773   NOTES
1774     This call can only be made before init() is called.
1775 
1776   RETURN
1777     FALSE OK
1778     TRUE  Out of memory.
1779 */
1780 
1781 bool
push_quick_back(QUICK_RANGE_SELECT * quick)1782 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
1783 {
1784   return quick_selects.push_back(quick);
1785 }
1786 
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  /* Destroy all merged child scans, then the clustered-PK scan (may be NULL). */
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  /*
    End the rnd scan started in init_ror_merged_scan() (it calls ha_rnd_init
    when need_to_fetch_row is set), if it is still open.
  */
  if (need_to_fetch_row && head->file->inited)
    head->file->ha_rnd_end();
  DBUG_VOID_RETURN;
}
1797 
1798 
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  /* A union scan has no single index of its own. */
  index= MAX_KEY;
  head= table;
  /* Length of a rowid as produced by the table's handler. */
  rowid_length= table->file->ref_length;
  record= head->record[0];
  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  /*
    NOTE(review): side effect -- the THD's mem_root is redirected to this
    object's own allocator here; presumably the caller restores it after
    construction. Confirm against callers before changing.
  */
  thd_param->mem_root= &alloc;
}
1810 
1811 
/*
  Comparison function to be used by the QUICK_ROR_UNION_SELECT::queue
  priority queue.

  SYNOPSIS
    QUICK_ROR_UNION_SELECT_queue_cmp()
      arg   Pointer to QUICK_ROR_UNION_SELECT
      val1  First merged select
      val2  Second merged select
*/
1822 
1823 C_MODE_START
1824 
QUICK_ROR_UNION_SELECT_queue_cmp(void * arg,uchar * val1,uchar * val2)1825 static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
1826 {
1827   QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1828   return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
1829                                    ((QUICK_SELECT_I*)val2)->last_rowid);
1830 }
1831 
1832 C_MODE_END
1833 
1834 
1835 /*
1836   Do post-constructor initialization.
1837   SYNOPSIS
1838     QUICK_ROR_UNION_SELECT::init()
1839 
1840   RETURN
1841     0      OK
1842     other  Error code
1843 */
1844 
init()1845 int QUICK_ROR_UNION_SELECT::init()
1846 {
1847   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1848   if (init_queue(&queue, quick_selects.elements, 0,
1849                  FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
1850                  (void*) this))
1851   {
1852     memset(&queue, 0, sizeof(QUEUE));
1853     DBUG_RETURN(1);
1854   }
1855 
1856   if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1857     DBUG_RETURN(1);
1858   prev_rowid= cur_rowid + head->file->ref_length;
1859   DBUG_RETURN(0);
1860 }
1861 
1862 
1863 /*
1864   Initialize quick select for row retrieval.
1865   SYNOPSIS
1866     reset()
1867 
1868   RETURN
1869     0      OK
1870     other  Error code
1871 */
1872 
reset()1873 int QUICK_ROR_UNION_SELECT::reset()
1874 {
1875   QUICK_SELECT_I *quick;
1876   int error;
1877   DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
1878   have_prev_rowid= FALSE;
1879   if (!scans_inited)
1880   {
1881     List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1882     while ((quick= it++))
1883     {
1884       /*
1885         Use mem_root of this "QUICK" as using the statement mem_root
1886         might result in too many allocations when combined with
1887         dynamic range access where range optimizer is invoked many times
1888         for a single statement.
1889       */
1890       THD *thd= quick->head->in_use;
1891       MEM_ROOT *saved_root= thd->mem_root;
1892       thd->mem_root= &alloc;
1893       error= quick->init_ror_merged_scan(false);
1894       thd->mem_root= saved_root;
1895       if (error)
1896         DBUG_RETURN(1);
1897     }
1898     scans_inited= TRUE;
1899   }
1900   queue_remove_all(&queue);
1901   /*
1902     Initialize scans for merged quick selects and put all merged quick
1903     selects into the queue.
1904   */
1905   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
1906   while ((quick= it++))
1907   {
1908     if ((error= quick->reset()))
1909       DBUG_RETURN(error);
1910     if ((error= quick->get_next()))
1911     {
1912       if (error == HA_ERR_END_OF_FILE)
1913         continue;
1914       DBUG_RETURN(error);
1915     }
1916     quick->save_last_pos();
1917     queue_insert(&queue, (uchar*)quick);
1918   }
1919 
1920   /* Prepare for ha_rnd_pos calls. */
1921   if (head->file->inited && (error= head->file->ha_rnd_end()))
1922   {
1923     DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
1924     DBUG_RETURN(error);
1925   }
1926   if ((error= head->file->ha_rnd_init(false)))
1927   {
1928     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1929     DBUG_RETURN(error);
1930   }
1931 
1932   DBUG_RETURN(0);
1933 }
1934 
1935 
1936 bool
push_quick_back(QUICK_SELECT_I * quick_sel_range)1937 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
1938 {
1939   return quick_selects.push_back(quick_sel_range);
1940 }
1941 
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  /* Queue was created in init(); it holds non-owning pointers only. */
  delete_queue(&queue);
  quick_selects.delete_elements();
  /* End the rnd scan started in reset(), if it is still open. */
  if (head->file->inited)
    head->file->ha_rnd_end();
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1952 
1953 
QUICK_RANGE()1954 QUICK_RANGE::QUICK_RANGE()
1955   :min_key(0),max_key(0),min_length(0),max_length(0),
1956    flag(NO_MIN_RANGE | NO_MAX_RANGE),
1957   min_keypart_map(0), max_keypart_map(0)
1958 {}
1959 
QUICK_RANGE(const uchar * min_key_arg,uint min_length_arg,key_part_map min_keypart_map_arg,const uchar * max_key_arg,uint max_length_arg,key_part_map max_keypart_map_arg,uint flag_arg)1960 QUICK_RANGE::QUICK_RANGE(const uchar *min_key_arg, uint min_length_arg,
1961                          key_part_map min_keypart_map_arg,
1962                          const uchar *max_key_arg, uint max_length_arg,
1963                          key_part_map max_keypart_map_arg,
1964                          uint flag_arg)
1965   : min_key(NULL),
1966     max_key(NULL),
1967     min_length((uint16) min_length_arg),
1968     max_length((uint16) max_length_arg),
1969     flag((uint16) flag_arg),
1970     min_keypart_map(min_keypart_map_arg),
1971     max_keypart_map(max_keypart_map_arg)
1972 {
1973   min_key= static_cast<uchar*>(sql_memdup(min_key_arg, min_length_arg + 1));
1974   max_key= static_cast<uchar*>(sql_memdup(max_key_arg, max_length_arg + 1));
1975   // If we get is_null_string as argument, the memdup is undefined behavior.
1976   DBUG_ASSERT(min_key_arg != is_null_string);
1977   DBUG_ASSERT(max_key_arg != is_null_string);
1978 }
1979 
/*
  Shallow copy of a SEL_ARG node: interval data and the next_key_part pointer
  are copied, while tree links (left/right) are reset to &null_element and
  list links (prev/next) to NULL, so the copy starts out as a detached node.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
{
  DBUG_ASSERT(arg.type != MAYBE_KEY);  // Would need left=right=NULL
  left=right= &null_element;
  prev=next= NULL;
  type=arg.type;
  min_flag=arg.min_flag;
  max_flag=arg.max_flag;
  maybe_flag=arg.maybe_flag;
  maybe_null=arg.maybe_null;
  part=arg.part;
  field=arg.field;
  /* min/max value buffers are shared with the source node, not duplicated. */
  min_value=arg.min_value;
  max_value=arg.max_value;
  next_key_part=arg.next_key_part;
  use_count=1; elements=1;  // the copy starts out with one reference
  /*
    NOTE(review): 'color' and 'parent' are not copied here -- presumably they
    are set by the tree operations that later insert this node; confirm
    before relying on their values in a fresh copy.
  */
}
1997 
1998 
make_root()1999 inline void SEL_ARG::make_root()
2000 {
2001   left=right= &null_element;
2002   color=BLACK;
2003   next=prev= NULL;
2004   use_count=0; elements=1;
2005 }
2006 
/*
  Construct a KEY_RANGE SEL_ARG over field 'f' with the given min/max value
  images (pointers are stored, the values are not copied).
*/
SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
                 const uchar *max_value_arg)
  :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
   elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
   max_value((uchar*) max_value_arg), next(NULL), prev(NULL),
   next_key_part(0), color(BLACK), type(KEY_RANGE)
{
  /*
    NOTE(review): the 'part' member is not initialized by this constructor --
    presumably callers use this form only for the first key part; confirm.
  */
  left=right= &null_element;  // fresh node: no children in the RB tree
}
2016 
/*
  Construct a KEY_RANGE SEL_ARG for key part 'part_' of 'field_' with
  explicit min/max/maybe flags (value pointers are stored, not copied).
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
                 uchar *min_value_, uchar *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
   part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
   field(field_), min_value(min_value_), max_value(max_value_),
   next(NULL), prev(NULL), next_key_part(0), color(BLACK), type(KEY_RANGE)
{
  left=right= &null_element;  // fresh node: no children in the RB tree
}
2027 
/*
  Recursively clone this SEL_ARG node and its left/right subtrees.

  param      Range analysis context; alloced_sel_args is incremented per
             cloned node and cloning is aborted once it exceeds MAX_SEL_ARGS.
  new_parent Parent to assign to the clone of 'this'.
  next_arg   In/out: tail of the prev/next interval list being built; each
             cloned node is appended here (left subtree first, then this
             node, then the right subtree, i.e. in-order).

  Returns the cloned node, or NULL on OOM / too many SEL_ARGs.
*/
SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
                        SEL_ARG **next_arg)
{
  SEL_ARG *tmp;

  /* Bail out if we have already generated too many SEL_ARGs */
  if (++param->alloced_sel_args > MAX_SEL_ARGS)
    return 0;

  if (type != KEY_RANGE)
  {
    /* Non-interval node (e.g. IMPOSSIBLE/MAYBE): only type and part matter. */
    if (!(tmp= new (param->mem_root) SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
    tmp->part= this->part;
  }
  else
  {
    if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
                                             min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    /* Clone the left subtree before linking 'tmp' to keep the list in-order. */
    if (left != &null_element)
      if (!(tmp->left=left->clone(param, tmp, next_arg)))
	return 0;				// OOM

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(param, tmp, next_arg)))
	return 0;				// OOM
  }
  /*
    NOTE(review): bumps use counts on the original's key-part chain --
    presumably because the clone shares next_key_part; confirm.
  */
  increment_use_count(1);
  tmp->color= color;
  tmp->elements= this->elements;
  return tmp;
}
2070 
2071 /**
2072   This gives the first SEL_ARG in the interval list, and the minimal element
2073   in the red-black tree
2074 
2075   @return
2076   SEL_ARG   first SEL_ARG in the interval list
2077 */
first()2078 SEL_ARG *SEL_ARG::first()
2079 {
2080   SEL_ARG *next_arg=this;
2081   if (!next_arg->left)
2082     return 0;					// MAYBE_KEY
2083   while (next_arg->left != &null_element)
2084     next_arg=next_arg->left;
2085   return next_arg;
2086 }
2087 
first() const2088 const SEL_ARG *SEL_ARG::first() const
2089 {
2090   return const_cast<SEL_ARG*>(this)->first();
2091 }
2092 
last()2093 SEL_ARG *SEL_ARG::last()
2094 {
2095   SEL_ARG *next_arg=this;
2096   if (!next_arg->right)
2097     return 0;					// MAYBE_KEY
2098   while (next_arg->right != &null_element)
2099     next_arg=next_arg->right;
2100   return next_arg;
2101 }
2102 
2103 
/*
  Check if a compare is ok, when one takes ranges into account
  Returns -2 or 2 if the ranges were 'joined' like  < 2 and >= 2
*/
2108 
/*
  Compare two range endpoints 'a' and 'b' over 'field', taking the range
  flags into account.

  a_flag/b_flag  Endpoint flags (NO_MIN_RANGE/NO_MAX_RANGE for unbounded
                 endpoints, NEAR_MIN/NEAR_MAX for open endpoints).

  Returns -1/0/1 for ordinary less/equal/greater, and -2/2 when the values
  are equal but one endpoint is open and the other closed so that the two
  ranges join (e.g. < 2 and >= 2).
*/
static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                   uint8 b_flag)
{
  int cmp;
  /* First check if there was a compare to a min or max element */
  if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
  {
    /* Two unbounded endpoints of the same kind compare equal. */
    if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
	(b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
      return 0;
    /* -infinity sorts before everything, +infinity after everything. */
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  if (field->real_maybe_null())			// If null is part of key
  {
    /* First byte of each key image is the NULL indicator. */
    if (*a != *b)
    {
      return *a ? -1 : 1;                       // NULL sorts first
    }
    if (*a)
      goto end;					// NULL where equal
    a++; b++;					// Skip NULL marker
  }
  cmp=field->key_cmp(a , b);
  if (cmp) return cmp < 0 ? -1 : 1;		// The values differed

  // Check if the compared equal arguments was defined with open/closed range
 end:
  if (a_flag & (NEAR_MIN | NEAR_MAX))
  {
    if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
      return 0;
    if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
      return (a_flag & NEAR_MIN) ? 2 : -2;      // ranges join at this value
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_flag & (NEAR_MIN | NEAR_MAX))
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements were equal
}
2151 
2152 
clone_tree(RANGE_OPT_PARAM * param)2153 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2154 {
2155   SEL_ARG tmp_link,*next_arg,*root;
2156   next_arg= &tmp_link;
2157   if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2158     return 0;
2159   next_arg->next=0;				// Fix last link
2160   tmp_link.next->prev=0;			// Fix first link
2161   if (root)					// If not OOM
2162     root->use_count= 0;
2163   return root;
2164 }
2165 
2166 
2167 /*
2168   Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2169   objects from table read plans.
2170 */
class TABLE_READ_PLAN
{
public:
  /*
    Plan read cost, with or without cost of full row retrieval, depending
    on plan creation parameters.
  */
  double read_cost;
  ha_rows records; /* estimate of #rows to be examined */

  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
    scans that can be both ROR and non-ROR.
  */
  bool is_ror;

  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
       retrieve_full_rows  If TRUE, created quick select will do full record
                           retrieval.
       parent_alloc        Memory pool to use, if any.

    NOTES
      retrieve_full_rows is ignored by some implementations.

    RETURN
      created quick select
      NULL on any error.
  */
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

  /* Table read plans are allocated on MEM_ROOT and are never deleted */
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return (void*) alloc_root(mem_root, (uint) size); }
  /* Poison the memory to catch use-after-delete; no actual deallocation. */
  static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
  /* Placement-delete counterpart of operator new(MEM_ROOT*). */
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
  virtual ~TABLE_READ_PLAN() {}               /* Remove gcc warning */

  /**
     Add basic info for this TABLE_READ_PLAN to the optimizer trace.

     @param param        Parameters for range analysis of this table
     @param trace_object The optimizer trace object the info is appended to
   */
  virtual void trace_basic_info(const PARAM *param,
                                Opt_trace_object *trace_object) const = 0;
};
2223 
2224 /*
2225   Plan for a QUICK_RANGE_SELECT scan.
2226   TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2227   QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2228   record retrieval scans.
2229 */
2230 
class TRP_RANGE : public TABLE_READ_PLAN
{
public:
  /**
    Root of red-black tree for intervals over key fields to be used in
    "range" method retrieval. See SEL_ARG graph description.
  */
  SEL_ARG *key;
  uint     key_idx; /* key number in PARAM::key and PARAM::real_keynr*/
  uint     mrr_flags;
  uint     mrr_buf_size;

  /* Note: mrr_buf_size is not set by this constructor. */
  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
   : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
  {}
  virtual ~TRP_RANGE() {}                     /* Remove gcc warning */

  /*
    Build a QUICK_RANGE_SELECT for this plan and copy the plan's row
    estimate and cost onto it. retrieve_full_rows is ignored (see class
    comment). Returns NULL on error.
  */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc)
  {
    DBUG_ENTER("TRP_RANGE::make_quick");
    QUICK_RANGE_SELECT *quick;
    if ((quick= get_quick_select(param, key_idx, key, mrr_flags, mrr_buf_size,
                                 parent_alloc)))
    {
      quick->records= records;
      quick->read_time= read_cost;
    }
    DBUG_RETURN(quick);
  }

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2265 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2266 void TRP_RANGE::trace_basic_info(const PARAM *param,
2267                                  Opt_trace_object *trace_object) const
2268 {
2269 #ifdef OPTIMIZER_TRACE
2270   DBUG_ASSERT(param->using_real_indexes);
2271   const uint keynr_in_table= param->real_keynr[key_idx];
2272 
2273   const KEY &cur_key= param->table->key_info[keynr_in_table];
2274   const KEY_PART_INFO *key_part= cur_key.key_part;
2275 
2276   trace_object->add_alnum("type", "range_scan").
2277     add_utf8("index", cur_key.name).add("rows", records);
2278 
2279   Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");
2280 
2281   // TRP_RANGE should not be created if there are no range intervals
2282   DBUG_ASSERT(key);
2283 
2284   String range_info;
2285   range_info.set_charset(system_charset_info);
2286   append_range_all_keyparts(&trace_range, NULL, &range_info, key, key_part);
2287 
2288 #endif
2289 }
2290 
2291 
/* Descriptor of one candidate ROR (rowid-ordered retrieval) index scan. */
typedef struct st_ror_scan_info
{
  uint      idx;      ///< # of used key in param->keys
  uint      keynr;    ///< # of used key in table
  ha_rows   records;  ///< estimate of # records this scan will return

  /** Set of intervals over key fields that will be used for row retrieval. */
  SEL_ARG   *sel_arg;

  /** Fields used in the query and covered by this ROR scan. */
  MY_BITMAP covered_fields;
  /**
    Fields used in the query that are a) covered by this ROR scan and
    b) not already covered by ROR scans ordered earlier in the merge
    sequence.
  */
  MY_BITMAP covered_fields_remaining;
  /** #fields in covered_fields_remaining (caching of bitmap_bits_set()) */
  uint      num_covered_fields_remaining;

  /**
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
  */
  double    index_read_cost;
} ROR_SCAN_INFO;
2318 
2319 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2320 
class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
  TRP_ROR_INTERSECT() {}                      /* Remove gcc warning */
  virtual ~TRP_ROR_INTERSECT() {}             /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);

  /* Array of pointers to ROR range scans used in this intersection */
  struct st_ror_scan_info **first_scan;
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
  double index_scan_costs; /* SUM(cost(index_scan)) */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2339 
/*
  Append an "index_roworder_intersect" entry for this plan to the optimizer
  trace: overall stats plus one "range_scan" object per intersected scan.
  No-op unless compiled with OPTIMIZER_TRACE.
*/
void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
                                         Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  trace_object->add_alnum("type", "index_roworder_intersect").
    add("rows", records).
    add("cost", read_cost).
    add("covering", is_covering).
    add("clustered_pk_scan", cpk_scan != NULL);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_array ota(trace, "intersect_of");
  for (st_ror_scan_info **cur_scan= first_scan;
       cur_scan != last_scan;
       cur_scan++)
  {
    const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
    const KEY_PART_INFO *key_part= cur_key.key_part;

    Opt_trace_object trace_isect_idx(trace);
    trace_isect_idx.add_alnum("type", "range_scan").
      add_utf8("index", cur_key.name).add("rows", (*cur_scan)->records);

    Opt_trace_array trace_range(trace, "ranges");
    /* Outer loop: intervals of the scan, linked via 'next'. */
    for (const SEL_ARG *current= (*cur_scan)->sel_arg;
         current;
         current= current->next)
    {
      String range_info;
      range_info.set_charset(system_charset_info);
      /* Inner loop: per-keypart conditions, linked via 'next_key_part'. */
      for (const SEL_ARG *part= current;
           part;
           part= part->next_key_part)
      {
        const KEY_PART_INFO *cur_key_part= key_part + part->part;
        append_range(&range_info, cur_key_part,
                     part->min_value, part->max_value,
                     part->min_flag | part->max_flag);
      }
      trace_range.add_utf8(range_info.ptr(), range_info.length());
    }
  }
#endif
}
2384 
2385 /*
2386   Plan for QUICK_ROR_UNION_SELECT scan.
2387   QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2388   is ignored by make_quick.
2389 */
2390 
class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
  TRP_ROR_UNION() {}                          /* Remove gcc warning */
  virtual ~TRP_ROR_UNION() {}                 /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2404 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2405 void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
2406                                      Opt_trace_object *trace_object) const
2407 {
2408 #ifdef OPTIMIZER_TRACE
2409   Opt_trace_context * const trace= &param->thd->opt_trace;
2410   trace_object->add_alnum("type", "index_roworder_union");
2411   Opt_trace_array ota(trace, "union_of");
2412   for (TABLE_READ_PLAN **current= first_ror;
2413        current != last_ror;
2414        current++)
2415   {
2416     Opt_trace_object trp_info(trace);
2417     (*current)->trace_basic_info(param, &trp_info);
2418   }
2419 #endif
2420 }
2421 
/*
  Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
  is ignored by make_quick.
*/
2427 
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
  TRP_INDEX_MERGE() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_MERGE() {}               /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2441 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2442 void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
2443                                        Opt_trace_object *trace_object) const
2444 {
2445 #ifdef OPTIMIZER_TRACE
2446   Opt_trace_context * const trace= &param->thd->opt_trace;
2447   trace_object->add_alnum("type", "index_merge");
2448   Opt_trace_array ota(trace, "index_merge_of");
2449   for (TRP_RANGE **current= range_scans;
2450        current != range_scans_end;
2451        current++)
2452   {
2453     Opt_trace_object trp_info(trace);
2454     (*current)->trace_basic_info(param, &trp_info);
2455   }
2456 #endif
2457 }
2458 
2459 /*
2460   Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2461 */
2462 
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min;             ///< TRUE if there is a MIN function
  bool have_max;             ///< TRUE if there is a MAX function
  /**
    TRUE if there is an aggregate distinct function, e.g.
    "COUNT(DISTINCT x)"
   */
  bool have_agg_distinct;
  /**
    The key_part of the only field used by all MIN/MAX functions.
    Note that TRP_GROUP_MIN_MAX is not used if there are MIN/MAX
    functions on more than one field.
  */
  KEY_PART_INFO *min_max_arg_part;
  uint group_prefix_len;    ///< Length of all key parts in the group prefix
  uint used_key_parts;      ///< Number of index key parts used for access
  uint group_key_parts;     ///< Number of index key parts in the group prefix
  KEY *index_info;          ///< The index chosen for data access
  uint index;               ///< The id of the chosen index
  uchar key_infix[MAX_KEY_LENGTH];  ///< Constants from equality predicates
  uint key_infix_len;       ///< Length of key_infix
  SEL_TREE *range_tree;     ///< Represents all range predicates in the query
  SEL_ARG  *index_tree;     ///< The sub-tree corresponding to index_info
  uint param_idx;           ///< Index of used key in param->key
  bool is_index_scan;       ///< Use index_next() instead of random read
public:
  /** Number of records selected by the ranges in index_tree. */
  ha_rows quick_prefix_records;
public:

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;

  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    bool have_agg_distinct_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
                    uint index_arg, uint key_infix_len_arg,
                    uchar *key_infix_arg,
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    have_agg_distinct(have_agg_distinct_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
    quick_prefix_records(quick_prefix_records_arg)
    {
      /* Copy the infix constants; key_infix stays untouched when len == 0. */
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
  virtual ~TRP_GROUP_MIN_MAX() {}             /* Remove gcc warning */

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  /* Switch the created quick select to sequential index scanning. */
  void use_index_scan() { is_index_scan= TRUE; }
};
2525 
/*
  Append an "index_group" entry for this plan to the optimizer trace:
  index/grouping attribute, aggregate kinds, estimates, the key parts used
  for access, and the ranges (if any). No-op unless compiled with
  OPTIMIZER_TRACE.
*/
void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
                                         Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  trace_object->add_alnum("type", "index_group").
    add_utf8("index", index_info->name);
  if (min_max_arg_part)
    trace_object->add_utf8("group_attribute",
                           min_max_arg_part->field->field_name);
  else
    trace_object->add_null("group_attribute");
  trace_object->add("min_aggregate", have_min).
    add("max_aggregate", have_max).
    add("distinct_aggregate", have_agg_distinct).
    add("rows", records).
    add("cost", read_cost);

  const KEY_PART_INFO *key_part= index_info->key_part;
  Opt_trace_context * const trace= &param->thd->opt_trace;
  {
    /* Scoped so the key-parts array is closed before "ranges" opens. */
    Opt_trace_array trace_keyparts(trace, "key_parts_used_for_access");
    for (uint partno= 0; partno < used_key_parts; partno++)
    {
      const KEY_PART_INFO *cur_key_part= key_part + partno;
      trace_keyparts.add_utf8(cur_key_part->field->field_name);
    }
  }
  Opt_trace_array trace_range(trace, "ranges");

  // can have group quick without ranges
  if (index_tree)
  {
    String range_info;
    range_info.set_charset(system_charset_info);
    append_range_all_keyparts(&trace_range, NULL,
                              &range_info, index_tree, key_part);
  }
#endif
}
2565 
2566 /*
2567   Fill param->needed_fields with bitmap of fields used in the query.
2568   SYNOPSIS
2569     fill_used_fields_bitmap()
2570       param Parameter from test_quick_select function.
2571 
2572   NOTES
2573     Clustered PK members are not put into the bitmap as they are implicitly
2574     present in all keys (and it is impossible to avoid reading them).
2575   RETURN
2576     0  Ok
2577     1  Out of memory.
2578 */
2579 
fill_used_fields_bitmap(PARAM * param)2580 static int fill_used_fields_bitmap(PARAM *param)
2581 {
2582   TABLE *table= param->table;
2583   my_bitmap_map *tmp;
2584   uint pk;
2585   param->tmp_covered_fields.bitmap= 0;
2586   param->fields_bitmap_size= table->s->column_bitmap_size;
2587   if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2588                                   param->fields_bitmap_size)) ||
2589       bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2590     return 1;
2591 
2592   bitmap_copy(&param->needed_fields, table->read_set);
2593   bitmap_union(&param->needed_fields, table->write_set);
2594 
2595   pk= param->table->s->primary_key;
2596   if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2597   {
2598     /* The table uses clustered PK and it is not internally generated */
2599     KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2600     KEY_PART_INFO *key_part_end=
2601       key_part + param->table->key_info[pk].user_defined_key_parts;
2602     for (;key_part != key_part_end; ++key_part)
2603       bitmap_clear_bit(&param->needed_fields, key_part->fieldnr-1);
2604   }
2605   return 0;
2606 }
2607 
2608 
2609 /*
2610   Test if a key can be used in different ranges
2611 
2612   SYNOPSIS
2613     SQL_SELECT::test_quick_select()
2614       thd               Current thread
2615       keys_to_use       Keys to use for range retrieval
2616       prev_tables       Tables assumed to be already read when the scan is
2617                         performed (but not read at the moment of this call)
2618       limit             Query limit
2619       force_quick_range Prefer to use range (instead of full table scan) even
2620                         if it is more expensive.
2621       interesting_order The sort order the range access method must be able
2622                         to provide. Three-value logic: asc/desc/don't care
2623 
2624   NOTES
2625     Updates the following in the select parameter:
2626       needed_reg - Bits for keys that may be used if all prev regs are read
2627       quick      - Parameter to use when reading records.
2628 
2629     In the table struct the following information is updated:
2630       quick_keys           - Which keys can be used
2631       quick_rows           - How many rows the key matches
2632       quick_condition_rows - E(# rows that will satisfy the table condition)
2633 
2634   IMPLEMENTATION
2635     quick_condition_rows value is obtained as follows:
2636 
2637       It is a minimum of E(#output rows) for all considered table access
2638       methods (range and index_merge accesses over various indexes).
2639 
2640     The obtained value is not a true E(#rows that satisfy table condition)
2641     but rather a pessimistic estimate. To obtain a true E(#...) one would
2642     need to combine estimates of various access methods, taking into account
2643     correlations between sets of rows they will return.
2644 
2645     For example, if values of tbl.key1 and tbl.key2 are independent (a right
2646     assumption if we have no information about their correlation) then the
2647     correct estimate will be:
2648 
2649       E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2650       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2)
2651 
2652     which is smaller than
2653 
2654        MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2)))
2655 
2656     which is currently produced.
2657 
2658   TODO
2659    * Change the value returned in quick_condition_rows from a pessimistic
2660      estimate to true E(#rows that satisfy table condition).
2661      (we can re-use some of E(#rows) calculation code from index_merge/intersection
2662       for this)
2663 
2664    * Check if this function really needs to modify keys_to_use, and change the
2665      code to pass it by reference if it doesn't.
2666 
2667    * In addition to force_quick_range other means can be (and usually are) used
2668      to make this function prefer range over full table scan. Figure out if
2669      force_quick_range is really needed.
2670 
2671   RETURN
2672    -1 if impossible select (i.e. certainly no rows will be selected)
2673     0 if can't use quick_select
2674     1 if found usable ranges and quick select has been successfully created.
2675 */
2676 
test_quick_select(THD * thd,key_map keys_to_use,table_map prev_tables,ha_rows limit,bool force_quick_range,const ORDER::enum_order interesting_order)2677 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
2678                                   table_map prev_tables,
2679                                   ha_rows limit, bool force_quick_range,
2680                                   const ORDER::enum_order interesting_order)
2681 {
2682   uint idx;
2683   double scan_time;
2684   DBUG_ENTER("SQL_SELECT::test_quick_select");
2685   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
2686 		      (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2687 		      (ulong) const_tables));
2688 
  /* Reset state from any previous call: no quick select, no usable keys yet. */
2689   set_quick(NULL);
2690   needed_reg.clear_all();
2691   quick_keys.clear_all();
2692   if (keys_to_use.is_clear_all())
2693     DBUG_RETURN(0);
2694   records= head->file->stats.records;
2695   if (!records)
2696     records++;					/* purecov: inspected */
  /*
    Baseline costs: scan_time is the CPU cost of evaluating all rows,
    read_time the total cost of a full table scan (I/O + evaluation).
    Any range plan must beat read_time to be chosen.
  */
2697   scan_time= records * ROW_EVALUATE_COST + 1;
2698   read_time= head->file->scan_time() + scan_time + 1.1;
2699   if (head->force_index)
2700     scan_time= read_time= DBL_MAX;
2701   if (limit < records)
2702     read_time= (double) records + scan_time + 1; // Force to use index
2703   else if (read_time <= 2.0 && !force_quick_range)
2704     DBUG_RETURN(0);				/* No need for quick select */
2705 
2706   Opt_trace_context * const trace= &thd->opt_trace;
2707   Opt_trace_object trace_range(trace, "range_analysis");
2708   Opt_trace_object(trace, "table_scan").
2709     add("rows", head->file->stats.records).
2710     add("cost", read_time);
2711 
2712   keys_to_use.intersect(head->keys_in_use_for_query);
2713   if (!keys_to_use.is_clear_all())
2714   {
2715     MEM_ROOT alloc;
2716     SEL_TREE *tree= NULL;
2717     KEY_PART *key_parts;
2718     KEY *key_info;
2719     PARAM param;
2720 
2721     /*
2722       Use the 3 multiplier as range optimizer allocates big PARAM structure
2723       and may evaluate a subquery expression
2724       TODO During the optimization phase we should evaluate only inexpensive
2725            single-lookup subqueries.
2726     */
2727     if (check_stack_overrun(thd, 3*STACK_MIN_SIZE + sizeof(PARAM), NULL))
2728       DBUG_RETURN(0);                           // Fatal error flag is set
2729 
2730     /* set up parameter that is passed to all functions */
2731     param.thd= thd;
2732     param.baseflag= head->file->ha_table_flags();
2733     param.prev_tables=prev_tables | const_tables;
2734     param.read_tables=read_tables;
2735     param.current_table= head->map;
2736     param.table=head;
2737     param.keys=0;
2738     param.mem_root= &alloc;
2739     param.old_root= thd->mem_root;
2740     param.needed_reg= &needed_reg;
2741     param.imerge_cost_buff_size= 0;
2742     param.using_real_indexes= TRUE;
2743     param.remove_jump_scans= TRUE;
2744     param.force_default_mrr= (interesting_order == ORDER::ORDER_DESC);
2745     param.order_direction= interesting_order;
2746     param.use_index_statistics= false;
2747 
2748     thd->no_errors=1;				// Don't warn about NULL
2749     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
2750     if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
2751                                                   sizeof(KEY_PART)*
2752                                                   head->s->key_parts)) ||
2753         fill_used_fields_bitmap(&param))
2754     {
2755       thd->no_errors=0;
2756       free_root(&alloc,MYF(0));			// Return memory & allocator
2757       DBUG_RETURN(0);				// Can't use range
2758     }
2759     key_parts= param.key_parts;
  /* Route all further range-analysis allocations to the local 'alloc' arena. */
2760     thd->mem_root= &alloc;
2761 
2762     {
2763       Opt_trace_array trace_idx(trace,
2764                                 "potential_range_indices",
2765                                 Opt_trace_context::RANGE_OPTIMIZER);
2766       /*
2767         Make an array with description of all key parts of all table keys.
2768         This is used in get_mm_parts function.
2769       */
2770       key_info= head->key_info;
2771       for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
2772       {
2773         Opt_trace_object trace_idx_details(trace);
2774         trace_idx_details.add_utf8("index", key_info->name);
2775         KEY_PART_INFO *key_part_info;
2776         if (!keys_to_use.is_set(idx))
2777         {
2778           trace_idx_details.add("usable", false).
2779             add_alnum("cause", "not_applicable");
2780           continue;
2781         }
2782         if (key_info->flags & HA_FULLTEXT)
2783         {
2784           trace_idx_details.add("usable", false).
2785             add_alnum("cause", "fulltext");
2786           continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG
2787         }
2788 
2789         trace_idx_details.add("usable", true);
2790 
2791         param.key[param.keys]=key_parts;
2792         key_part_info= key_info->key_part;
2793         Opt_trace_array trace_keypart(trace, "key_parts");
2794         for (uint part=0 ; part < actual_key_parts(key_info) ;
2795              part++, key_parts++, key_part_info++)
2796         {
2797           key_parts->key=          param.keys;
2798           key_parts->part=         part;
2799           key_parts->length=       key_part_info->length;
2800           key_parts->store_length= key_part_info->store_length;
2801           key_parts->field=        key_part_info->field;
2802           key_parts->null_bit=     key_part_info->null_bit;
2803           key_parts->image_type =
2804             (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2805           /* Only HA_PART_KEY_SEG is used */
2806           key_parts->flag=         (uint8) key_part_info->key_part_flag;
2807           trace_keypart.add_utf8(key_parts->field->field_name);
2808         }
2809         param.real_keynr[param.keys++]=idx;
2810       }
2811     }
2812     param.key_parts_end=key_parts;
2813     param.alloced_sel_args= 0;
2814 
2815     /* Calculate cost of full index read for the shortest covering index */
2816     if (!head->covering_keys.is_clear_all())
2817     {
2818       int key_for_use= find_shortest_key(head, &head->covering_keys);
2819       double key_read_time=
2820         param.table->file->index_only_read_time(key_for_use,
2821                                                 rows2double(records)) +
2822         records * ROW_EVALUATE_COST;
2823 
2824       bool chosen= false;
2825       if (key_read_time < read_time)
2826       {
2827         read_time= key_read_time;
2828         chosen= true;
2829       }
2830 
2831       Opt_trace_object trace_cov(trace,
2832                                  "best_covering_index_scan",
2833                                  Opt_trace_context::RANGE_OPTIMIZER);
2834       trace_cov.add_utf8("index", head->key_info[key_for_use].name).
2835         add("cost", key_read_time).add("chosen", chosen);
2836       if (!chosen)
2837         trace_cov.add_alnum("cause", "cost");
2838     }
2839 
  /* Track the cheapest read plan found so far; start from the scan cost. */
2840     TABLE_READ_PLAN *best_trp= NULL;
2841     TRP_GROUP_MIN_MAX *group_trp;
2842     double best_read_time= read_time;
2843 
2844     if (cond)
2845     {
2846       {
2847         Opt_trace_array trace_setup_cond(trace, "setup_range_conditions");
2848         tree= get_mm_tree(&param,cond);
2849       }
2850       if (tree)
2851       {
2852         if (tree->type == SEL_TREE::IMPOSSIBLE)
2853         {
2854           trace_range.add("impossible_range", true);
2855           records=0L;                      /* Return -1 from this function. */
2856           read_time= (double) HA_POS_ERROR;
2857           goto free_mem;
2858         }
2859         /*
2860           If the tree can't be used for range scans, proceed anyway, as we
2861           can construct a group-min-max quick select
2862         */
2863         if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
2864         {
2865           trace_range.add("range_scan_possible", false);
2866           if (tree->type == SEL_TREE::ALWAYS)
2867             trace_range.add_alnum("cause", "condition_always_true");
2868 
2869           tree= NULL;
2870         }
2871       }
2872     }
2873 
2874     /*
2875       Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
2876       Notice that it can be constructed no matter if there is a range tree.
2877     */
2878     group_trp= get_best_group_min_max(&param, tree, best_read_time);
2879     if (group_trp)
2880     {
2881       param.table->quick_condition_rows= min(group_trp->records,
2882                                              head->file->stats.records);
2883       Opt_trace_object grp_summary(trace,
2884                                    "best_group_range_summary",
2885                                    Opt_trace_context::RANGE_OPTIMIZER);
2886       if (unlikely(trace->is_started()))
2887         group_trp->trace_basic_info(&param, &grp_summary);
2888       if (group_trp->read_cost < best_read_time)
2889       {
2890         grp_summary.add("chosen", true);
2891         best_trp= group_trp;
2892         best_read_time= best_trp->read_cost;
2893       }
2894       else
2895         grp_summary.add("chosen", false).add_alnum("cause", "cost");
2896     }
2897 
2898     if (tree)
2899     {
2900       /*
2901         It is possible to use a range-based quick select (but it might be
2902         slower than 'all' table scan).
2903       */
2904       dbug_print_tree("final_tree", tree, &param);
2905 
2906       {
2907         /*
2908           Calculate cost of single index range scan and possible
2909           intersections of these
2910         */
2911         Opt_trace_object trace_range(trace,
2912                                      "analyzing_range_alternatives",
2913                                      Opt_trace_context::RANGE_OPTIMIZER);
2914         TRP_RANGE         *range_trp;
2915         TRP_ROR_INTERSECT *rori_trp;
2916 
2917         /* Get best 'range' plan and prepare data for making other plans */
2918         if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
2919                                              best_read_time)))
2920         {
2921           best_trp= range_trp;
2922           best_read_time= best_trp->read_cost;
2923         }
2924 
2925         /*
2926           Simultaneous key scans and row deletes on several handler
2927           objects are not allowed so don't use ROR-intersection for
2928           table deletes. Also, ROR-intersection cannot return rows in
2929           descending order
2930         */
2931         if ((thd->lex->sql_command != SQLCOM_DELETE) &&
2932             thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
2933             interesting_order != ORDER::ORDER_DESC)
2934         {
2935           /*
2936             Get best non-covering ROR-intersection plan and prepare data for
2937             building covering ROR-intersection.
2938           */
2939           if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time)))
2940           {
2941             best_trp= rori_trp;
2942             best_read_time= best_trp->read_cost;
2943           }
2944         }
2945       }
2946 
2947       // Here we calculate cost of union index merge
2948       if (!tree->merges.is_empty())
2949       {
2950         // Cannot return rows in descending order.
2951         if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
2952             interesting_order != ORDER::ORDER_DESC &&
2953             param.table->file->stats.records)
2954         {
2955           /* Try creating index_merge/ROR-union scan. */
2956           SEL_IMERGE *imerge;
2957           TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
2958           LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
2959           List_iterator_fast<SEL_IMERGE> it(tree->merges);
2960           Opt_trace_array trace_idx_merge(trace,
2961                                           "analyzing_index_merge",
2962                                           Opt_trace_context::RANGE_OPTIMIZER);
2963           while ((imerge= it++))
2964           {
2965             new_conj_trp= get_best_disjunct_quick(&param, imerge,
2966                                                   best_read_time);
2967             if (new_conj_trp)
2968               set_if_smaller(param.table->quick_condition_rows,
2969                              new_conj_trp->records);
2970             if (!best_conj_trp ||
2971                 (new_conj_trp &&
2972                  new_conj_trp->read_cost < best_conj_trp->read_cost))
2973             {
2974               best_conj_trp= new_conj_trp;
2975             }
2976           }
2977           if (best_conj_trp)
2978             best_trp= best_conj_trp;
2979         }
2980       }
2981     }
2982 
  /*
    Restore the caller's mem_root before creating the quick select: the
    local 'alloc' arena is freed below, but 'quick' must outlive it.
  */
2983     thd->mem_root= param.old_root;
2984 
2985     /* If we got a read plan, create a quick select from it. */
2986     if (best_trp)
2987     {
2988       records= best_trp->records;
2989       if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
2990         set_quick(NULL);
2991     }
2992 
2993 free_mem:
2994     if (unlikely(quick && trace->is_started() && best_trp))
2995     {
2996       // best_trp cannot be NULL if quick is set, done to keep fortify happy
2997       Opt_trace_object trace_range_summary(trace,
2998                                            "chosen_range_access_summary");
2999       {
3000         Opt_trace_object trace_range_plan(trace,
3001                                           "range_access_plan");
3002         best_trp->trace_basic_info(&param, &trace_range_plan);
3003       }
3004       trace_range_summary.add("rows_for_plan", quick->records).
3005         add("cost_for_plan", quick->read_time).
3006         add("chosen", true);
3007     }
3008 
3009     free_root(&alloc,MYF(0));			// Return memory & allocator
3010     thd->mem_root= param.old_root;
3011     thd->no_errors=0;
3012   }
3013 
3014   DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
3015 
3016   /*
3017     Assume that if the user is using 'limit' we will only need to scan
3018     limit rows if we are using a key
3019   */
  /* records == 0 only for an impossible range: return -1 in that case. */
3020   DBUG_RETURN(records ? MY_TEST(quick) : -1);
3021 }
3022 
3023 /****************************************************************************
3024  * Partition pruning module
3025  ****************************************************************************/
3026 #ifdef WITH_PARTITION_STORAGE_ENGINE
3027 
3028 /*
3029   PartitionPruningModule
3030 
3031   This part of the code does partition pruning. Partition pruning solves the
3032   following problem: given a query over partitioned tables, find partitions
3033   that we will not need to access (i.e. partitions that we can assume to be
3034   empty) when executing the query.
3035   The set of partitions to prune doesn't depend on which query execution
3036   plan will be used to execute the query.
3037 
3038   HOW IT WORKS
3039 
3040   Partition pruning module makes use of RangeAnalysisModule. The following
3041   examples show how the problem of partition pruning can be reduced to the
3042   range analysis problem:
3043 
3044   EXAMPLE 1
3045     Consider a query:
3046 
3047       SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3048 
3049     where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
3050     way to find the used (i.e. not pruned away) partitions is as follows:
3051 
3052     1. analyze the WHERE clause and extract the list of intervals over t1.a
3053        for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3054 
3055     2. for each interval I
3056        {
3057          find partitions that have non-empty intersection with I;
3058          mark them as used;
3059        }
3060 
3061   EXAMPLE 2
3062     Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3063     we need to:
3064 
3065     1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3066        The list of intervals we'll obtain will look like this:
3067        ((t1.a, t1.b) = (1,'foo')),
3068        ((t1.a, t1.b) = (2,'bar')),
3069        ((t1.a, t1.b) > (10,'zz'))
3070 
3071     2. for each interval I
3072        {
3073          if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3074          {
3075            calculate HASH(part_func(t1.a, t1.b));
3076            find which partition has records with this hash value and mark
3077              it as used;
3078          }
3079          else
3080          {
3081            mark all partitions as used;
3082            break;
3083          }
3084        }
3085 
3086    For both examples the step #1 is exactly what RangeAnalysisModule could
3087    be used to do, if it was provided with appropriate index description
3088    (array of KEY_PART structures).
3089    In example #1, we need to provide it with description of index(t1.a),
3090    in example #2, we need to provide it with description of index(t1.a, t1.b).
3091 
3092    These index descriptions are further called "partitioning index
3093    descriptions". Note that it doesn't matter if such indexes really exist,
3094    as range analysis module only uses the description.
3095 
3096    Putting it all together, partitioning module works as follows:
3097 
3098    prune_partitions() {
3099      call create_partition_index_description();
3100 
3101      call get_mm_tree(); // invoke the RangeAnalysisModule
3102 
3103      // analyze the obtained interval list and get used partitions
3104      call find_used_partitions();
3105   }
3106 
3107 */
3108 
3109 struct st_part_prune_param;
3110 struct st_part_opt_info;
3111 
3112 typedef void (*mark_full_part_func)(partition_info*, uint32);
3113 
3114 /*
3115   Partition pruning operation context
3116 */
3117 typedef struct st_part_prune_param
3118 {
3119   RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3120 
3121   /***************************************************************
3122    Following fields are filled in based solely on partitioning
3123    definition and not modified after that:
3124    **************************************************************/
3125   partition_info *part_info; /* Copy of table->part_info */
3126   /* Function to get partition id from partitioning fields only */
3127   get_part_id_func get_top_partition_id_func;
3128   /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3129   mark_full_part_func mark_full_partition_used;
3130 
3131   /* Partitioning 'index' description, array of key parts */
3132   KEY_PART *key;
3133 
3134   /*
3135     Number of fields in partitioning 'index' definition created for
3136     partitioning (0 if partitioning 'index' doesn't include partitioning
3137     fields)
3138   */
3139   uint part_fields;
3140   uint subpart_fields; /* Same as above for subpartitioning */
3141 
3142   /*
3143     Number of the last partitioning field keypart in the index, or -1 if
3144     partitioning index definition doesn't include partitioning fields.
3145   */
3146   int last_part_partno;
3147   int last_subpart_partno; /* Same as above for subpartitioning */
3148 
3149   /*
3150     is_part_keypart[i] == test(keypart #i in partitioning index is a member
3151                                used in partitioning)
3152     Used to maintain current values of cur_part_fields and cur_subpart_fields
3153   */
3154   my_bool *is_part_keypart;
3155   /* Same as above for subpartitioning */
3156   my_bool *is_subpart_keypart;
3157 
3158   my_bool ignore_part_fields; /* Ignore rest of partitioning fields */
3159 
3160   /***************************************************************
3161    Following fields form find_used_partitions() recursion context:
3162    **************************************************************/
3163   SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
3164   SEL_ARG **arg_stack_end; /* Top of the stack    */
3165   /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3166   uint cur_part_fields;
3167   /* Same as cur_part_fields, but for subpartitioning */
3168   uint cur_subpart_fields;
3169 
3170   /* Iterator to be used to obtain the "current" set of used partitions */
3171   PARTITION_ITERATOR part_iter;
3172 
3173   /* Initialized bitmap of num_subparts size */
3174   MY_BITMAP subparts_bitmap;
3175 
3176   /* Current write position in range_param.min_key / max_key buffers */
3177   uchar *cur_min_key;
3178   uchar *cur_max_key;
3179 
3180   /* Current min/max key flags; reset to 0 before each interval-list walk */
3181   uint cur_min_flag, cur_max_flag;
3182 } PART_PRUNE_PARAM;
3181 
3182 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3183 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3184 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3185                                        SEL_IMERGE *imerge);
3186 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3187                                             List<SEL_IMERGE> &merges);
3188 static void mark_all_partitions_as_used(partition_info *part_info);
3189 
3190 #ifndef DBUG_OFF
3191 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3192 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3193 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3194 #endif
3195 
3196 
3197 /**
3198   Perform partition pruning for a given table and condition.
3199 
3200   @param      thd            Thread handle
3201   @param      table          Table to perform partition pruning for
3202   @param      pprune_cond    Condition to use for partition pruning
3203 
3204   @note This function assumes that lock_partitions are setup when it
3205   is invoked. The function analyzes the condition, finds partitions that
3206   need to be used to retrieve the records that match the condition, and
3207   marks them as used by setting appropriate bit in part_info->read_partitions
3208   In the worst case all partitions are marked as used. If the table is not
3209   yet locked, it will also unset bits in part_info->lock_partitions that is
3210   not set in read_partitions.
3211 
3212   This function returns promptly if called for non-partitioned table.
3213 
3214   @return Operation status
3215     @retval true  Failure
3216     @retval false Success
3217 */
3218 
prune_partitions(THD * thd,TABLE * table,Item * pprune_cond)3219 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3220 {
3221   partition_info *part_info = table->part_info;
3222   DBUG_ENTER("prune_partitions");
3223   table->all_partitions_pruned_away= false;
3224 
3225   if (!part_info)
3226     DBUG_RETURN(FALSE); /* not a partitioned table */
3227 
3228   if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION &&
3229       part_info->is_auto_partitioned)
3230     DBUG_RETURN(false); /* Should not prune auto partitioned table */
3231 
3232   if (!pprune_cond)
3233   {
3234     mark_all_partitions_as_used(part_info);
3235     DBUG_RETURN(FALSE);
3236   }
3237 
3238   /* No need to continue pruning if there is no more partitions to prune! */
3239   if (bitmap_is_clear_all(&part_info->lock_partitions))
3240     bitmap_clear_all(&part_info->read_partitions);
3241   if (bitmap_is_clear_all(&part_info->read_partitions))
3242   {
3243     table->all_partitions_pruned_away= true;
3244     DBUG_RETURN(false);
3245   }
3246 
3247   /*
3248     If the prepare stage already have completed pruning successfully,
3249     it is no use of running prune_partitions() again on the same condition.
3250     Since it will not be able to prune anything more than the previous call
3251     from the prepare step.
3252   */
3253   if (part_info->is_pruning_completed)
3254     DBUG_RETURN(false);
3255 
3256   PART_PRUNE_PARAM prune_param;
3257   MEM_ROOT alloc;
3258   RANGE_OPT_PARAM  *range_par= &prune_param.range_param;
3259   my_bitmap_map *old_sets[2];
3260 
3261   prune_param.part_info= part_info;
3262   init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
3263   range_par->mem_root= &alloc;
3264   range_par->old_root= thd->mem_root;
3265 
3266   if (create_partition_index_description(&prune_param))
3267   {
3268     mark_all_partitions_as_used(part_info);
3269     free_root(&alloc,MYF(0));		// Return memory & allocator
3270     DBUG_RETURN(FALSE);
3271   }
3272 
3273   dbug_tmp_use_all_columns(table, old_sets,
3274                            table->read_set, table->write_set);
3275   range_par->thd= thd;
3276   range_par->table= table;
3277   /* range_par->cond doesn't need initialization */
3278   range_par->prev_tables= range_par->read_tables= 0;
3279   range_par->current_table= table->map;
3280 
3281   range_par->keys= 1; // one index
3282   range_par->using_real_indexes= FALSE;
3283   range_par->remove_jump_scans= FALSE;
3284   range_par->real_keynr[0]= 0;
3285   range_par->alloced_sel_args= 0;
3286 
3287   thd->no_errors=1;				// Don't warn about NULL
3288   thd->mem_root=&alloc;
3289 
3290   bitmap_clear_all(&part_info->read_partitions);
3291 
3292   prune_param.key= prune_param.range_param.key_parts;
3293   SEL_TREE *tree;
3294   int res;
3295 
3296   tree= get_mm_tree(range_par, pprune_cond);
3297   if (!tree)
3298     goto all_used;
3299 
3300   if (tree->type == SEL_TREE::IMPOSSIBLE)
3301   {
3302     /* Cannot improve the pruning any further. */
3303     part_info->is_pruning_completed= true;
3304     goto end;
3305   }
3306 
3307   if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3308     goto all_used;
3309 
3310   if (tree->merges.is_empty())
3311   {
3312     /* Range analysis has produced a single list of intervals. */
3313     prune_param.arg_stack_end= prune_param.arg_stack;
3314     prune_param.cur_part_fields= 0;
3315     prune_param.cur_subpart_fields= 0;
3316 
3317     prune_param.cur_min_key= prune_param.range_param.min_key;
3318     prune_param.cur_max_key= prune_param.range_param.max_key;
3319     prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3320 
3321     init_all_partitions_iterator(part_info, &prune_param.part_iter);
3322     if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3323                                                             tree->keys[0]))))
3324       goto all_used;
3325   }
3326   else
3327   {
3328     if (tree->merges.elements == 1)
3329     {
3330       /*
3331         Range analysis has produced a "merge" of several intervals lists, a
3332         SEL_TREE that represents an expression in form
3333           sel_imerge = (tree1 OR tree2 OR ... OR treeN)
3334         that cannot be reduced to one tree. This can only happen when
3335         partitioning index has several keyparts and the condition is OR of
3336         conditions that refer to different key parts. For example, we'll get
3337         here for "partitioning_field=const1 OR subpartitioning_field=const2"
3338       */
3339       if (-1 == (res= find_used_partitions_imerge(&prune_param,
3340                                                   tree->merges.head())))
3341         goto all_used;
3342     }
3343     else
3344     {
3345       /*
3346         Range analysis has produced a list of several imerges, i.e. a
3347         structure that represents a condition in form
3348         imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
3349         This is produced for complicated WHERE clauses that range analyzer
3350         can't really analyze properly.
3351       */
3352       if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
3353                                                        tree->merges)))
3354         goto all_used;
3355     }
3356   }
3357 
3358   /*
3359     If the condition can be evaluated now, we are done with pruning.
3360 
3361     During the prepare phase, before locking, subqueries and stored programs
3362     are not evaluated. So we need to run prune_partitions() a second time in
3363     the optimize phase to prune partitions for reading, when subqueries and
3364     stored programs may be evaluated.
3365   */
3366   if (pprune_cond->can_be_evaluated_now())
3367     part_info->is_pruning_completed= true;
3368   goto end;
3369 
3370 all_used:
3371   mark_all_partitions_as_used(prune_param.part_info);
3372 end:
3373   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
3374   thd->no_errors=0;
3375   thd->mem_root= range_par->old_root;
3376   free_root(&alloc,MYF(0));			// Return memory & allocator
3377   /*
3378     Must be a subset of the locked partitions.
3379     lock_partitions contains the partitions marked by explicit partition
3380     selection (... t PARTITION (pX) ...) and we must only use partitions
3381     within that set.
3382   */
3383   bitmap_intersect(&prune_param.part_info->read_partitions,
3384                    &prune_param.part_info->lock_partitions);
3385   /*
3386     If not yet locked, also prune partitions to lock if not UPDATEing
3387     partition key fields. This will also prune lock_partitions if we are under
3388     LOCK TABLES, so prune away calls to start_stmt().
3389     TODO: enhance this prune locking to also allow pruning of
3390     'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
3391     a lock for part_key partition.
3392   */
3393   if (!thd->lex->is_query_tables_locked() &&
3394       !partition_key_modified(table, table->write_set))
3395   {
3396     bitmap_copy(&prune_param.part_info->lock_partitions,
3397                 &prune_param.part_info->read_partitions);
3398   }
3399   if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
3400     table->all_partitions_pruned_away= true;
3401   DBUG_RETURN(false);
3402 }
3403 
3404 
3405 /*
3406   Store field key image to table record
3407 
3408   SYNOPSIS
3409     store_key_image_to_rec()
3410       field  Field which key image should be stored
3411       ptr    Field value in key format
3412       len    Length of the value, in bytes
3413 
3414   DESCRIPTION
3415     Copy the field value from its key image to the table record. The source
3416     is the value in key image format, occupying len bytes in buffer pointed
3417     by ptr. The destination is table record, in "field value in table record"
3418     format.
3419 */
3420 
store_key_image_to_rec(Field * field,uchar * ptr,uint len)3421 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3422 {
3423   /* Do the same as print_key_value() does */
3424   my_bitmap_map *old_map;
3425 
3426   if (field->real_maybe_null())
3427   {
3428     if (*ptr)
3429     {
3430       field->set_null();
3431       return;
3432     }
3433     field->set_notnull();
3434     ptr++;
3435   }
3436   old_map= dbug_tmp_use_all_columns(field->table,
3437                                     field->table->write_set);
3438   field->set_key_image(ptr, len);
3439   dbug_tmp_restore_column_map(field->table->write_set, old_map);
3440 }
3441 
3442 
3443 /*
3444   For SEL_ARG* array, store sel_arg->min values into table record buffer
3445 
3446   SYNOPSIS
3447     store_selargs_to_rec()
3448       ppar   Partition pruning context
3449       start  Array of SEL_ARG* for which the minimum values should be stored
3450       num    Number of elements in the array
3451 
3452   DESCRIPTION
3453     For each SEL_ARG* interval in the specified array, store the left edge
3454     field value (sel_arg->min, key image format) into the table record.
3455 */
3456 
store_selargs_to_rec(PART_PRUNE_PARAM * ppar,SEL_ARG ** start,int num)3457 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3458                                  int num)
3459 {
3460   KEY_PART *parts= ppar->range_param.key_parts;
3461   for (SEL_ARG **end= start + num; start != end; start++)
3462   {
3463     SEL_ARG *sel_arg= (*start);
3464     store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3465                            parts[sel_arg->part].length);
3466   }
3467 }
3468 
3469 
3470 /* Mark a partition as used in the case when there are no subpartitions */
mark_full_partition_used_no_parts(partition_info * part_info,uint32 part_id)3471 static void mark_full_partition_used_no_parts(partition_info* part_info,
3472                                               uint32 part_id)
3473 {
3474   DBUG_ENTER("mark_full_partition_used_no_parts");
3475   DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
3476   bitmap_set_bit(&part_info->read_partitions, part_id);
3477   DBUG_VOID_RETURN;
3478 }
3479 
3480 
3481 /* Mark a partition as used in the case when there are subpartitions */
mark_full_partition_used_with_parts(partition_info * part_info,uint32 part_id)3482 static void mark_full_partition_used_with_parts(partition_info *part_info,
3483                                                 uint32 part_id)
3484 {
3485   uint32 start= part_id * part_info->num_subparts;
3486   uint32 end=   start + part_info->num_subparts;
3487   DBUG_ENTER("mark_full_partition_used_with_parts");
3488 
3489   for (; start != end; start++)
3490   {
3491     DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3492     bitmap_set_bit(&part_info->read_partitions, start);
3493   }
3494   DBUG_VOID_RETURN;
3495 }
3496 
3497 /*
3498   Find the set of used partitions for List<SEL_IMERGE>
3499   SYNOPSIS
3500     find_used_partitions_imerge_list
3501       ppar      Partition pruning context.
3502       key_tree  Intervals tree to perform pruning for.
3503 
3504   DESCRIPTION
3505     List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
3506     The set of used partitions is an intersection of used partitions sets
3507     for imerge_{i}.
3508     We accumulate this intersection in a separate bitmap.
3509 
3510   RETURN
3511     See find_used_partitions()
3512 */
3513 
find_used_partitions_imerge_list(PART_PRUNE_PARAM * ppar,List<SEL_IMERGE> & merges)3514 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3515                                             List<SEL_IMERGE> &merges)
3516 {
3517   MY_BITMAP all_merges;
3518   uint bitmap_bytes;
3519   my_bitmap_map *bitmap_buf;
3520   uint n_bits= ppar->part_info->read_partitions.n_bits;
3521   bitmap_bytes= bitmap_buffer_size(n_bits);
3522   if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
3523                                                 bitmap_bytes)))
3524   {
3525     /*
3526       Fallback, process just the first SEL_IMERGE. This can leave us with more
3527       partitions marked as used then actually needed.
3528     */
3529     return find_used_partitions_imerge(ppar, merges.head());
3530   }
3531   bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
3532   bitmap_set_prefix(&all_merges, n_bits);
3533 
3534   List_iterator<SEL_IMERGE> it(merges);
3535   SEL_IMERGE *imerge;
3536   while ((imerge=it++))
3537   {
3538     int res= find_used_partitions_imerge(ppar, imerge);
3539     if (!res)
3540     {
3541       /* no used partitions on one ANDed imerge => no used partitions at all */
3542       return 0;
3543     }
3544 
3545     if (res != -1)
3546       bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
3547 
3548     if (bitmap_is_clear_all(&all_merges))
3549       return 0;
3550 
3551     bitmap_clear_all(&ppar->part_info->read_partitions);
3552   }
3553   memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
3554          bitmap_bytes);
3555   return 1;
3556 }
3557 
3558 
3559 /*
3560   Find the set of used partitions for SEL_IMERGE structure
3561   SYNOPSIS
3562     find_used_partitions_imerge()
3563       ppar      Partition pruning context.
3564       key_tree  Intervals tree to perform pruning for.
3565 
3566   DESCRIPTION
3567     SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
3568     trivial - just use mark used partitions for each tree and bail out early
3569     if for some tree_{i} all partitions are used.
3570 
3571   RETURN
3572     See find_used_partitions().
3573 */
3574 
3575 static
find_used_partitions_imerge(PART_PRUNE_PARAM * ppar,SEL_IMERGE * imerge)3576 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3577 {
3578   int res= 0;
3579   for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3580   {
3581     ppar->arg_stack_end= ppar->arg_stack;
3582     ppar->cur_part_fields= 0;
3583     ppar->cur_subpart_fields= 0;
3584 
3585     ppar->cur_min_key= ppar->range_param.min_key;
3586     ppar->cur_max_key= ppar->range_param.max_key;
3587     ppar->cur_min_flag= ppar->cur_max_flag= 0;
3588 
3589     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3590     SEL_ARG *key_tree= (*ptree)->keys[0];
3591     if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3592       return -1;
3593   }
3594   return res;
3595 }
3596 
3597 
3598 /*
3599   Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3600 
3601   SYNOPSIS
3602     find_used_partitions()
3603       ppar      Partition pruning context.
3604       key_tree  SEL_ARG range tree to perform pruning for
3605 
3606   DESCRIPTION
3607     This function
3608       * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3609       * finds the partitions one needs to use to get rows in these intervals
3610       * marks these partitions as used.
3611     The next section describes the process in greater detail.
3612 
3613   IMPLEMENTATION
3614     TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3615     We can find out which [sub]partitions to use if we obtain restrictions on
3616     [sub]partitioning fields in the following form:
3617     1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
3618     1.1  Same as (1) but for subpartition fields
3619 
3620     If partitioning supports interval analysis (i.e. partitioning is a
3621     function of a single table field, and partition_info::
3622     get_part_iter_for_interval != NULL), then we can also use condition in
3623     this form:
3624     2.  "const1 <=? partition_field <=? const2"
3625     2.1  Same as (2) but for subpartition_field
3626 
3627     INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3628 
3629     The below is an example of what SEL_ARG tree may represent:
3630 
3631     (start)
3632      |                           $
3633      |   Partitioning keyparts   $  subpartitioning keyparts
3634      |                           $
3635      |     ...          ...      $
3636      |      |            |       $
3637      | +---------+  +---------+  $  +-----------+  +-----------+
3638      \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3639        +---------+  +---------+  $  +-----------+  +-----------+
3640             |                    $        |             |
3641             |                    $        |        +-----------+
3642             |                    $        |        | subpar2=c6|
3643             |                    $        |        +-----------+
3644             |                    $        |
3645             |                    $  +-----------+  +-----------+
3646             |                    $  | subpar1=c4|--| subpar2=c8|
3647             |                    $  +-----------+  +-----------+
3648             |                    $
3649             |                    $
3650        +---------+               $  +------------+  +------------+
3651        | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3652        +---------+               $  +------------+  +------------+
3653             |                    $
3654            ...                   $
3655 
3656     The up-down connections are connections via SEL_ARG::left and
3657     SEL_ARG::right. A horizontal connection to the right is the
3658     SEL_ARG::next_key_part connection.
3659 
3660     find_used_partitions() traverses the entire tree via recursion on
3661      * SEL_ARG::next_key_part (from left to right on the picture)
3662      * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3663        performed for each depth level.
3664 
3665     Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3666     ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3667     For the example in the above picture, one of stack states is:
3668       in find_used_partitions(key_tree = "subpar2=c5") (***)
3669       in find_used_partitions(key_tree = "subpar1=c3")
3670       in find_used_partitions(key_tree = "par2=c2")   (**)
3671       in find_used_partitions(key_tree = "par1=c1")
3672       in prune_partitions(...)
3673     We apply partitioning limits as soon as possible, e.g. when we reach the
3674     depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3675     and save them in ppar->part_iter.
3676     When we reach the depth (***), we find which subpartition(s) correspond to
3677     "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
3678     appropriate partitions as used.
3679 
3680     It is possible that constraints on some partitioning fields are missing.
3681     For the above example, consider this stack state:
3682       in find_used_partitions(key_tree = "subpar2=c12") (***)
3683       in find_used_partitions(key_tree = "subpar1=c10")
3684       in find_used_partitions(key_tree = "par1=c2")
3685       in prune_partitions(...)
3686     Here we don't have constraints for all partitioning fields. Since we've
3687     never set the ppar->part_iter to contain used set of partitions, we use
3688     its default "all partitions" value.  We get the subpartition id for
3689     "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
3690     partition.
3691 
3692     The inverse is also possible: we may get constraints on partitioning
3693     fields, but not constraints on subpartitioning fields. In that case,
3694     calls to find_used_partitions() with depth below (**) will return -1,
3695     and we will mark entire partition as used.
3696 
3697   TODO
3698     Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3699 
3700   RETURN
3701     1   OK, one or more [sub]partitions are marked as used.
3702     0   The passed condition doesn't match any partitions
3703    -1   Couldn't infer any partition pruning "intervals" from the passed
3704         SEL_ARG* tree (which means that all partitions should be marked as
3705         used) Marking partitions as used is the responsibility of the caller.
3706 */
3707 
3708 static
find_used_partitions(PART_PRUNE_PARAM * ppar,SEL_ARG * key_tree)3709 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
3710 {
3711   int res, left_res=0, right_res=0;
3712   int key_tree_part= (int)key_tree->part;
3713   bool set_full_part_if_bad_ret= FALSE;
3714   bool ignore_part_fields= ppar->ignore_part_fields;
3715   bool did_set_ignore_part_fields= FALSE;
3716   RANGE_OPT_PARAM *range_par= &(ppar->range_param);
3717
  /* Deep recursion => give up; -1 makes the caller mark all partitions used */
3718   if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
3719     return -1;
3720
  /* Recurse "up" the picture first: smaller values of this key part */
3721   if (key_tree->left != &null_element)
3722   {
3723     if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
3724       return -1;
3725   }
3726
3727   /* Push SEL_ARG's to stack to enable looking backwards as well */
3728   ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
3729   ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
3730   *(ppar->arg_stack_end++)= key_tree;
3731
3732   if (ignore_part_fields)
3733   {
3734     /*
3735       We come here when a condition on the first partitioning
3736       fields led to evaluating the partitioning condition
3737       (due to finding a condition of the type a < const or
3738       b > const). Thus we must ignore the rest of the
3739       partitioning fields but we still want to analyse the
3740       subpartitioning fields.
3741     */
3742     if (key_tree->next_key_part)
3743       res= find_used_partitions(ppar, key_tree->next_key_part);
3744     else
3745       res= -1;
3746     goto pop_and_go_right;
3747   }
3748
3749   if (key_tree->type == SEL_ARG::KEY_RANGE)
3750   {
    /* Case 1: interval analysis on partitioning key parts is available */
3751     if (ppar->part_info->get_part_iter_for_interval &&
3752         key_tree->part <= ppar->last_part_partno)
3753     {
3754       /* Collect left and right bound, their lengths and flags */
3755       uchar *min_key= ppar->cur_min_key;
3756       uchar *max_key= ppar->cur_max_key;
3757       uchar *tmp_min_key= min_key;
3758       uchar *tmp_max_key= max_key;
3759       key_tree->store_min(ppar->key[key_tree->part].store_length,
3760                           &tmp_min_key, ppar->cur_min_flag);
3761       key_tree->store_max(ppar->key[key_tree->part].store_length,
3762                           &tmp_max_key, ppar->cur_max_flag);
3763       uint flag;
3764       if (key_tree->next_key_part &&
3765           key_tree->next_key_part->part == key_tree->part+1 &&
3766           key_tree->next_key_part->part <= ppar->last_part_partno &&
3767           key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
3768       {
3769         /*
3770           There are more key parts for partition pruning to handle
3771           This mainly happens when the condition is an equality
3772           condition.
3773         */
3774         if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
3775             (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
3776             !key_tree->min_flag && !key_tree->max_flag)
3777         {
3778           /* Set 'parameters' */
3779           ppar->cur_min_key= tmp_min_key;
3780           ppar->cur_max_key= tmp_max_key;
3781           uint save_min_flag= ppar->cur_min_flag;
3782           uint save_max_flag= ppar->cur_max_flag;
3783
3784           ppar->cur_min_flag|= key_tree->min_flag;
3785           ppar->cur_max_flag|= key_tree->max_flag;
3786
3787           res= find_used_partitions(ppar, key_tree->next_key_part);
3788
3789           /* Restore 'parameters' back */
3790           ppar->cur_min_key= min_key;
3791           ppar->cur_max_key= max_key;
3792
3793           ppar->cur_min_flag= save_min_flag;
3794           ppar->cur_max_flag= save_max_flag;
3795           goto pop_and_go_right;
3796         }
3797         /* We have arrived at the last field in the partition pruning */
3798         uint tmp_min_flag= key_tree->min_flag,
3799              tmp_max_flag= key_tree->max_flag;
3800         if (!tmp_min_flag)
3801           key_tree->next_key_part->store_min_key(ppar->key,
3802                                                  &tmp_min_key,
3803                                                  &tmp_min_flag,
3804                                                  ppar->last_part_partno)3804                                                  ppar->last_part_partno);
3805         if (!tmp_max_flag)
3806           key_tree->next_key_part->store_max_key(ppar->key,
3807                                                  &tmp_max_key,
3808                                                  &tmp_max_flag,
3809                                                  ppar->last_part_partno);
3810         flag= tmp_min_flag | tmp_max_flag;
3811       }
3812       else
3813         flag= key_tree->min_flag | key_tree->max_flag;
3814
3815       if (tmp_min_key != range_par->min_key)
3816         flag&= ~NO_MIN_RANGE;
3817       else
3818         flag|= NO_MIN_RANGE;
3819       if (tmp_max_key != range_par->max_key)
3820         flag&= ~NO_MAX_RANGE;
3821       else
3822         flag|= NO_MAX_RANGE;
3823
3824       /*
3825         We need to call the interval mapper if we have a condition which
3826         makes sense to prune on. In the example of COLUMNS on a and
3827         b it makes sense if we have a condition on a, or conditions on
3828         both a and b. If we only have conditions on b it might make sense
3829         but this is a harder case we will solve later. For the harder case
3830         this clause then turns into use of all partitions and thus we
3831         simply set res= -1 as if the mapper had returned that.
3832         TODO: What to do here is defined in WL#4065.
3833       */
3834       if (ppar->arg_stack[0]->part == 0)
3835       {
3836         uint32 i;
3837         uint32 store_length_array[MAX_KEY];
3838         uint32 num_keys= ppar->part_fields;
3839
3840         for (i= 0; i < num_keys; i++)
3841           store_length_array[i]= ppar->key[i].store_length;
3842         res= ppar->part_info->
3843              get_part_iter_for_interval(ppar->part_info,
3844                                         FALSE,
3845                                         store_length_array,
3846                                         range_par->min_key,
3847                                         range_par->max_key,
3848                                         tmp_min_key - range_par->min_key,
3849                                         tmp_max_key - range_par->max_key,
3850                                         flag,
3851                                         &ppar->part_iter);
3852         if (!res)
3853           goto pop_and_go_right; /* res==0 --> no satisfying partitions */
3854       }
3855       else
3856         res= -1;
3857
3858       if (res == -1)
3859       {
3860         /* get a full range iterator */
3861         init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3862       }
3863       /*
3864         Save our intent to mark full partition as used if we will not be able
3865         to obtain further limits on subpartitions
3866       */
3867       if (key_tree_part < ppar->last_part_partno)
3868       {
3869         /*
3870           We need to ignore the rest of the partitioning fields in all
3871           evaluations after this
3872         */
3873         did_set_ignore_part_fields= TRUE;
3874         ppar->ignore_part_fields= TRUE;
3875       }
3876       set_full_part_if_bad_ret= TRUE;
3877       goto process_next_key_part;
3878     }
3879
    /* Case 2: interval analysis on the last subpartitioning key part */
3880     if (key_tree_part == ppar->last_subpart_partno &&
3881         (NULL != ppar->part_info->get_subpart_iter_for_interval))
3882     {
3883       PARTITION_ITERATOR subpart_iter;
3884       DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
3885                                                     range_par->key_parts););
3886       res= ppar->part_info->
3887            get_subpart_iter_for_interval(ppar->part_info,
3888                                          TRUE,
3889                                          NULL, /* Currently not used here */
3890                                          key_tree->min_value,
3891                                          key_tree->max_value,
3892                                          0, 0, /* Those are ignored here */
3893                                          key_tree->min_flag |
3894                                            key_tree->max_flag,
3895                                          &subpart_iter);
3896       if (res == 0)
3897       {
3898         /*
3899            The only case where we can get "no satisfying subpartitions"
3900            returned from the above call is when an error has occurred.
3901         */
3902         DBUG_ASSERT(range_par->thd->is_error());
3903         return 0;
3904       }
3905
3906       if (res == -1)
3907         goto pop_and_go_right; /* all subpartitions satisfy */
3908
3909       uint32 subpart_id;
3910       bitmap_clear_all(&ppar->subparts_bitmap);
3911       while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
3912              NOT_A_PARTITION_ID)
3913         bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
3914
3915       /* Mark each partition as used in each subpartition.  */
3916       uint32 part_id;
3917       while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
3918               NOT_A_PARTITION_ID)
3919       {
3920         for (uint i= 0; i < ppar->part_info->num_subparts; i++)
3921           if (bitmap_is_set(&ppar->subparts_bitmap, i))
3922             bitmap_set_bit(&ppar->part_info->read_partitions,
3923                            part_id * ppar->part_info->num_subparts + i);
3924       }
3925       goto pop_and_go_right;
3926     }
3927
    /* Case 3: no interval mapper; only equality ("singlepoint") intervals
       can be handled, by evaluating the partitioning function directly */
3928     if (key_tree->is_singlepoint())
3929     {
3930       if (key_tree_part == ppar->last_part_partno &&
3931           ppar->cur_part_fields == ppar->part_fields &&
3932           ppar->part_info->get_part_iter_for_interval == NULL)
3933       {
3934         /*
3935           Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
3936           fields. Save all constN constants into table record buffer.
3937         */
3938         store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
3939         DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
3940                                                        ppar->part_fields););
3941         uint32 part_id;
3942         longlong func_value;
3943         /* Find in which partition the {const1, ...,constN} tuple goes */
3944         if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
3945                                             &func_value))
3946         {
3947           res= 0; /* No satisfying partitions */
3948           goto pop_and_go_right;
3949         }
3950         /* Remember the limit we got - single partition #part_id */
3951         init_single_partition_iterator(part_id, &ppar->part_iter);
3952
3953         /*
3954           If there are no subpartitions/we fail to get any limit for them,
3955           then we'll mark full partition as used.
3956         */
3957         set_full_part_if_bad_ret= TRUE;
3958         goto process_next_key_part;
3959       }
3960
3961       if (key_tree_part == ppar->last_subpart_partno &&
3962           ppar->cur_subpart_fields == ppar->subpart_fields)
3963       {
3964         /*
3965           Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
3966           fields. Save all constN constants into table record buffer.
3967         */
3968         store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
3969                              ppar->subpart_fields);
3970         DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
3971                                                        ppar->subpart_fields,
3972                                                        ppar->subpart_fields););
3973         /* Find the subpartition (it's HASH/KEY so we always have one) */
3974         partition_info *part_info= ppar->part_info;
3975         uint32 part_id, subpart_id;
3976
3977         if (part_info->get_subpartition_id(part_info, &subpart_id))
3978           return 0;
3979
3980         /* Mark this partition as used in each subpartition. */
3981         while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
3982                 NOT_A_PARTITION_ID)
3983         {
3984           bitmap_set_bit(&part_info->read_partitions,
3985                          part_id * part_info->num_subparts + subpart_id);
3986         }
3987         res= 1; /* Some partitions were marked as used */
3988         goto pop_and_go_right;
3989       }
3990     }
3991     else
3992     {
3993       /*
3994         Can't handle condition on current key part. If we're that deep that
3995         we're processing subpartitioning's key parts, this means we'll not be
3996         able to infer any suitable condition, so bail out.
3997       */
3998       if (key_tree_part >= ppar->last_part_partno)
3999       {
4000         res= -1;
4001         goto pop_and_go_right;
4002       }
4003       /*
4004         No meaning in continuing with rest of partitioning key parts.
4005         Will try to continue with subpartitioning key parts.
4006       */
4007       ppar->ignore_part_fields= true;
4008       did_set_ignore_part_fields= true;
4009       goto process_next_key_part;
4010     }
4011   }
4012
  /* Descend into the AND-chained constraints on the following key part */
4013 process_next_key_part:
4014   if (key_tree->next_key_part)
4015     res= find_used_partitions(ppar, key_tree->next_key_part);
4016   else
4017     res= -1;
4018
4019   if (did_set_ignore_part_fields)
4020   {
4021     /*
4022       We have returned from processing all key trees linked to our next
4023       key part. We are ready to be moving down (using right pointers) and
4024       this tree is a new evaluation requiring its own decision on whether
4025       to ignore partitioning fields.
4026     */
4027     ppar->ignore_part_fields= FALSE;
4028   }
4029   if (set_full_part_if_bad_ret)
4030   {
4031     if (res == -1)
4032     {
4033       /* Got "full range" for subpartitioning fields */
4034       uint32 part_id;
4035       bool found= FALSE;
4036       while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4037              NOT_A_PARTITION_ID)
4038       {
4039         ppar->mark_full_partition_used(ppar->part_info, part_id);
4040         found= TRUE;
4041       }
4042       res= MY_TEST(found);
4043     }
4044     /*
4045       Restore the "used partitions iterator" to the default setting that
4046       specifies iteration over all partitions.
4047     */
4048     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4049   }
4050
4051 pop_and_go_right:
4052   /* Pop this key part info off the "stack" */
4053   ppar->arg_stack_end--;
4054   ppar->cur_part_fields-=    ppar->is_part_keypart[key_tree_part];
4055   ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4056
4057   if (res == -1)
4058     return -1;
  /* Recurse "down" the picture: greater values of this key part */
4059   if (key_tree->right != &null_element)
4060   {
4061     if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4062       return -1;
4063   }
  /* 1 if any branch marked partitions as used, 0 if none matched */
4064   return (left_res || right_res || res);
4065 }
4066 
4067 
mark_all_partitions_as_used(partition_info * part_info)4068 static void mark_all_partitions_as_used(partition_info *part_info)
4069 {
4070   bitmap_copy(&(part_info->read_partitions),
4071               &(part_info->lock_partitions));
4072 }
4073 
4074 
4075 /*
4076   Check if field types allow to construct partitioning index description
4077 
4078   SYNOPSIS
4079     fields_ok_for_partition_index()
4080       pfield  NULL-terminated array of pointers to fields.
4081 
4082   DESCRIPTION
4083     For an array of fields, check if we can use all of the fields to create
4084     partitioning index description.
4085 
4086     We can't process GEOMETRY fields - for these fields singlepoint intervals
4087     can't be generated, and non-singlepoint are "special" kinds of intervals
4088     to which our processing logic can't be applied.
4089 
4090     It is not known if we could process ENUM fields, so they are disabled to be
4091     on the safe side.
4092 
4093   RETURN
4094     TRUE   Yes, fields can be used in partitioning index
4095     FALSE  Otherwise
4096 */
4097 
fields_ok_for_partition_index(Field ** pfield)4098 static bool fields_ok_for_partition_index(Field **pfield)
4099 {
4100   if (!pfield)
4101     return FALSE;
4102   for (; (*pfield); pfield++)
4103   {
4104     enum_field_types ftype= (*pfield)->real_type();
4105     if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4106       return FALSE;
4107   }
4108   return TRUE;
4109 }
4110 
4111 
4112 /*
4113   Create partition index description and fill related info in the context
4114   struct
4115 
4116   SYNOPSIS
4117     create_partition_index_description()
4118       prune_par  INOUT Partition pruning context
4119 
4120   DESCRIPTION
4121     Create partition index description. Partition index description is:
4122 
4123       part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4124 
4125     If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4126     corresponding fields_list(...) is not included into index description
4127     and we don't perform partition pruning for partitions/subpartitions.
4128 
4129   RETURN
4130     TRUE   Out of memory or can't do partition pruning at all
4131     FALSE  OK
4132 */
4133 
static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
{
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);
  partition_info *part_info= ppar->part_info;
  uint used_part_fields, used_subpart_fields;

  /*
    A fields_list(...) is usable only if every field in it has a type we
    can build intervals for; otherwise that list contributes 0 key parts.
  */
  used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
                      part_info->num_part_fields : 0;
  used_subpart_fields=
    fields_ok_for_partition_index(part_info->subpart_field_array)?
      part_info->num_subpart_fields : 0;

  uint total_parts= used_part_fields + used_subpart_fields;

  /*
    Fill in the context: keypart numbering places partitioning fields
    first, then sub-partitioning fields; *_partno are -1 when absent.
  */
  ppar->ignore_part_fields= FALSE;
  ppar->part_fields=      used_part_fields;
  ppar->last_part_partno= (int)used_part_fields - 1;

  ppar->subpart_fields= used_subpart_fields;
  ppar->last_subpart_partno=
    used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;

  if (part_info->is_sub_partitioned())
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
    ppar->get_top_partition_id_func= part_info->get_part_partition_id;
  }
  else
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
    ppar->get_top_partition_id_func= part_info->get_partition_id;
  }

  KEY_PART *key_part;
  MEM_ROOT *alloc= range_par->mem_root;
  /*
    No usable fields, or out of memory for the per-keypart arrays:
    give up on partition pruning entirely (caller treats TRUE as "can't
    prune").
  */
  if (!total_parts ||
      !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
                                               total_parts)) ||
      !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
                                                      total_parts)) ||
      !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)) ||
      !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)))
    return TRUE;

  if (ppar->subpart_fields)
  {
    /* Bitmap used to collect which subpartitions are used, one bit each. */
    my_bitmap_map *buf;
    uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
    if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
      return TRUE;
    bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
                FALSE);
  }
  range_par->key_parts= key_part;
  Field **field= (ppar->part_fields)? part_info->part_field_array :
                                           part_info->subpart_field_array;
  bool in_subpart_fields= FALSE;
  /* Build one synthetic KEY_PART per used (sub)partitioning field. */
  for (uint part= 0; part < total_parts; part++, key_part++)
  {
    key_part->key=          0;
    key_part->part=	    part;
    key_part->length= (uint16)(*field)->key_length();
    key_part->store_length= (uint16)get_partition_field_store_length(*field);

    DBUG_PRINT("info", ("part %u length %u store_length %u", part,
                         key_part->length, key_part->store_length));

    key_part->field=        (*field);
    key_part->image_type =  Field::itRAW;
    /*
      We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
      in the RangeAnalysisModule.
    */
    key_part->flag=         0;
    /* We don't set key_parts->null_bit as it will not be used */

    ppar->is_part_keypart[part]= !in_subpart_fields;
    ppar->is_subpart_keypart[part]= in_subpart_fields;

    /*
      Check if this was the last field in this array; in this case we
      switch to subpartitioning fields. (This only happens if
      there are subpartitioning fields to cater for).
    */
    if (!*(++field))
    {
      field= part_info->subpart_field_array;
      in_subpart_fields= TRUE;
    }
  }
  range_par->key_parts_end= key_part;

  DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
                                                range_par->key_parts_end););
  return FALSE;
}
4232 
4233 
4234 #ifndef DBUG_OFF
4235 
print_partitioning_index(KEY_PART * parts,KEY_PART * parts_end)4236 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4237 {
4238   DBUG_ENTER("print_partitioning_index");
4239   DBUG_LOCK_FILE;
4240   fprintf(DBUG_FILE, "partitioning INDEX(");
4241   for (KEY_PART *p=parts; p != parts_end; p++)
4242   {
4243     fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
4244   }
4245   fputs(");\n", DBUG_FILE);
4246   DBUG_UNLOCK_FILE;
4247   DBUG_VOID_RETURN;
4248 }
4249 
4250 
4251 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
{
  DBUG_ENTER("dbug_print_segment_range");
  DBUG_LOCK_FILE;
  if (!(arg->min_flag & NO_MIN_RANGE))
  {
    /*
      Copy the lower-bound key image into the field's record buffer so
      that Field::dbug_print() prints that value, then emit the strict
      ("<") or non-strict ("<=") comparison operator.
    */
    store_key_image_to_rec(part->field, arg->min_value, part->length);
    part->field->dbug_print();
    if (arg->min_flag & NEAR_MIN)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
  }

  fprintf(DBUG_FILE, "%s", part->field->field_name);

  if (!(arg->max_flag & NO_MAX_RANGE))
  {
    if (arg->max_flag & NEAR_MAX)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
    /* Same trick for the upper bound: materialize the value, then print. */
    store_key_image_to_rec(part->field, arg->max_value, part->length);
    part->field->dbug_print();
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}
4281 
4282 
4283 /*
4284   Print a singlepoint multi-keypart range interval to debug trace
4285 
4286   SYNOPSIS
4287     dbug_print_singlepoint_range()
4288       start  Array of SEL_ARG* ptrs representing conditions on key parts
4289       num    Number of elements in the array.
4290 
4291   DESCRIPTION
4292     This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4293     interval to debug trace.
4294 */
4295 
dbug_print_singlepoint_range(SEL_ARG ** start,uint num)4296 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4297 {
4298   DBUG_ENTER("dbug_print_singlepoint_range");
4299   DBUG_LOCK_FILE;
4300   SEL_ARG **end= start + num;
4301 
4302   for (SEL_ARG **arg= start; arg != end; arg++)
4303   {
4304     Field *field= (*arg)->field;
4305     fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
4306     field->dbug_print();
4307   }
4308   fputs("\n", DBUG_FILE);
4309   DBUG_UNLOCK_FILE;
4310   DBUG_VOID_RETURN;
4311 }
4312 #endif
4313 
4314 /****************************************************************************
4315  * Partition pruning code ends
4316  ****************************************************************************/
4317 #endif
4318 
4319 
4320 /*
4321   Get best plan for a SEL_IMERGE disjunctive expression.
4322   SYNOPSIS
4323     get_best_disjunct_quick()
4324       param     Parameter from check_quick_select function
4325       imerge    Expression to use
4326       read_time Don't create scans with cost > read_time
4327 
4328   NOTES
4329     index_merge cost is calculated as follows:
4330     index_merge_cost =
4331       cost(index_reads) +         (see #1)
4332       cost(rowid_to_row_scan) +   (see #2)
4333       cost(unique_use)            (see #3)
4334 
4335     1. cost(index_reads) =SUM_i(cost(index_read_i))
4336        For non-CPK scans,
4337          cost(index_read_i) = {cost of ordinary 'index only' scan}
4338        For CPK scan,
4339          cost(index_read_i) = {cost of non-'index only' scan}
4340 
4341     2. cost(rowid_to_row_scan)
4342       If table PK is clustered then
4343         cost(rowid_to_row_scan) =
4344           {cost of ordinary clustered PK scan with n_ranges=n_rows}
4345 
4346       Otherwise, we use the following model to calculate costs:
4347       We need to retrieve n_rows rows from file that occupies n_blocks blocks.
4348       We assume that offsets of rows we need are independent variates with
4349       uniform distribution in [0..max_file_offset] range.
4350 
4351       We'll denote block as "busy" if it contains row(s) we need to retrieve
4352       and "empty" if doesn't contain rows we need.
4353 
4354       Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
4355       applies to any block in file). Let x_i be a variate taking value 1 if
4356       block #i is empty and 0 otherwise.
4357 
4358       Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4359 
4360       E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4361         = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4362        ~= n_blocks * exp(-n_rows/n_blocks).
4363 
4364       E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4365        ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
4366 
4367       Average size of "hole" between neighbor non-empty blocks is
4368            E(hole_size) = n_blocks/E(n_busy_blocks).
4369 
4370       The total cost of reading all needed blocks in one "sweep" is:
4371 
4372       E(n_busy_blocks)*
4373        (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
4374 
4375     3. Cost of Unique use is calculated in Unique::get_use_cost function.
4376 
  ROR-union cost is calculated in the same way as index_merge cost, but
  instead of a Unique, a priority queue is used.
4379 
4380   RETURN
4381     Created read plan
4382     NULL - Out of memory or no read scan could be built.
4383 */
4384 
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time)
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  uint n_child_scans= imerge->trees_next - imerge->trees;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  TRP_RANGE **cpk_scan= NULL;
  bool imerge_too_expensive= FALSE;
  double imerge_cost= 0.0;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
  bool all_scans_ror_able= TRUE;
  bool all_scans_rors= TRUE;
  uint unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  double roru_index_costs;
  ha_rows roru_total_records;
  double roru_intersect_part= 1.0;
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost: %g", read_time));

  DBUG_ASSERT(param->table->file->stats.records);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_object trace_best_disjunct(trace);
  /* Array holding the best 'range' scan for each disjunct. */
  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);
  // Note: to_merge.end() is called to close this object after this for-loop.
  Opt_trace_array to_merge(trace, "indices_to_merge");
  /*
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
  */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++)
  {
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
    Opt_trace_object trace_idx(trace);
    if (!(*cur_child=
          get_key_scans_params(param, *ptree, true, false, read_time)))
    {
      /*
        One of index scans in this index_merge is more expensive than entire
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
      */
      imerge_too_expensive= true;
    }
    if (imerge_too_expensive)
    {
      trace_idx.add("chosen", false).add_alnum("cause", "cost");
      continue;
    }

    const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
    imerge_cost += (*cur_child)->read_cost;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
    /* The clustered-PK scan, if any, is handled separately in the
       cost model (its rows need no rowid-to-row lookup). */
    if (pk_is_clustered &&
        keynr_in_table == param->table->s->primary_key)
    {
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
    }
    else
      non_cpk_scan_records += (*cur_child)->records;

    trace_idx.
      add_utf8("index_to_merge", param->table->key_info[keynr_in_table].name).
      add("cumulated_cost", imerge_cost);
  }

  // Note: to_merge trace object is closed here
  to_merge.end();


  trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
  if (imerge_too_expensive || (imerge_cost > read_time) ||
      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
      read_time != DBL_MAX))
  {
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
      more expensive
    */
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
    trace_best_disjunct.add("chosen", false).add_alnum("cause", "cost");
    DBUG_RETURN(NULL);
  }

  /*
    If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
    guaranteed to be cheaper than non-ROR union), unless ROR-unions are
    disabled in @@optimizer_switch
  */
  if (all_scans_rors &&
      param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
  {
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    trace_best_disjunct.add("use_roworder_union", true).
      add_alnum("cause", "always_cheaper_than_not_roworder_retrieval");
    goto skip_to_ror_scan;
  }

  if (cpk_scan)
  {
    /*
      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
      is done in QUICK_RANGE_SELECT::row_in_ranges)
     */
    const double rid_comp_cost= non_cpk_scan_records * ROWID_COMPARE_COST;
    imerge_cost+= rid_comp_cost;
    trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
                            rid_comp_cost);
  }

  /* Calculate cost(rowid_to_row_scan) */
  {
    Cost_estimate sweep_cost;
    JOIN *join= param->thd->lex->select_lex.join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, non_cpk_scan_records, is_interrupted,
                        &sweep_cost);
    const double sweep_total_cost= sweep_cost.total_cost();
    imerge_cost+= sweep_total_cost;
    trace_best_disjunct.add("cost_sort_rowid_and_read_disk",
                            sweep_total_cost);
  }
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
                     imerge_cost));
  if (imerge_cost > read_time ||
      !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
  {
    trace_best_disjunct.add("use_roworder_index_merge", true).
      add_alnum("cause", "cost");
    goto build_ror_index_merge;
  }

  /* Add Unique operations cost */
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
                                    param->table->file->ref_length,
                                    param->thd->variables.sortbuff_size);
  /* Grow the (reusable) cost-calculation buffer if it is too small. */
  if (param->imerge_cost_buff_size < unique_calc_buff_size)
  {
    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                     unique_calc_buff_size)))
      DBUG_RETURN(NULL);
    param->imerge_cost_buff_size= unique_calc_buff_size;
  }

  {
    const double dup_removal_cost=
      Unique::get_use_cost(param->imerge_cost_buff,
                           (uint)non_cpk_scan_records,
                           param->table->file->ref_length,
                           param->thd->variables.sortbuff_size);

    trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost);
    imerge_cost += dup_removal_cost;
    trace_best_disjunct.add("total_cost", imerge_cost);
    DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
                       imerge_cost, read_time));
  }
  /* Keep the index_merge plan if it beats the current best read_time. */
  if (imerge_cost < read_time)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->read_cost= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
      imerge_trp->records= min(imerge_trp->records,
                               param->table->file->stats.records);
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      read_time= imerge_cost;
    }
  }

build_ror_index_merge:
  /*
    Try to build a ROR-union plan; if that is impossible (not all scans
    are ROR-able, this is a DELETE, or ROR-unions are disabled), return
    the index_merge plan built above (possibly NULL).
  */
  if (!all_scans_ror_able ||
      param->thd->lex->sql_command == SQLCOM_DELETE ||
      !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
    DBUG_RETURN(imerge_trp);

  /* Ok, it is possible to build a ROR-union, try it. */
  if (!(roru_read_plans=
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);
skip_to_ror_scan:
  roru_index_costs= 0.0;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  /*
    Note: trace_analyze_ror.end() is called to close this object after
    this for-loop.
  */
  Opt_trace_array trace_analyze_ror(trace, "analyzing_roworder_scans");
  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
  {
    Opt_trace_object trp_info(trace);
    if (unlikely(trace->is_started()))
      (*cur_child)->trace_basic_info(param, &trp_info);

    /*
      Assume the best ROR scan is the one that has cheapest
      full-row-retrieval scan cost.
      Also accumulate index_only scan costs as we'll need them to
      calculate overall index_intersection cost.
    */
    double cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      cost= param->table->file->
        read_time(param->real_keynr[(*cur_child)->key_idx], 1,
                  (*cur_child)->records) +
        rows2double((*cur_child)->records) * ROW_EVALUATE_COST;
    }
    else
      cost= read_time;

    /* Fall back to the plain range scan if no ROR-intersection beats it. */
    TABLE_READ_PLAN *prev_plan= *cur_child;
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost)))
    {
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_costs += (*cur_roru_plan)->read_cost;
    }
    else
      roru_index_costs +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
    roru_total_records += (*cur_roru_plan)->records;
    roru_intersect_part *= (*cur_roru_plan)->records /
      param->table->file->stats.records;
  }
  // Note: trace_analyze_ror trace object is closed here
  trace_analyze_ror.end();

  /*
    rows to retrieve=
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
    This is valid because index_merge construction guarantees that conditions
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
                                  param->table->file->stats.records);
  /*
    Ok, got a ROR read plan for each of the disjuncts.
    Calculate cost:
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */
  double roru_total_cost;
  {
    Cost_estimate sweep_cost;
    JOIN *join= param->thd->lex->select_lex.join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
                        &sweep_cost);
    roru_total_cost= roru_index_costs +
                     rows2double(roru_total_records) *
                     log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 +
                     sweep_cost.total_cost();
  }

  trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost).
    add("members", n_child_scans);
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_time)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      trace_best_disjunct.add("chosen", true);
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->read_cost= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  trace_best_disjunct.add("chosen", false);

  DBUG_RETURN(imerge_trp);
}
4691 
4692 
4693 /*
4694   Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
4695   sel_arg set of intervals.
4696 
4697   SYNOPSIS
4698     make_ror_scan()
4699       param    Parameter from test_quick_select function
4700       idx      Index of key in param->keys
4701       sel_arg  Set of intervals for a given key
4702 
4703   RETURN
4704     NULL - out of memory
4705     ROR scan structure containing a scan for {idx, sel_arg}
4706 */
4707 
static
ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
{
  ROR_SCAN_INFO *ror_scan;
  my_bitmap_map *bitmap_buf1;
  my_bitmap_map *bitmap_buf2;
  uint keynr;
  DBUG_ENTER("make_ror_scan");

  if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
                                             sizeof(ROR_SCAN_INFO))))
    DBUG_RETURN(NULL);

  ror_scan->idx= idx;
  ror_scan->keynr= keynr= param->real_keynr[idx];
  ror_scan->sel_arg= sel_arg;
  /* Row estimate produced earlier by the range analysis for this key. */
  ror_scan->records= param->table->quick_rows[keynr];

  /* Buffers for two bitmaps sized to cover all table fields. */
  if (!(bitmap_buf1= (my_bitmap_map*) alloc_root(param->mem_root,
                                                 param->fields_bitmap_size)))
    DBUG_RETURN(NULL);
  if (!(bitmap_buf2= (my_bitmap_map*) alloc_root(param->mem_root,
                                                 param->fields_bitmap_size)))
    DBUG_RETURN(NULL);

  if (bitmap_init(&ror_scan->covered_fields, bitmap_buf1,
                  param->table->s->fields, FALSE))
    DBUG_RETURN(NULL);
  if (bitmap_init(&ror_scan->covered_fields_remaining, bitmap_buf2,
                  param->table->s->fields, FALSE))
    DBUG_RETURN(NULL);

  bitmap_clear_all(&ror_scan->covered_fields);

  /*
    covered_fields = the needed fields (param->needed_fields) that are
    part of this index's user-defined key parts.
  */
  KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
  KEY_PART_INFO *key_part_end=
    key_part + param->table->key_info[keynr].user_defined_key_parts;
  for (;key_part != key_part_end; ++key_part)
  {
    if (bitmap_is_set(&param->needed_fields, key_part->fieldnr-1))
      bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
  }
  /*
    Initially nothing is covered by scans placed earlier in the intersect
    order, so "remaining" starts out equal to covered_fields (it is
    narrowed later in find_intersect_order()).
  */
  bitmap_copy(&ror_scan->covered_fields_remaining, &ror_scan->covered_fields);

  double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
  ror_scan->index_read_cost=
    param->table->file->index_only_read_time(ror_scan->keynr, rows);
  DBUG_RETURN(ror_scan);
}
4757 
4758 
4759 /**
4760   Compare two ROR_SCAN_INFO* by
4761     1. #fields in this index that are not already covered
4762        by other indexes earlier in the intersect ordering: descending
4763     2. E(#records): ascending
4764 
4765   @param scan1   first ror scan to compare
4766   @param scan2   second ror scan to compare
4767 
4768   @return true if scan1 > scan2, false otherwise
4769 */
is_better_intersect_match(const ROR_SCAN_INFO * scan1,const ROR_SCAN_INFO * scan2)4770 static bool is_better_intersect_match(const ROR_SCAN_INFO *scan1,
4771                                       const ROR_SCAN_INFO *scan2)
4772 {
4773   if (scan1 == scan2)
4774     return false;
4775 
4776   if (scan1->num_covered_fields_remaining >
4777       scan2->num_covered_fields_remaining)
4778     return false;
4779 
4780   if (scan1->num_covered_fields_remaining <
4781       scan2->num_covered_fields_remaining)
4782     return true;
4783 
4784   return (scan1->records > scan2->records);
4785 }
4786 
4787 /**
4788   Sort indexes in an order that is likely to be a good index merge
4789   intersection order. After running this function, [start, ..., end-1]
4790   is ordered according to this strategy:
4791 
4792     1) Minimize the number of indexes that must be used in the
4793        intersection. I.e., the index covering most fields not already
4794        covered by other indexes earlier in the sort order is picked first.
4795     2) When multiple indexes cover equally many uncovered fields, the
4796        index with lowest E(#rows) is chosen.
4797 
4798   Note that all permutations of index ordering are not tested, so this
4799   function may not find the optimal order.
4800 
4801   @param[in,out] start     Pointer to the start of indexes that may
4802                            be used in index merge intersection
4803   @param         end       Pointer past the last index that may be used.
4804   @param         param     Parameter from test_quick_select function.
4805 */
static void find_intersect_order(ROR_SCAN_INFO **start,
                                 ROR_SCAN_INFO **end,
                                 const PARAM *param)
{
  // nothing to sort if there are only zero or one ROR scans
  if ((start == end) || (start + 1 == end))
    return;

  /*
    Bitmap of fields we would like the ROR scans to cover. Will be
    modified by the loop below so that when we're looking for a ROR
    scan in position 'x' in the ordering, all fields covered by ROR
    scans 0,...,x-1 have been removed.
  */
  MY_BITMAP fields_to_cover;
  my_bitmap_map *map;
  /*
    On allocation failure we simply return without sorting: the order is
    only a heuristic, so leaving the scans unsorted is safe.
  */
  if (!(map= (my_bitmap_map*) alloc_root(param->mem_root,
                                         param->fields_bitmap_size)))
    return;
  bitmap_init(&fields_to_cover, map, param->needed_fields.n_bits, FALSE);
  bitmap_copy(&fields_to_cover, &param->needed_fields);

  // Sort ROR scans in [start,...,end-1] (selection sort on the criteria
  // implemented by is_better_intersect_match()).
  for (ROR_SCAN_INFO **place= start; place < (end - 1); place++)
  {
    /* Placeholder for the best ROR scan found for position 'place' so far */
    ROR_SCAN_INFO **best= place;
    ROR_SCAN_INFO **current= place + 1;

    {
      /*
        Calculate how many fields in 'fields_to_cover' not already
        covered by [start,...,place-1] the 'best' index covers. The
        result is used in is_better_intersect_match() and is valid
        when finding the best ROR scan for position 'place' only.
      */
      bitmap_intersect(&(*best)->covered_fields_remaining, &fields_to_cover);
      (*best)->num_covered_fields_remaining=
        bitmap_bits_set(&(*best)->covered_fields_remaining);
    }
    for (; current < end; current++)
    {
      {
        /*
          Calculate how many fields in 'fields_to_cover' not already
          covered by [start,...,place-1] the 'current' index covers.
          The result is used in is_better_intersect_match() and is
          valid when finding the best ROR scan for position 'place' only.
        */
        bitmap_intersect(&(*current)->covered_fields_remaining,
                         &fields_to_cover);
        (*current)->num_covered_fields_remaining=
          bitmap_bits_set(&(*current)->covered_fields_remaining);

        /*
          No need to compare with 'best' if 'current' does not
          contribute with uncovered fields.
        */
        if ((*current)->num_covered_fields_remaining == 0)
          continue;
      }

      if (is_better_intersect_match(*best, *current))
        best= current;
    }

    /*
      'best' is now the ROR scan that will be sorted in position
      'place'. When searching for the best ROR scans later in the sort
      sequence we do not need coverage of the fields covered by 'best'
     */
    bitmap_subtract(&fields_to_cover, &(*best)->covered_fields);
    if (best != place)
      swap_variables(ROR_SCAN_INFO*, *best, *place);

    if (bitmap_is_clear_all(&fields_to_cover))
      return;                                   // No more fields to cover
  }
}
4885 
4886 /* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  const PARAM *param;       /* context the intersection is being built in */
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Estimated number of records that satisfy the conditions of all scans
    in the intersection (initialized to the full table record count in
    ror_intersect_init()). This is the number of full records that will
    be retrieved if a non-index_only index intersection is employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records;   /* sum(#records to look in indexes) */
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  double total_cost;       /* overall cost estimate; starts at 0.0 */
} ROR_INTERSECT_INFO;
4904 
4905 
4906 /*
4907   Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
4908 
4909   SYNOPSIS
4910     ror_intersect_init()
4911       param         Parameter from test_quick_select
4912 
4913   RETURN
4914     allocated structure
4915     NULL on error
4916 */
4917 
4918 static
ror_intersect_init(const PARAM * param)4919 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
4920 {
4921   ROR_INTERSECT_INFO *info;
4922   my_bitmap_map* buf;
4923   if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
4924                                               sizeof(ROR_INTERSECT_INFO))))
4925     return NULL;
4926   info->param= param;
4927   if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
4928                                          param->fields_bitmap_size)))
4929     return NULL;
4930   if (bitmap_init(&info->covered_fields, buf, param->table->s->fields,
4931                   FALSE))
4932     return NULL;
4933   info->is_covering= FALSE;
4934   info->index_scan_costs= 0.0;
4935   info->total_cost= 0.0;
4936   info->index_records= 0;
4937   info->out_rows= (double) param->table->file->stats.records;
4938   bitmap_clear_all(&info->covered_fields);
4939   return info;
4940 }
4941 
ror_intersect_cpy(ROR_INTERSECT_INFO * dst,const ROR_INTERSECT_INFO * src)4942 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
4943 {
4944   dst->param= src->param;
4945   memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
4946          no_bytes_in_map(&src->covered_fields));
4947   dst->out_rows= src->out_rows;
4948   dst->is_covering= src->is_covering;
4949   dst->index_records= src->index_records;
4950   dst->index_scan_costs= src->index_scan_costs;
4951   dst->total_cost= src->total_cost;
4952 }
4953 
4954 
4955 /*
4956   Get selectivity of adding a ROR scan to the ROR-intersection.
4957 
4958   SYNOPSIS
4959     ror_scan_selectivity()
4960       info  ROR-interection, an intersection of ROR index scans
4961       scan  ROR scan that may or may not improve the selectivity
4962             of 'info'
4963 
4964   NOTES
4965     Suppose we have conditions on several keys
4966     cond=k_11=c_11 AND k_12=c_12 AND ...  // key_parts of first key in 'info'
4967          k_21=c_21 AND k_22=c_22 AND ...  // key_parts of second key in 'info'
4968           ...
4969          k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //key_parts of 'scan'
4970 
4971     where k_ij may be the same as any k_pq (i.e. keys may have common parts).
4972 
4973     Note that for ROR retrieval, only equality conditions are usable so there
4974     are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'
4975 
4976     A full row is retrieved if entire condition holds.
4977 
4978     The recursive procedure for finding P(cond) is as follows:
4979 
4980     First step:
4981     Pick 1st part of 1st key and break conjunction (1) into two parts:
4982       cond= (k_11=c_11 AND R)
4983 
4984     Here R may still contain condition(s) equivalent to k_11=c_11.
4985     Nevertheless, the following holds:
4986 
4987       P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
4988 
4989     Mark k_11 as fixed field (and satisfied condition) F, save P(F),
4990     save R to be cond and proceed to recursion step.
4991 
4992     Recursion step:
4993     We have a set of fixed fields/satisfied conditions) F, probability P(F),
4994     and remaining conjunction R
4995     Pick next key part on current key and its condition "k_ij=c_ij".
4996     We will add "k_ij=c_ij" into F and update P(F).
4997     Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
4998 
4999      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
5000 
5001     (where '|' mean conditional probability, not "or")
5002 
5003     Consider the first multiplier in (2). One of the following holds:
5004     a) F contains condition on field used in t (i.e. t AND F = F).
5005       Then P(t|F) = 1
5006 
5007     b) F doesn't contain condition on field used in t. Then F and t are
5008      considered independent.
5009 
5010      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
5011           = P(t|fields_before_t_in_key).
5012 
5013      P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
5014                                    #records(fields_before_t_in_key, t)
5015 
5016     The second multiplier is calculated by applying this step recursively.
5017 
5018   IMPLEMENTATION
5019     This function calculates the result of application of the "recursion step"
5020     described above for all fixed key members of a single key, accumulating set
5021     of covered fields, selectivity, etc.
5022 
5023     The calculation is conducted as follows:
5024     Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
5025 
5026      n_{k1}      n_{k2}
5027     --------- * ---------  * .... (3)
5028      n_{k1-1}    n_{k2-1}
5029 
5030     where k1,k2,... are key parts which fields were not yet marked as fixed
5031     ( this is result of application of option b) of the recursion step for
5032       parts of a single key).
5033     Since it is reasonable to expect that most of the fields are not marked
5034     as fixed, we calculate (3) as
5035 
5036                                   n_{i1}      n_{i2}
5037     (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
5038                                   n_{i1-1}    n_{i2-1}
5039 
5040     where i1,i2, .. are key parts that were already marked as fixed.
5041 
5042     In order to minimize number of expensive records_in_range calls we
5043     group and reduce adjacent fractions. Note that on the optimizer's
5044     request, index statistics may be used instead of records_in_range
5045     @see RANGE_OPT_PARAM::use_index_statistics.
5046 
5047   RETURN
5048     Selectivity of given ROR scan, a number between 0 and 1. 1 means that
5049     adding 'scan' to the intersection does not improve the selectivity.
5050 */
5051 
static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  const TABLE * const table= info->param->table;
  const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;
  /**
    key values tuple, used to store both min_range.key and
    max_range.key. This function is only called for equality ranges;
    open ranges (e.g. "min_value < X < max_value") cannot be used for
    rowid ordered retrieval, so in this function we know that
    min_range.key == max_range.key
  */
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  /* Whether the first key part's field is already covered by 'info'. */
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr-1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  /* #rows matching the empty prefix, i.e. all rows in the table. */
  ha_rows prev_records= table->file->stats.records;
  DBUG_ENTER("ror_scan_selectivity");

  /*
    Walk the key parts of 'scan' in key order. Work (an estimation of
    #rows in the current prefix) is only done at transitions between
    runs of covered and uncovered key parts; inside a run, parts are
    just accumulated into the prefix tuple lazily below.
  */
  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr-1));
    if (cur_covered != prev_covered)
    {
      /* create (part1val, ..., part{n-1}val) tuple. */
      bool is_null_range= false;
      ha_rows records;
      if (!tuple_arg)
      {
        /* Lazily start building the key prefix on the first transition. */
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= 1;
      }
      /* Extend the prefix tuple with all key parts up to (excluding) 'sel_arg'. */
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= (keypart_map << 1) | 1;
      }
      min_range.length= max_range.length= (size_t) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;

      /*
        Get the number of rows in this range. This is done by calling
        records_in_range() unless all these are true:
          1) The user has requested that index statistics should be used
             for equality ranges to avoid the incurred overhead of
             index dives in records_in_range()
          2) The range is not on the form "x IS NULL". The reason is
             that the number of rows with this value are likely to be
             very different than the values in the index statistics
          3) Index statistics is available.
        @see key_val
      */
      if (!info->param->use_index_statistics ||        // (1)
          is_null_range ||                             // (2)
          !(records= table->key_info[scan->keynr].
                     rec_per_key[tuple_arg->part]))    // (3)
      {
        DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
        DBUG_ASSERT(min_range.length > 0);
        records= (table->file->
                  records_in_range(scan->keynr, &min_range, &max_range));
      }
      if (cur_covered)
      {
        /* uncovered -> covered */
        /* Apply P(prefix+uncovered run | prefix) = records/prev_records. */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        /*
          prev_records is not meaningful while inside a covered run;
          poison it so any accidental use is conspicuous.
        */
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        /* Remember #rows of this prefix as denominator for the next run. */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
    /*
      Key ended inside an uncovered run: close it using the estimated
      #rows matching the whole scan (quick_rows) over the last prefix.
    */
    double tmp= rows2double(table->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  // Todo: This assert fires in PB sysqa RQG tests.
  // DBUG_ASSERT(selectivity_mult <= 1.0);
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}
5161 
5162 
5163 /*
5164   Check if adding a ROR scan to a ROR-intersection reduces its cost of
5165   ROR-intersection and if yes, update parameters of ROR-intersection,
5166   including its cost.
5167 
5168   SYNOPSIS
5169     ror_intersect_add()
5170       param        Parameter from test_quick_select
5171       info         ROR-intersection structure to add the scan to.
5172       ror_scan     ROR scan info to add.
5173       is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
5174                    from other parameters and is passed separately only to
5175                    avoid duplicating the inference code)
5176       trace_costs  Optimizer trace object cost details are added to
5177 
5178   NOTES
5179     Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
5180     intersection decreases. The cost of ROR-intersection is calculated as
5181     follows:
5182 
5183     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
5184 
5185     When we add a scan the first increases and the second decreases.
5186 
5187     cost_of_full_rows_retrieval=
5188       (union of indexes used covers all needed fields) ?
5189         cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
5190         0
5191 
5192     E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
5193                            ror_scan_selectivity({scan1}, scan2) * ... *
5194                            ror_scan_selectivity({scan1,...}, scanN).
5195   RETURN
5196     TRUE   ROR scan added to ROR-intersection, cost updated.
5197     FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
5198 */
5199 
ror_intersect_add(ROR_INTERSECT_INFO * info,ROR_SCAN_INFO * ror_scan,bool is_cpk_scan,Opt_trace_object * trace_costs)5200 static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
5201                               ROR_SCAN_INFO* ror_scan, bool is_cpk_scan,
5202                               Opt_trace_object *trace_costs)
5203 {
5204   double selectivity_mult= 1.0;
5205 
5206   DBUG_ENTER("ror_intersect_add");
5207   DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
5208   DBUG_PRINT("info", ("Adding scan on %s",
5209                       info->param->table->key_info[ror_scan->keynr].name));
5210   DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));
5211 
5212   selectivity_mult = ror_scan_selectivity(info, ror_scan);
5213   if (selectivity_mult == 1.0)
5214   {
5215     /* Don't add this scan if it doesn't improve selectivity. */
5216     DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
5217     DBUG_RETURN(FALSE);
5218   }
5219 
5220   info->out_rows *= selectivity_mult;
5221 
5222   if (is_cpk_scan)
5223   {
5224     /*
5225       CPK scan is used to filter out rows. We apply filtering for
5226       each record of every scan. Assuming ROWID_COMPARE_COST
5227       per check this gives us:
5228     */
5229     const double idx_cost=
5230       rows2double(info->index_records) * ROWID_COMPARE_COST;
5231     info->index_scan_costs+= idx_cost;
5232     trace_costs->add("index_scan_cost", idx_cost);
5233   }
5234   else
5235   {
5236     info->index_records += info->param->table->quick_rows[ror_scan->keynr];
5237     info->index_scan_costs += ror_scan->index_read_cost;
5238     trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
5239     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
5240     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
5241                                                &info->covered_fields))
5242     {
5243       DBUG_PRINT("info", ("ROR-intersect is covering now"));
5244       info->is_covering= TRUE;
5245     }
5246   }
5247 
5248   info->total_cost= info->index_scan_costs;
5249   trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
5250 
5251   if (!info->is_covering)
5252   {
5253     Cost_estimate sweep_cost;
5254     JOIN *join= info->param->thd->lex->select_lex.join;
5255     const bool is_interrupted= join && join->tables != 1;
5256     get_sweep_read_cost(info->param->table, double2rows(info->out_rows),
5257                         is_interrupted, &sweep_cost);
5258     info->total_cost += sweep_cost.total_cost();
5259     trace_costs->add("disk_sweep_cost", sweep_cost.total_cost());
5260   }
5261   else
5262     trace_costs->add("disk_sweep_cost", 0);
5263 
5264   DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
5265   DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
5266                       info->is_covering?"" : "non-"));
5267   DBUG_RETURN(TRUE);
5268 }
5269 
5270 
5271 /*
5272   Get best ROR-intersection plan using non-covering ROR-intersection search
5273   algorithm. The returned plan may be covering.
5274 
5275   SYNOPSIS
5276     get_best_ror_intersect()
5277       param            Parameter from test_quick_select function.
5278       tree             Transformed restriction condition to be used to look
5279                        for ROR scans.
5280       read_time        Do not return read plans with cost > read_time.
5281       are_all_covering [out] set to TRUE if union of all scans covers all
5282                        fields needed by the query (and it is possible to build
5283                        a covering ROR-intersection)
5284 
5285   NOTES
5286     get_key_scans_params must be called before this function can be called.
5287 
5288     When this function is called by ROR-union construction algorithm it
5289     assumes it is building an uncovered ROR-intersection (and thus # of full
5290     records to be retrieved is wrong here). This is a hack.
5291 
5292   IMPLEMENTATION
5293     The approximate best non-covering plan search algorithm is as follows:
5294 
5295     find_min_ror_intersection_scan()
5296     {
5297       R= select all ROR scans;
5298       order R by (E(#records_matched) * key_record_length).
5299 
5300       S= first(R); -- set of scans that will be used for ROR-intersection
5301       R= R-first(S);
5302       min_cost= cost(S);
5303       min_scan= make_scan(S);
5304       while (R is not empty)
5305       {
5306         firstR= R - first(R);
5307         if (!selectivity(S + firstR < selectivity(S)))
5308           continue;
5309 
5310         S= S + first(R);
5311         if (cost(S) < min_cost)
5312         {
5313           min_cost= cost(S);
5314           min_scan= make_scan(S);
5315         }
5316       }
5317       return min_scan;
5318     }
5319 
5320     See ror_intersect_add function for ROR intersection costs.
5321 
5322     Special handling for Clustered PK scans
5323     Clustered PK contains all table fields, so using it as a regular scan in
5324     index intersection doesn't make sense: a range scan on CPK will be less
5325     expensive in this case.
5326     Clustered PK scan has special handling in ROR-intersection: it is not used
5327     to retrieve rows, instead its condition is used to filter row references
5328     we get from scans on other keys.
5329 
5330   RETURN
5331     ROR-intersection table read plan
5332     NULL if out of memory or no suitable plan found.
5333 */
5334 
5335 static
get_best_ror_intersect(const PARAM * param,SEL_TREE * tree,double read_time)5336 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
5337                                           double read_time)
5338 {
5339   uint idx;
5340   double min_cost= DBL_MAX;
5341   Opt_trace_context * const trace= &param->thd->opt_trace;
5342   DBUG_ENTER("get_best_ror_intersect");
5343 
5344   Opt_trace_object trace_ror(trace, "analyzing_roworder_intersect");
5345 
5346   if ((tree->n_ror_scans < 2) || !param->table->file->stats.records ||
5347       !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
5348   {
5349     trace_ror.add("usable", false);
5350     if (tree->n_ror_scans < 2)
5351       trace_ror.add_alnum("cause", "too_few_roworder_scans");
5352     else
5353       trace_ror.add("need_tracing", true);
5354     DBUG_RETURN(NULL);
5355   }
5356 
5357   if (param->order_direction == ORDER::ORDER_DESC)
5358     DBUG_RETURN(NULL);
5359 
5360   /*
5361     Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
5362     them. Also find and save clustered PK scan if there is one.
5363   */
5364   ROR_SCAN_INFO **cur_ror_scan;
5365   ROR_SCAN_INFO *cpk_scan= NULL;
5366   uint cpk_no;
5367   bool cpk_scan_used= FALSE;
5368 
5369   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5370                                                      sizeof(ROR_SCAN_INFO*)*
5371                                                      param->keys)))
5372     return NULL;
5373   cpk_no= ((param->table->file->primary_key_is_clustered()) ?
5374            param->table->s->primary_key : MAX_KEY);
5375 
5376   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
5377   {
5378     ROR_SCAN_INFO *scan;
5379     if (!tree->ror_scans_map.is_set(idx))
5380       continue;
5381     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
5382       return NULL;
5383     if (param->real_keynr[idx] == cpk_no)
5384     {
5385       cpk_scan= scan;
5386       tree->n_ror_scans--;
5387     }
5388     else
5389       *(cur_ror_scan++)= scan;
5390   }
5391 
5392   tree->ror_scans_end= cur_ror_scan;
5393   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
5394                                           tree->ror_scans,
5395                                           tree->ror_scans_end););
5396   /*
5397     Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
5398     ROR_SCAN_INFO's.
5399     Step 2: Get best ROR-intersection using an approximate algorithm.
5400   */
5401   find_intersect_order(tree->ror_scans, tree->ror_scans_end, param);
5402 
5403   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
5404                                           tree->ror_scans,
5405                                           tree->ror_scans_end););
5406 
5407   ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
5408   ROR_SCAN_INFO **intersect_scans_end;
5409   if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5410                                                      sizeof(ROR_SCAN_INFO*)*
5411                                                      tree->n_ror_scans)))
5412     return NULL;
5413   intersect_scans_end= intersect_scans;
5414 
5415   /* Create and incrementally update ROR intersection. */
5416   ROR_INTERSECT_INFO *intersect, *intersect_best;
5417   if (!(intersect= ror_intersect_init(param)) ||
5418       !(intersect_best= ror_intersect_init(param)))
5419     return NULL;
5420 
5421   /* [intersect_scans,intersect_scans_best) will hold the best intersection */
5422   ROR_SCAN_INFO **intersect_scans_best;
5423   cur_ror_scan= tree->ror_scans;
5424   intersect_scans_best= intersect_scans;
5425   /*
5426     Note: trace_isect_idx.end() is called to close this object after
5427     this while-loop.
5428   */
5429   Opt_trace_array trace_isect_idx(trace, "intersecting_indices");
5430   while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
5431   {
5432     Opt_trace_object trace_idx(trace);
5433     trace_idx.add_utf8("index",
5434                        param->table->key_info[(*cur_ror_scan)->keynr].name);
5435     /* S= S + first(R);  R= R - first(R); */
5436     if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE, &trace_idx))
5437     {
5438       trace_idx.add("cumulated_total_cost", intersect->total_cost).
5439         add("usable", false).
5440         add_alnum("cause", "does_not_reduce_cost_of_intersect");
5441       cur_ror_scan++;
5442       continue;
5443     }
5444 
5445     trace_idx.add("cumulated_total_cost", intersect->total_cost).
5446       add("usable", true).
5447       add("matching_rows_now", intersect->out_rows).
5448       add("isect_covering_with_this_index", intersect->is_covering);
5449 
5450     *(intersect_scans_end++)= *(cur_ror_scan++);
5451 
5452     if (intersect->total_cost < min_cost)
5453     {
5454       /* Local minimum found, save it */
5455       ror_intersect_cpy(intersect_best, intersect);
5456       intersect_scans_best= intersect_scans_end;
5457       min_cost = intersect->total_cost;
5458       trace_idx.add("chosen", true);
5459     }
5460     else
5461     {
5462       trace_idx.add("chosen", false).
5463         add_alnum("cause", "does_not_reduce_cost");
5464     }
5465   }
5466   // Note: trace_isect_idx trace object is closed here
5467   trace_isect_idx.end();
5468 
5469   if (intersect_scans_best == intersect_scans)
5470   {
5471     trace_ror.add("chosen", false).
5472       add_alnum("cause", "does_not_increase_selectivity");
5473     DBUG_PRINT("info", ("None of scans increase selectivity"));
5474     DBUG_RETURN(NULL);
5475   }
5476 
5477   DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
5478                                           "best ROR-intersection",
5479                                           intersect_scans,
5480                                           intersect_scans_best););
5481 
5482   uint best_num= intersect_scans_best - intersect_scans;
5483   ror_intersect_cpy(intersect, intersect_best);
5484 
5485   /*
5486     Ok, found the best ROR-intersection of non-CPK key scans.
5487     Check if we should add a CPK scan. If the obtained ROR-intersection is
5488     covering, it doesn't make sense to add CPK scan.
5489   */
5490   { // Scope for trace object
5491     Opt_trace_object trace_cpk(trace, "clustered_pk");
5492     if (cpk_scan && !intersect->is_covering)
5493     {
5494       if (ror_intersect_add(intersect, cpk_scan, TRUE, &trace_cpk) &&
5495           (intersect->total_cost < min_cost))
5496       {
5497         trace_cpk.add("clustered_pk_scan_added_to_intersect", true).
5498           add("cumulated_cost", intersect->total_cost);
5499         cpk_scan_used= TRUE;
5500         intersect_best= intersect; //just set pointer here
5501       }
5502       else
5503         trace_cpk.add("clustered_pk_added_to_intersect", false).
5504           add_alnum("cause", "cost");
5505     }
5506     else
5507     {
5508       trace_cpk.add("clustered_pk_added_to_intersect", false).
5509         add_alnum("cause", cpk_scan ?
5510                   "roworder_is_covering" : "no_clustered_pk_index");
5511     }
5512   }
5513   /* Ok, return ROR-intersect plan if we have found one */
5514   TRP_ROR_INTERSECT *trp= NULL;
5515   if (min_cost < read_time && (cpk_scan_used || best_num > 1))
5516   {
5517     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
5518       DBUG_RETURN(trp);
5519     if (!(trp->first_scan=
5520            (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5521                                        sizeof(ROR_SCAN_INFO*)*best_num)))
5522       DBUG_RETURN(NULL);
5523     memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
5524     trp->last_scan=  trp->first_scan + best_num;
5525     trp->is_covering= intersect_best->is_covering;
5526     trp->read_cost= intersect_best->total_cost;
5527     /* Prevent divisons by zero */
5528     ha_rows best_rows = double2rows(intersect_best->out_rows);
5529     if (!best_rows)
5530       best_rows= 1;
5531     set_if_smaller(param->table->quick_condition_rows, best_rows);
5532     trp->records= best_rows;
5533     trp->index_scan_costs= intersect_best->index_scan_costs;
5534     trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
5535 
5536     trace_ror.add("rows", trp->records).
5537       add("cost", trp->read_cost).
5538       add("covering", trp->is_covering).
5539       add("chosen", true);
5540 
5541     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
5542                         "cost %g, records %lu",
5543                         trp->read_cost, (ulong) trp->records));
5544   }
5545   else
5546   {
5547     trace_ror.add("chosen", false).
5548       add_alnum("cause", (min_cost >= read_time) ? "cost" :
5549                 "too_few_indexes_to_merge");
5550 
5551   }
5552   DBUG_RETURN(trp);
5553 }
5554 
5555 /*
5556   Get best "range" table read plan for given SEL_TREE, also update some info
5557 
5558   SYNOPSIS
5559     get_key_scans_params()
5560       param                    Parameters from test_quick_select
5561       tree                     Make range select for this SEL_TREE
5562       index_read_must_be_used  TRUE <=> assume 'index only' option will be set
5563                                (except for clustered PK indexes)
5564       update_tbl_stats         TRUE <=> update table->quick_* with information
5565                                about range scans we've evaluated.
5566       read_time                Maximum cost. i.e. don't create read plans with
5567                                cost > read_time.
5568 
5569   DESCRIPTION
5570     Find the best "range" table read plan for given SEL_TREE.
5571     The side effects are
5572      - tree->ror_scans is updated to indicate which scans are ROR scans.
5573      - if update_tbl_stats=TRUE then table->quick_* is updated with info
5574        about every possible range scan.
5575 
5576   RETURN
5577     Best range read plan
5578     NULL if no plan found or error occurred
5579 */
5580 
get_key_scans_params(PARAM * param,SEL_TREE * tree,bool index_read_must_be_used,bool update_tbl_stats,double read_time)5581 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
5582                                        bool index_read_must_be_used,
5583                                        bool update_tbl_stats,
5584                                        double read_time)
5585 {
5586   uint idx;
5587   SEL_ARG **key,**end, **key_to_read= NULL;
5588   ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
5589   uint    best_mrr_flags, best_buf_size;
5590   TRP_RANGE* read_plan= NULL;
5591   DBUG_ENTER("get_key_scans_params");
5592   LINT_INIT(best_mrr_flags); /* protected by key_to_read */
5593   LINT_INIT(best_buf_size); /* protected by key_to_read */
5594   Opt_trace_context * const trace= &param->thd->opt_trace;
5595   /*
5596     Note that there may be trees that have type SEL_TREE::KEY but contain no
5597     key reads at all, e.g. tree for expression "key1 is not null" where key1
5598     is defined as "not null".
5599   */
5600   DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
5601                                       "tree scans"););
5602   Opt_trace_array ota(trace, "range_scan_alternatives");
5603 
5604   tree->ror_scans_map.clear_all();
5605   tree->n_ror_scans= 0;
5606   for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
5607   {
5608     if (*key)
5609     {
5610       ha_rows found_records;
5611       Cost_estimate cost;
5612       double found_read_time;
5613       uint mrr_flags, buf_size;
5614       uint keynr= param->real_keynr[idx];
5615       if ((*key)->type == SEL_ARG::MAYBE_KEY ||
5616           (*key)->maybe_flag)
5617         param->needed_reg->set_bit(keynr);
5618 
5619       bool read_index_only= index_read_must_be_used ? TRUE :
5620                             (bool) param->table->covering_keys.is_set(keynr);
5621 
5622       Opt_trace_object trace_idx(trace);
5623       trace_idx.add_utf8("index", param->table->key_info[keynr].name);
5624 
5625       found_records= check_quick_select(param, idx, read_index_only, *key,
5626                                         update_tbl_stats, &mrr_flags,
5627                                         &buf_size, &cost);
5628 
5629 #ifdef OPTIMIZER_TRACE
5630       // check_quick_select() says don't use range if it returns HA_POS_ERROR
5631       if (found_records != HA_POS_ERROR &&
5632           param->thd->opt_trace.is_started())
5633       {
5634         Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");
5635 
5636         const KEY &cur_key= param->table->key_info[keynr];
5637         const KEY_PART_INFO *key_part= cur_key.key_part;
5638 
5639         String range_info;
5640         range_info.set_charset(system_charset_info);
5641         append_range_all_keyparts(&trace_range, NULL,
5642                                   &range_info, *key, key_part);
5643         trace_range.end(); // NOTE: ends the tracing scope
5644 
5645         trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics).
5646           add("rowid_ordered", param->is_ror_scan).
5647           add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)).
5648           add("index_only", read_index_only).
5649           add("rows", found_records).
5650           add("cost", cost.total_cost());
5651         if (param->thd->optimizer_switch_flag(
5652                 OPTIMIZER_SWITCH_FAVOR_RANGE_SCAN))
5653           trace_idx.add("revised_cost", cost.total_cost() * 0.1);
5654       }
5655 #endif
5656       if (param->thd->optimizer_switch_flag(
5657               OPTIMIZER_SWITCH_FAVOR_RANGE_SCAN))
5658         cost.multiply(0.1);
5659 
5660       if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
5661       {
5662         tree->n_ror_scans++;
5663         tree->ror_scans_map.set_bit(idx);
5664       }
5665 
5666 
5667       if (found_records != HA_POS_ERROR &&
5668           read_time > (found_read_time= cost.total_cost()))
5669       {
5670         trace_idx.add("chosen", true);
5671         read_time=    found_read_time;
5672         best_records= found_records;
5673         key_to_read=  key;
5674         best_mrr_flags= mrr_flags;
5675         best_buf_size=  buf_size;
5676       }
5677       else
5678         trace_idx.add("chosen", false).
5679           add_alnum("cause",
5680                     (found_records == HA_POS_ERROR) ? "unknown" : "cost");
5681 
5682     }
5683   }
5684 
5685   DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
5686                                       "ROR scans"););
5687   if (key_to_read)
5688   {
5689     idx= key_to_read - tree->keys;
5690     if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
5691                                                     best_mrr_flags)))
5692     {
5693       read_plan->records= best_records;
5694       read_plan->is_ror= tree->ror_scans_map.is_set(idx);
5695       read_plan->read_cost= read_time;
5696       read_plan->mrr_buf_size= best_buf_size;
5697       DBUG_PRINT("info",
5698                 ("Returning range plan for key %s, cost %g, records %lu",
5699                  param->table->key_info[param->real_keynr[idx]].name,
5700                  read_plan->read_cost, (ulong) read_plan->records));
5701     }
5702   }
5703   else
5704     DBUG_PRINT("info", ("No 'range' table read plan found"));
5705 
5706   DBUG_RETURN(read_plan);
5707 }
5708 
5709 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5710 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
5711                                             bool retrieve_full_rows,
5712                                             MEM_ROOT *parent_alloc)
5713 {
5714   QUICK_INDEX_MERGE_SELECT *quick_imerge;
5715   QUICK_RANGE_SELECT *quick;
5716   /* index_merge always retrieves full rows, ignore retrieve_full_rows */
5717   if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
5718     return NULL;
5719 
5720   quick_imerge->records= records;
5721   quick_imerge->read_time= read_cost;
5722   for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
5723        range_scan++)
5724   {
5725     if (!(quick= (QUICK_RANGE_SELECT*)
5726           ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
5727         quick_imerge->push_quick_back(quick))
5728     {
5729       delete quick;
5730       delete quick_imerge;
5731       return NULL;
5732     }
5733   }
5734   return quick_imerge;
5735 }
5736 
/*
  Construct a QUICK_ROR_INTERSECT_SELECT from this ROR-intersection plan.

  @param param              Range optimizer parameters
  @param retrieve_full_rows If TRUE, retrieve full rows unless the index
                            intersection covers all needed columns
  @param parent_alloc       If non-NULL, use it for allocation of the child
                            quick selects instead of the intersect's own
                            MEM_ROOT

  @return The constructed quick select, or NULL on error (allocation
          failure or failure to build one of the child scans).
*/
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  /*
    Full rows need to be fetched only when the caller asks for them AND
    the intersection of indexes is not covering.
  */
  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    /* Children are allocated on the parent's MEM_ROOT when one is given */
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (st_ror_scan_info **current= first_scan;
         current != last_scan;
         current++)
    {
      if (!(quick= get_quick_select(param, (*current)->idx,
                                    (*current)->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      /* The clustered PK scan is kept separately in cpk_quick */
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /*
        NOTE(review): handler is deliberately detached here — the CPK quick
        select is presumably used for rowid filtering only, not for reading
        rows; confirm against QUICK_ROR_INTERSECT_SELECT::get_next().
      */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}
5788 
5789 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5790 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
5791                                           bool retrieve_full_rows,
5792                                           MEM_ROOT *parent_alloc)
5793 {
5794   QUICK_ROR_UNION_SELECT *quick_roru;
5795   TABLE_READ_PLAN **scan;
5796   QUICK_SELECT_I *quick;
5797   DBUG_ENTER("TRP_ROR_UNION::make_quick");
5798   /*
5799     It is impossible to construct a ROR-union that will not retrieve full
5800     rows, ignore retrieve_full_rows parameter.
5801   */
5802   if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
5803   {
5804     for (scan= first_ror; scan != last_ror; scan++)
5805     {
5806       if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
5807           quick_roru->push_quick_back(quick))
5808         DBUG_RETURN(NULL);
5809     }
5810     quick_roru->records= records;
5811     quick_roru->read_time= read_cost;
5812   }
5813   DBUG_RETURN(quick_roru);
5814 }
5815 
5816 
5817 /**
5818    If EXPLAIN EXTENDED, add a warning that the index cannot be
5819    used for range access due to either type conversion or different
5820    collations on the field used for comparison
5821 
5822    @param param              PARAM from SQL_SELECT::test_quick_select
5823    @param key_num            Key number
5824    @param field              Field in the predicate
5825  */
5826 static void
if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM * param,const uint key_num,const Field * field)5827 if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM *param,
5828                                               const uint key_num,
5829                                               const Field *field)
5830 {
5831   if (param->using_real_indexes &&
5832       param->thd->lex->describe & DESCRIBE_EXTENDED)
5833     push_warning_printf(
5834             param->thd,
5835             Sql_condition::WARN_LEVEL_WARN,
5836             ER_WARN_INDEX_NOT_APPLICABLE,
5837             ER(ER_WARN_INDEX_NOT_APPLICABLE),
5838             "range",
5839             field->table->key_info[param->real_keynr[key_num]].name,
5840             field->field_name);
5841 }
5842 
5843 
5844 /*
5845   Build a SEL_TREE for <> or NOT BETWEEN predicate
5846 
5847   SYNOPSIS
5848     get_ne_mm_tree()
5849       param       PARAM from SQL_SELECT::test_quick_select
5850       cond_func   item for the predicate
5851       field       field in the predicate
      lt_value    constant that the field should be smaller than
      gt_value    constant that the field should be greater than
5854       cmp_type    compare type for the field
5855 
5856   RETURN
    #  Pointer to the built tree
5858     0  on error
5859 */
get_ne_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item * lt_value,Item * gt_value,Item_result cmp_type)5860 static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
5861                                 Field *field,
5862                                 Item *lt_value, Item *gt_value,
5863                                 Item_result cmp_type)
5864 {
5865   SEL_TREE *tree;
5866   tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
5867                      lt_value, cmp_type);
5868   if (tree)
5869   {
5870     tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
5871 					    Item_func::GT_FUNC,
5872 					    gt_value, cmp_type));
5873   }
5874   return tree;
5875 }
5876 
5877 
5878 /*
5879   Build a SEL_TREE for a simple predicate
5880 
5881   SYNOPSIS
5882     get_func_mm_tree()
5883       param       PARAM from SQL_SELECT::test_quick_select
5884       cond_func   item for the predicate
5885       field       field in the predicate
5886       value       constant in the predicate
5887       cmp_type    compare type for the field
5888       inv         TRUE <> NOT cond_func is considered
5889                   (makes sense only when cond_func is BETWEEN or IN)
5890 
5891   RETURN
    Pointer to the built tree
5893 */
5894 
static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
                                  Field *field, Item *value,
                                  Item_result cmp_type, bool inv)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

  switch (cond_func->functype()) {

  case Item_func::XOR_FUNC:
    DBUG_RETURN(NULL); // Always true (don't use range access on XOR).
    break;             // See WL#5800

  case Item_func::NE_FUNC:
    /* "field <> value" is built as "(field < value) OR (field > value)" */
    tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
    break;

  case Item_func::BETWEEN:
  {
    /*
      value == NULL means the field is arguments()[0] of BETWEEN;
      otherwise value encodes (as (Item*)1 or (Item*)2) which BETWEEN
      bound is the field — see the BETWEEN case in get_mm_tree().
    */
    if (!value)
    {
      if (inv)
      {
        /* NOT BETWEEN low AND high: (field < low) OR (field > high) */
        tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1],
                             cond_func->arguments()[2], cmp_type);
      }
      else
      {
        /* BETWEEN low AND high: (field >= low) AND (field <= high) */
        tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
		           cond_func->arguments()[1],cmp_type);
        if (tree)
        {
          tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
					           Item_func::LE_FUNC,
					           cond_func->arguments()[2],
                                                   cmp_type));
        }
      }
    }
    else
      /*
        The field is the low bound (value == 1) or high bound (value == 2)
        of the BETWEEN; build the corresponding single inequality against
        the constant arguments()[0].
      */
      tree= get_mm_parts(param, cond_func, field,
                         (inv ?
                          (value == (Item*)1 ? Item_func::GT_FUNC :
                                               Item_func::LT_FUNC):
                          (value == (Item*)1 ? Item_func::LE_FUNC :
                                               Item_func::GE_FUNC)),
                         cond_func->arguments()[0], cmp_type);
    break;
  }
  case Item_func::IN_FUNC:
  {
    Item_func_in *func=(Item_func_in*) cond_func;

    /*
      Array for IN() is constructed when all values have the same result
      type. Tree won't be built for values with different result types,
      so we check it here to avoid unnecessary work.
    */
    if (!func->arg_types_compatible)
      break;

    if (inv)
    {
      if (func->array && func->array->result_type() != ROW_RESULT)
      {
        /*
          We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
          where c{i} are constants. Our goal is to produce a SEL_TREE that
          represents intervals:

          ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)

          where $MIN is either "-inf" or NULL.

          The most straightforward way to produce it is to convert NOT IN
          into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
          analyzer to build SEL_TREE from that. The problem is that the
          range analyzer will use O(N^2) memory (which is probably a bug),
          and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
          will run out of memory.

          Another problem with big lists like (*) is that a big list is
          unlikely to produce a good "range" access, while considering that
          range access will require expensive CPU calculations (and for
          MyISAM even index accesses). In short, big NOT IN lists are rarely
          worth analyzing.

          Considering the above, we'll handle NOT IN as follows:
          * if the number of entries in the NOT IN list is less than
            NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
          * Otherwise, don't produce a SEL_TREE.
        */
#define NOT_IN_IGNORE_THRESHOLD 1000
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        /*
          Create one Item_type constant object. We'll need it as
          get_mm_parts only accepts constant values wrapped in Item_Type
          objects.
          We create the Item on param->mem_root which points to
          per-statement mem_root (while thd->mem_root is currently pointing
          to mem_root local to range optimizer).
        */
        Item *value_item= func->array->create_item();
        param->thd->mem_root= tmp_root;

        if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
          break;

        /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
        uint i=0;
        do
        {
          func->array->value_to_item(i, value_item);
          tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                             value_item, cmp_type);
          if (!tree)
            break;
          i++;
          /* Skip over leading list values that yield an impossible range */
        } while (i < func->array->count && tree->type == SEL_TREE::IMPOSSIBLE);

        if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
        {
          /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
          tree= NULL;
          break;
        }
        SEL_TREE *tree2;
        for (; i < func->array->count; i++)
        {
          /* Skip duplicate consecutive values: they add no new interval */
          if (func->array->compare_elems(i, i-1))
          {
            /* Get a SEL_TREE for "-inf < X < c_i" interval */
            func->array->value_to_item(i, value_item);
            tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                                value_item, cmp_type);
            if (!tree2)
            {
              tree= NULL;
              break;
            }

            /* Change all intervals to be "c_{i-1} < X < c_i" */
            for (uint idx= 0; idx < param->keys; idx++)
            {
              SEL_ARG *new_interval, *last_val;
              if (((new_interval= tree2->keys[idx])) &&
                  (tree->keys[idx]) &&
                  ((last_val= tree->keys[idx]->last())))
              {
                new_interval->min_value= last_val->max_value;
                new_interval->min_flag= NEAR_MIN;

                /*
                  If the interval is over a partial keypart, the
                  interval must be "c_{i-1} <= X < c_i" instead of
                  "c_{i-1} < X < c_i". Reason:

                  Consider a table with a column "my_col VARCHAR(3)",
                  and an index with definition
                  "INDEX my_idx my_col(1)". If the table contains rows
                  with my_col values "f" and "foo", the index will not
                  distinguish the two rows.

                  Note that tree_or() below will effectively merge
                  this range with the range created for c_{i-1} and
                  we'll eventually end up with only one range:
                  "NULL < X".

                  Partitioning indexes are never partial.
                */
                if (param->using_real_indexes)
                {
                  const KEY key=
                    param->table->key_info[param->real_keynr[idx]];
                  const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                  if (kpi->key_part_flag & HA_PART_KEY_SEG)
                    new_interval->min_flag= 0;
                }
              }
            }
            /*
              The following doesn't try to allocate memory so no need to
              check for NULL.
            */
            tree= tree_or(param, tree, tree2);
          }
        }

        if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
        {
          /*
            Get the SEL_TREE for the last "c_last < X < +inf" interval
            (value_item contains c_last already)
          */
          tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC,
                              value_item, cmp_type);
          tree= tree_or(param, tree, tree2);
        }
      }
      else
      {
        /*
          No constant array available (or ROW result type): expand NOT IN
          into a conjunction of "field <> c_i" trees instead.
        */
        tree= get_ne_mm_tree(param, cond_func, field,
                             func->arguments()[1], func->arguments()[1],
                             cmp_type);
        if (tree)
        {
          Item **arg, **end;
          for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
               arg < end ; arg++)
          {
            tree=  tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
                                                        *arg, *arg, cmp_type));
          }
        }
      }
    }
    else
    {
      /* Plain IN: disjunction of "field = c_i" trees over all list values */
      tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC,
                         func->arguments()[1], cmp_type);
      if (tree)
      {
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
                                                  Item_func::EQ_FUNC,
                                                  *arg, cmp_type));
        }
      }
    }
    break;
  }
  default:
  {
    /*
       Here the function for the following predicates are processed:
       <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL and GIS functions.
       If the predicate is of the form (value op field) it is handled
       as the equivalent predicate (field rev_op value), e.g.
       2 <= a is handled as a >= 2.
    */
    Item_func::Functype func_type=
      (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
    tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
  }
  }

  DBUG_RETURN(tree);
}
6149 
6150 
6151 /*
6152   Build conjunction of all SEL_TREEs for a simple predicate applying equalities
6153 
6154   SYNOPSIS
6155     get_full_func_mm_tree()
6156       param       PARAM from SQL_SELECT::test_quick_select
6157       cond_func   item for the predicate
6158       field_item  field in the predicate
6159       value       constant in the predicate (or a field already read from
6160                   a table in the case of dynamic range access)
6161                   (for BETWEEN it contains the number of the field argument,
6162                    for IN it's always 0)
6163       inv         TRUE <> NOT cond_func is considered
6164                   (makes sense only when cond_func is BETWEEN or IN)
6165 
6166   DESCRIPTION
6167     For a simple SARGable predicate of the form (f op c), where f is a field and
6168     c is a constant, the function builds a conjunction of all SEL_TREES that can
6169     be obtained by the substitution of f for all different fields equal to f.
6170 
6171   NOTES
6172     If the WHERE condition contains a predicate (fi op c),
    then not only a SEL_TREE for this predicate is built, but
6174     the trees for the results of substitution of fi for
6175     each fj belonging to the same multiple equality as fi
6176     are built as well.
6177     E.g. for WHERE t1.a=t2.a AND t2.a > 10
6178     a SEL_TREE for t2.a > 10 will be built for quick select from t2
6179     and
6180     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
6181 
6182     A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
    in a similar way: we build a conjunction of trees for the results
    of all substitutions of fi for equal fj.
    Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
    differently. It is considered as a conjunction of two SARGable
6187     predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
6188     is called for each of them separately producing trees for
6189        AND j (f1j <=c ) and AND j (f2j <= c)
6190     After this these two trees are united in one conjunctive tree.
6191     It's easy to see that the same tree is obtained for
6192        AND j,k (f1j <=c AND f2k<=c)
6193     which is equivalent to
6194        AND j,k (c BETWEEN f1j AND f2k).
6195     The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
6196     which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
6197     function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
6198     producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two
6199     trees are united in one OR-tree. The expression
6200       (AND j (f1j > c) OR AND j (f2j < c)
6201     is equivalent to the expression
6202       AND j,k (f1j > c OR f2k < c)
6203     which is just a translation of
6204       AND j,k (c NOT BETWEEN f1j AND f2k)
6205 
6206     In the cases when one of the items f1, f2 is a constant c1 we do not create
6207     a tree for it at all. It works for BETWEEN predicates but does not
6208     work for NOT BETWEEN predicates as we have to evaluate the expression
6209     with it. If it is TRUE then the other tree can be completely ignored.
6210     We do not do it now and no trees are built in these cases for
6211     NOT BETWEEN predicates.
6212 
6213     As to IN predicates only ones of the form (f IN (c1,...,cn)),
6214     where f1 is a field and c1,...,cn are constant, are considered as
6215     SARGable. We never try to narrow the index scan using predicates of
6216     the form (c IN (c1,...,f,...,cn)).
6217 
6218   RETURN
6219     Pointer to the tree representing the built conjunction of SEL_TREEs
6220 */
6221 
get_full_func_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Item_field * field_item,Item * value,bool inv)6222 static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
6223                                        Item_func *cond_func,
6224                                        Item_field *field_item, Item *value,
6225                                        bool inv)
6226 {
6227   SEL_TREE *tree= 0;
6228   SEL_TREE *ftree= 0;
6229   table_map ref_tables= 0;
6230   table_map param_comp= ~(param->prev_tables | param->read_tables |
6231 		          param->current_table);
6232   DBUG_ENTER("get_full_func_mm_tree");
6233 
6234   for (uint i= 0; i < cond_func->arg_count; i++)
6235   {
6236     Item *arg= cond_func->arguments()[i]->real_item();
6237     if (arg != field_item)
6238       ref_tables|= arg->used_tables();
6239   }
6240   Field *field= field_item->field;
6241   Item_result cmp_type= field->cmp_type();
6242   if (!((ref_tables | field->table->map) & param_comp))
6243     ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv);
6244   Item_equal *item_equal= field_item->item_equal;
6245   if (item_equal)
6246   {
6247     Item_equal_iterator it(*item_equal);
6248     Item_field *item;
6249     while ((item= it++))
6250     {
6251       Field *f= item->field;
6252       if (field->eq(f))
6253         continue;
6254       if (!((ref_tables | f->table->map) & param_comp))
6255       {
6256         tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv);
6257         ftree= !ftree ? tree : tree_and(param, ftree, tree);
6258       }
6259     }
6260   }
6261   DBUG_RETURN(ftree);
6262 }
6263 
6264 /**
6265   The Range Analysis Module, which finds range access alternatives
6266   applicable to single or multi-index (UNION) access. The function
6267   does not calculate or care about the cost of the different
6268   alternatives.
6269 
6270   get_mm_tree() employs a relaxed boolean algebra where the solution
6271   may be bigger than what the rules of boolean algebra accept. In
6272   other words, get_mm_tree() may return range access plans that will
  read more rows than the input conditions dictate. In its simplest
6274   form, consider a condition on two fields indexed by two different
6275   indexes:
6276 
6277      "WHERE fld1 > 'x' AND fld2 > 'y'"
6278 
6279   In this case, there are two single-index range access alternatives.
6280   No matter which access path is chosen, rows that are not in the
6281   result set may be read.
6282 
6283   In the case above, get_mm_tree() will create range access
6284   alternatives for both indexes, so boolean algebra is still correct.
6285   In other cases, however, the conditions are too complex to be used
6286   without relaxing the rules. This typically happens when ORing a
6287   conjunction to a multi-index disjunctions (@see e.g.
6288   imerge_list_or_tree()). When this happens, the range optimizer may
6289   choose to ignore conjunctions (any condition connected with AND). The
6290   effect of this is that the result includes a "bigger" solution than
6291   neccessary. This is OK since all conditions will be used as filters
6292   after row retrieval.
6293 
6294   @see SEL_TREE::keys and SEL_TREE::merges for details of how single
6295   and multi-index range access alternatives are stored.
6296 */
get_mm_tree(RANGE_OPT_PARAM * param,Item * cond)6297 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond)
6298 {
6299   SEL_TREE *tree=0;
6300   SEL_TREE *ftree= 0;
6301   Item_field *field_item= 0;
6302   bool inv= FALSE;
6303   Item *value= 0;
6304   DBUG_ENTER("get_mm_tree");
6305 
6306   if (cond->type() == Item::COND_ITEM)
6307   {
6308     List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
6309 
6310     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
6311     {
6312       tree= NULL;
6313       Item *item;
6314       while ((item=li++))
6315       {
6316         SEL_TREE *new_tree= get_mm_tree(param,item);
6317         if (param->statement_should_be_aborted())
6318           DBUG_RETURN(NULL);
6319         tree= tree_and(param,tree,new_tree);
6320         dbug_print_tree("after_and", tree, param);
6321         if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
6322           break;
6323       }
6324     }
6325     else
6326     {                                           // Item OR
6327       tree= get_mm_tree(param,li++);
6328       if (param->statement_should_be_aborted())
6329         DBUG_RETURN(NULL);
6330       if (tree)
6331       {
6332         Item *item;
6333         while ((item=li++))
6334         {
6335           SEL_TREE *new_tree=get_mm_tree(param,item);
6336           if (new_tree == NULL || param->statement_should_be_aborted())
6337             DBUG_RETURN(NULL);
6338           tree= tree_or(param,tree,new_tree);
6339           dbug_print_tree("after_or", tree, param);
6340           if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
6341             break;
6342         }
6343       }
6344     }
6345     dbug_print_tree("tree_returned", tree, param);
6346     DBUG_RETURN(tree);
6347   }
6348   /*
6349     Here when simple cond
6350     There are limits on what kinds of const items we can evaluate.
6351     At this stage a subquery in 'cond' might not be fully transformed yet
6352     (example: semijoin) thus cannot be evaluated.
6353   */
6354   if (cond->const_item() && !cond->is_expensive() && !cond->has_subquery())
6355   {
6356     /*
6357       During the cond->val_int() evaluation we can come across a subselect
6358       item which may allocate memory on the thd->mem_root and assumes
6359       all the memory allocated has the same life span as the subselect
6360       item itself. So we have to restore the thread's mem_root here.
6361     */
6362     MEM_ROOT *tmp_root= param->mem_root;
6363     param->thd->mem_root= param->old_root;
6364     tree= cond->val_int() ? new(tmp_root) SEL_TREE(SEL_TREE::ALWAYS) :
6365                             new(tmp_root) SEL_TREE(SEL_TREE::IMPOSSIBLE);
6366     param->thd->mem_root= tmp_root;
6367     dbug_print_tree("tree_returned", tree, param);
6368     DBUG_RETURN(tree);
6369   }
6370 
6371   table_map ref_tables= 0;
6372   table_map param_comp= ~(param->prev_tables | param->read_tables |
6373 		          param->current_table);
6374   if (cond->type() != Item::FUNC_ITEM)
6375   {						// Should be a field
6376     ref_tables= cond->used_tables();
6377     if ((ref_tables & param->current_table) ||
6378 	(ref_tables & ~(param->prev_tables | param->read_tables)))
6379       DBUG_RETURN(0);
6380     DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE));
6381   }
6382 
6383   Item_func *cond_func= (Item_func*) cond;
6384   if (cond_func->functype() == Item_func::BETWEEN ||
6385       cond_func->functype() == Item_func::IN_FUNC)
6386     inv= ((Item_func_opt_neg *) cond_func)->negated;
6387   else
6388   {
6389     /*
6390       During the cond_func->select_optimize() evaluation we can come across a
6391       subselect item which may allocate memory on the thd->mem_root and assumes
6392       all the memory allocated has the same life span as the subselect item
6393       itself. So we have to restore the thread's mem_root here.
6394     */
6395     MEM_ROOT *tmp_root= param->mem_root;
6396     param->thd->mem_root= param->old_root;
6397     Item_func::optimize_type opt_type= cond_func->select_optimize();
6398     param->thd->mem_root= tmp_root;
6399     if (opt_type == Item_func::OPTIMIZE_NONE)
6400       DBUG_RETURN(NULL);
6401   }
6402 
6403   param->cond= cond;
6404 
6405   switch (cond_func->functype()) {
6406   case Item_func::BETWEEN:
6407     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6408     {
6409       field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6410       ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6411     }
6412 
6413     /*
6414       Concerning the code below see the NOTES section in
6415       the comments for the function get_full_func_mm_tree()
6416     */
6417     for (uint i= 1 ; i < cond_func->arg_count ; i++)
6418     {
6419       if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
6420       {
6421         field_item= (Item_field*) (cond_func->arguments()[i]->real_item());
6422         SEL_TREE *tmp= get_full_func_mm_tree(param, cond_func,
6423                                     field_item, (Item*)(intptr)i, inv);
6424         if (inv)
6425         {
6426           tree= !tree ? tmp : tree_or(param, tree, tmp);
6427           if (tree == NULL)
6428             break;
6429         }
6430         else
6431           tree= tree_and(param, tree, tmp);
6432       }
6433       else if (inv)
6434       {
6435         tree= 0;
6436         break;
6437       }
6438     }
6439 
6440     ftree = tree_and(param, ftree, tree);
6441     break;
6442   case Item_func::IN_FUNC:
6443   {
6444     Item_func_in *func=(Item_func_in*) cond_func;
6445     if (func->key_item()->real_item()->type() != Item::FIELD_ITEM)
6446       DBUG_RETURN(0);
6447     field_item= (Item_field*) (func->key_item()->real_item());
6448     ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6449     break;
6450   }
6451   case Item_func::MULT_EQUAL_FUNC:
6452   {
6453     Item_equal *item_equal= (Item_equal *) cond;
6454     if (!(value= item_equal->get_const()))
6455       DBUG_RETURN(0);
6456     Item_equal_iterator it(*item_equal);
6457     ref_tables= value->used_tables();
6458     while ((field_item= it++))
6459     {
6460       Field *field= field_item->field;
6461       Item_result cmp_type= field->cmp_type();
6462       if (!((ref_tables | field->table->map) & param_comp))
6463       {
6464         tree= get_mm_parts(param, item_equal, field, Item_func::EQ_FUNC,
6465 		           value,cmp_type);
6466         ftree= !ftree ? tree : tree_and(param, ftree, tree);
6467       }
6468     }
6469 
6470     dbug_print_tree("tree_returned", ftree, param);
6471     DBUG_RETURN(ftree);
6472   }
6473   default:
6474 
6475     DBUG_ASSERT (!ftree);
6476     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6477     {
6478       field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6479       value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : NULL;
6480       ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6481     }
6482     /*
6483       Even if get_full_func_mm_tree() was executed above and did not
6484       return a range predicate it may still be possible to create one
6485       by reversing the order of the operands. Note that this only
6486       applies to predicates where both operands are fields. Example: A
6487       query of the form
6488 
6489          WHERE t1.a OP t2.b
6490 
6491       In this case, arguments()[0] == t1.a and arguments()[1] == t2.b.
6492       When creating range predicates for t2, get_full_func_mm_tree()
6493       above will return NULL because 'field' belongs to t1 and only
6494       predicates that applies to t2 are of interest. In this case a
6495       call to get_full_func_mm_tree() with reversed operands (see
6496       below) may succeed.
6497      */
6498     if (!ftree && cond_func->have_rev_func() &&
6499         cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM)
6500     {
6501       field_item= (Item_field*) (cond_func->arguments()[1]->real_item());
6502       value= cond_func->arguments()[0];
6503       ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6504     }
6505   }
6506 
6507   dbug_print_tree("tree_returned", ftree, param);
6508   DBUG_RETURN(ftree);
6509 }
6510 
6511 /**
6512   Test whether a comparison operator is a spatial comparison
6513   operator, i.e. Item_func::SP_*.
6514 
6515   Used to check if range access using operator 'op_type' is applicable
6516   for a non-spatial index.
6517 
6518   @param   op_type  The comparison operator.
6519   @return  true if 'op_type' is a spatial comparison operator, false otherwise.
6520 
6521 */
is_spatial_operator(Item_func::Functype op_type)6522 bool is_spatial_operator(Item_func::Functype op_type)
6523 {
6524   switch (op_type)
6525   {
6526   case Item_func::SP_EQUALS_FUNC:
6527   case Item_func::SP_DISJOINT_FUNC:
6528   case Item_func::SP_INTERSECTS_FUNC:
6529   case Item_func::SP_TOUCHES_FUNC:
6530   case Item_func::SP_CROSSES_FUNC:
6531   case Item_func::SP_WITHIN_FUNC:
6532   case Item_func::SP_CONTAINS_FUNC:
6533   case Item_func::SP_OVERLAPS_FUNC:
6534   case Item_func::SP_STARTPOINT:
6535   case Item_func::SP_ENDPOINT:
6536   case Item_func::SP_EXTERIORRING:
6537   case Item_func::SP_POINTN:
6538   case Item_func::SP_GEOMETRYN:
6539   case Item_func::SP_INTERIORRINGN:
6540     return true;
6541   default:
6542     return false;
6543   }
6544 }
6545 
6546 static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item_func::Functype type,Item * value,Item_result cmp_type)6547 get_mm_parts(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field,
6548 	     Item_func::Functype type,
6549 	     Item *value, Item_result cmp_type)
6550 {
6551   DBUG_ENTER("get_mm_parts");
6552   if (field->table != param->table)
6553     DBUG_RETURN(0);
6554 
6555   KEY_PART *key_part = param->key_parts;
6556   KEY_PART *end = param->key_parts_end;
6557   SEL_TREE *tree=0;
6558   if (value &&
6559       value->used_tables() & ~(param->prev_tables | param->read_tables))
6560     DBUG_RETURN(0);
6561   for (; key_part != end ; key_part++)
6562   {
6563     if (field->eq(key_part->field))
6564     {
6565       /*
6566         Cannot do range access for spatial operators when a
6567         non-spatial index is used.
6568       */
6569       if (key_part->image_type != Field::itMBR &&
6570           is_spatial_operator(cond_func->functype()))
6571         continue;
6572 
6573       SEL_ARG *sel_arg=0;
6574       if (!tree && !(tree=new SEL_TREE()))
6575 	DBUG_RETURN(0);				// OOM
6576       if (!value || !(value->used_tables() & ~param->read_tables))
6577       {
6578 	sel_arg=get_mm_leaf(param,cond_func,
6579 			    key_part->field,key_part,type,value);
6580 	if (!sel_arg)
6581 	  continue;
6582 	if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
6583 	{
6584 	  tree->type=SEL_TREE::IMPOSSIBLE;
6585 	  DBUG_RETURN(tree);
6586 	}
6587       }
6588       else
6589       {
6590 	// This key may be used later
6591 	if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY)))
6592 	  DBUG_RETURN(0);			// OOM
6593       }
6594       sel_arg->part=(uchar) key_part->part;
6595       tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
6596       tree->keys_map.set_bit(key_part->key);
6597     }
6598   }
6599 
6600   if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
6601     tree= NULL;
6602   DBUG_RETURN(tree);
6603 }
6604 
6605 /**
6606   Saves 'value' in 'field' and handles potential type conversion
6607   problems.
6608 
6609   @param tree [out]                 The SEL_ARG leaf under construction. If
6610                                     an always false predicate is found it is
6611                                     modified to point to a SEL_ARG with
6612                                     type == SEL_ARG::IMPOSSIBLE
6613   @param value                      The Item that contains a value that shall
6614                                     be stored in 'field'.
6615   @param comp_op                    Comparison operator: >, >=, <=> etc.
6616   @param field                      The field that 'value' is stored into.
6617   @param impossible_cond_cause[out] Set to a descriptive string if an
6618                                     impossible condition is found.
6619   @param memroot                    Memroot for creation of new SEL_ARG.
6620 
6621   @retval false  if saving went fine and it makes sense to continue
6622                  optimizing for this predicate.
6623   @retval true   if always true/false predicate was found, in which
6624                  case 'tree' has been modified to reflect this: NULL
6625                  pointer if always true, SEL_ARG with type IMPOSSIBLE
6626                  if always false.
6627 */
static bool save_value_and_handle_conversion(SEL_ARG **tree,
                                             Item *value,
                                             const Item_func::Functype comp_op,
                                             Field *field,
                                             const char **impossible_cond_cause,
                                             MEM_ROOT *memroot)
{
  // A SEL_ARG should not have been created for this predicate yet.
  DBUG_ASSERT(*tree == NULL);

  if (!value->can_be_evaluated_now())
  {
    /*
      We cannot evaluate the value yet (i.e. required tables are not yet
      locked.)
      This is the case of prune_partitions() called during JOIN::prepare().
    */
    return true;
  }

  // For comparison purposes allow invalid dates like 2000-01-32
  const sql_mode_t orig_sql_mode= field->table->in_use->variables.sql_mode;
  field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;

  /*
    We want to change "field > value" to "field OP V"
    where:
    * V is what is in "field" after we stored "value" in it via
    save_in_field_no_warning() (such store operation may have done
    rounding...)
    * OP is > or >=, depending on what's correct.
    For example, if c is an INT column,
    "c > 2.9" is changed to "c OP 3"
    where OP is ">=" (">" would not be correct, as 3 > 2.9, a comparison
    done with stored_field_cmp_to_item()). And
    "c > 3.1" is changed to "c OP 3" where OP is ">" (3 < 3.1...).
  */

  // Note that value may be a stored function call, executed here.
  const type_conversion_status err= value->save_in_field_no_warnings(field, 1);
  // Restore the session SQL mode before acting on the store result.
  field->table->in_use->variables.sql_mode= orig_sql_mode;

  // Map the conversion outcome to "continue" (false) or "decided" (true).
  switch (err) {
  case TYPE_OK:
  case TYPE_NOTE_TRUNCATED:
    return false;
  case TYPE_ERR_BAD_VALUE:
    /*
      In the case of incompatible values, MySQL's SQL dialect has some
      strange interpretations. For example,

          "int_col > 'foo'" is interpreted as "int_col > 0"

      instead of always false. Because of this, we assume that the
      range predicate is always true instead of always false and let
      evaluate_join_record() decide the outcome.
    */
    return true;
  case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
    // Checking NULL value on a field that cannot contain NULL.
    *impossible_cond_cause= "null_field_in_non_null_column";
    goto impossible_cond;
  case TYPE_WARN_OUT_OF_RANGE:
    /*
      value to store was either higher than field::max_value or lower
      than field::min_value. The field's max/min value has been stored
      instead.
     */
    if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
    {
      /*
        Independent of data type, "out_of_range_value =/<=> field" is
        always false.
      */
      *impossible_cond_cause= "value_out_of_range";
      goto impossible_cond;
    }

    // If the field is numeric, we can interpret the out of range value.
    if ((field->type() != FIELD_TYPE_BIT) &&
        (field->result_type() == REAL_RESULT ||
         field->result_type() == INT_RESULT ||
         field->result_type() == DECIMAL_RESULT))
    {
      /*
        value to store was higher than field::max_value if
           a) field has a value greater than 0, or
           b) if field is unsigned and has a negative value (which, when
              cast to unsigned, means some value higher than LONGLONG_MAX).
      */
      if ((field->val_int() > 0) ||                              // a)
          (static_cast<Field_num*>(field)->unsigned_flag &&
           field->val_int() < 0))                                // b)
      {
        if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
        {
          /*
            '<' or '<=' compared to a value higher than the field
            can store is always true.
          */
          return true;
        }
        if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
        {
          /*
            '>' or '>=' compared to a value higher than the field can
            store is always false.
          */
          *impossible_cond_cause= "value_out_of_range";
          goto impossible_cond;
        }
      }
      else // value is lower than field::min_value
      {
        if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
        {
          /*
            '>' or '>=' compared to a value lower than the field
            can store is always true.
          */
          return true;
        }
        if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
        {
          /*
            '<' or '<=' compared to a value lower than the field can
            store is always false.
          */
          *impossible_cond_cause= "value_out_of_range";
          goto impossible_cond;
        }
      }
    }
    /*
      Value is out of range on a datatype where it can't be decided if
      it was underflow or overflow. It is therefore not possible to
      determine whether or not the condition is impossible or always
      true and we have to assume always true.
    */
    return true;
  case TYPE_NOTE_TIME_TRUNCATED:
    if (field->type() == FIELD_TYPE_DATE &&
        (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC ||
         comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC))
    {
      /*
        We were saving DATETIME into a DATE column, the conversion went ok
        but a non-zero time part was cut off.

        In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
        values. Index over a DATE column uses DATE comparison. Changing
        from one comparison to the other is possible:

        datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
        datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'

        datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
        datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'

        but we'll need to convert '>' to '>=' and '<' to '<='. This will
        be done together with other types at the end of get_mm_leaf()
        (grep for stored_field_cmp_to_item)
      */
      return false;
    }
    if (comp_op == Item_func::EQ_FUNC || comp_op == Item_func::EQUAL_FUNC)
    {
      // Equality comparison is always false when time info has been truncated.
      // NOTE(review): unlike the other impossible branches, no
      // *impossible_cond_cause is set here — confirm this is intentional.
      goto impossible_cond;
    }
    // Fall through
  default:
    return true;
  }

  DBUG_ASSERT(FALSE); // Should never get here.

impossible_cond:
  // Mark the predicate as always false: build a placeholder SEL_ARG
  // on 'memroot' and tag it IMPOSSIBLE for the caller to act on.
  *tree= new (memroot) SEL_ARG(field, 0, 0);
  (*tree)->type= SEL_ARG::IMPOSSIBLE;
  return true;
}
6810 
/**
  Build a SEL_ARG leaf (one range interval) for the predicate
  "field <type> value" on the given key part.

  @param param     Range analysis context; supplies mem_root and thd.
  @param conf_func The comparison item the predicate came from; used
                   for collation compatibility checks.
  @param field     Column the predicate applies to.
  @param key_part  Description of the key part covering 'field'.
  @param type      Comparison operator (EQ_FUNC, LT_FUNC, LIKE_FUNC, ...).
  @param value     Right-hand side of the comparison, or NULL for
                   IS [NOT] NULL predicates.

  @return New SEL_ARG, &null_element for an always-false predicate,
          or NULL when no usable range predicate could be built.
*/
static SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM *param, Item *conf_func, Field *field,
            KEY_PART *key_part, Item_func::Functype type,Item *value)
{
  // 1 if the field is nullable; key images then carry a leading NULL byte.
  uint maybe_null=(uint) field->real_maybe_null();
  bool optimize_range;
  SEL_ARG *tree= 0;
  MEM_ROOT *alloc= param->mem_root;
  uchar *str;
  // When set, an "impossible_condition" object is added to the optimizer
  // trace at 'end:'.
  const char *impossible_cond_cause= NULL;
  DBUG_ENTER("get_mm_leaf");

  /*
    We need to restore the runtime mem_root of the thread in this
    function because it evaluates the value of its argument, while
    the argument can be any, e.g. a subselect. The subselect
    items, in turn, assume that all the memory allocated during
    the evaluation has the same life span as the item itself.
    TODO: opt_range.cc should not reset thd->mem_root at all.
  */
  param->thd->mem_root= param->old_root;
  if (!value)					// IS NULL or IS NOT NULL
  {
    if (field->table->maybe_null)		// Can't use a key on this
      goto end;
    if (!maybe_null)				// Not null field
    {
      if (type == Item_func::ISNULL_FUNC)
        tree= &null_element;
      goto end;
    }
    // Build a key image holding the NULL marker for this key part.
    uchar *null_string=
      static_cast<uchar*>(alloc_root(alloc, key_part->store_length + 1));
    if (!null_string)
      goto end;                                 // out of memory

    TRASH(null_string, key_part->store_length + 1);
    memcpy(null_string, is_null_string, sizeof(is_null_string));

    if (!(tree= new (alloc) SEL_ARG(field, null_string, null_string)))
      goto end;                                 // out of memory
    if (type == Item_func::ISNOTNULL_FUNC)
    {
      tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
      tree->max_flag=NO_MAX_RANGE;
    }
    goto end;
  }

  /*
    1. Usually we can't use an index if the column collation
       differ from the operation collation.

    2. However, we can reuse a case insensitive index for
       the binary searches:

       WHERE latin1_swedish_ci_column = 'a' COLLATE latin1_bin;

       WHERE latin1_swedish_ci_column = BINARY 'a '
  */
  if ((field->result_type() == STRING_RESULT &&
       field->match_collation_to_optimize_range() &&
       value->result_type() == STRING_RESULT &&
       key_part->image_type == Field::itRAW &&
       field->charset() != conf_func->compare_collation() &&
       !(conf_func->compare_collation()->state & MY_CS_BINSORT &&
         (type == Item_func::EQUAL_FUNC || type == Item_func::EQ_FUNC))))
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  /*
    Temporal values: Cannot use range access if:
      1) 'temporal_value = indexed_varchar_column' because there are
         many ways to represent the same date as a string. A few
         examples: "01-01-2001", "1-1-2001", "2001-01-01",
         "2001#01#01". The same problem applies to time. Thus, we
         cannot create a useful range predicate for temporal values
         into VARCHAR column indexes. @see add_key_field()
      2) 'temporal_value_with_date_part = indexed_time' because:
         - without index, a TIME column with value '48:00:00' is
           equal to a DATETIME column with value
           'CURDATE() + 2 days'
         - with range access into the TIME column, CURDATE() + 2
           days becomes "00:00:00" (Field_timef::store_internal()
           simply extracts the time part from the datetime) which
           is a lookup key which does not match "48:00:00"; so
           ref access is not able to give the same result.
           On the other hand, we can do ref access for
           IndexedDatetimeComparedToTime because
           Field_temporal_with_date::store_time() will convert
           48:00:00 to CURDATE() + 2 days which is the correct
           lookup key.
   */
  if ((!field->is_temporal() && value->is_temporal()) ||   // 1)
      field_time_cmp_date(field, value))                   // 2)
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  if (key_part->image_type == Field::itMBR)
  {
    // @todo: use is_spatial_operator() instead?
    switch (type) {
    case Item_func::SP_EQUALS_FUNC:
    case Item_func::SP_DISJOINT_FUNC:
    case Item_func::SP_INTERSECTS_FUNC:
    case Item_func::SP_TOUCHES_FUNC:
    case Item_func::SP_CROSSES_FUNC:
    case Item_func::SP_WITHIN_FUNC:
    case Item_func::SP_CONTAINS_FUNC:
    case Item_func::SP_OVERLAPS_FUNC:
      break;
    default:
      /*
        We cannot involve spatial indexes for queries that
        don't use MBREQUALS(), MBRDISJOINT(), etc. functions.
      */
      goto end;
    }
  }

  // For real indexes, ask the field whether range scans are sensible
  // for this key part; pseudo-indexes (partitioning) always allow it.
  if (param->using_real_indexes)
    optimize_range= field->optimize_range(param->real_keynr[key_part->key],
                                          key_part->part);
  else
    optimize_range= TRUE;

  if (type == Item_func::LIKE_FUNC)
  {
    bool like_error;
    char buff1[MAX_FIELD_WIDTH];
    uchar *min_str,*max_str;
    String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
    size_t length, offset, min_length, max_length;
    uint field_length= field->pack_length()+maybe_null;

    if (!optimize_range)
      goto end;
    if (!(res= value->val_str(&tmp)))
    {
      // LIKE NULL is never true.
      tree= &null_element;
      goto end;
    }

    /*
      TODO:
      Check if this was a function. This should have be optimized away
      in the sql_select.cc
    */
    if (res != &tmp)
    {
      tmp.copy(*res);				// Get own copy
      res= &tmp;
    }
    if (field->cmp_type() != STRING_RESULT)
      goto end;                                 // Can only optimize strings

    offset=maybe_null;
    length=key_part->store_length;

    if (length != key_part->length  + maybe_null)
    {
      /* key packed with length prefix */
      offset+= HA_KEY_BLOB_LENGTH;
      field_length= length - HA_KEY_BLOB_LENGTH;
    }
    else
    {
      if (unlikely(length < field_length))
      {
	/*
	  This can only happen in a table created with UNIREG where one key
	  overlaps many fields
	*/
	length= field_length;
      }
      else
	field_length= length;
    }
    length+=offset;
    // One buffer holds both the min and max key images, back to back.
    if (!(min_str= (uchar*) alloc_root(alloc, length*2)))
      goto end;

    max_str=min_str+length;
    if (maybe_null)
      max_str[0]= min_str[0]=0;

    field_length-= maybe_null;
    // Derive the [min_str, max_str] interval from the LIKE pattern.
    like_error= my_like_range(field->charset(),
			      res->ptr(), res->length(),
			      ((Item_func_like*)(param->cond))->escape,
			      wild_one, wild_many,
			      field_length,
			      (char*) min_str+offset, (char*) max_str+offset,
			      &min_length, &max_length);
    if (like_error)				// Can't optimize with LIKE
      goto end;

    if (offset != maybe_null)			// BLOB or VARCHAR
    {
      // Store the 2-byte length prefix of each bound.
      int2store(min_str+maybe_null,min_length);
      int2store(max_str+maybe_null,max_length);
    }
    tree= new (alloc) SEL_ARG(field, min_str, max_str);
    goto end;
  }

  if (!optimize_range &&
      type != Item_func::EQ_FUNC &&
      type != Item_func::EQUAL_FUNC)
    goto end;                                   // Can't optimize this

  /*
    We can't always use indexes when comparing a string index to a number
    cmp_type() is checked to allow compare of dates to numbers
  */
  if (field->result_type() == STRING_RESULT &&
      value->result_type() != STRING_RESULT &&
      field->cmp_type() != value->result_type())
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  // Store 'value' into 'field' (with type conversion). A true return
  // means the predicate outcome is already decided; 'tree' then either
  // stays NULL (always true) or is an IMPOSSIBLE SEL_ARG (always false).
  if (save_value_and_handle_conversion(&tree, value, type, field,
                                       &impossible_cond_cause, alloc))
    goto end;

  /*
    Any sargable predicate except "<=>" involving NULL as a constant is always
    FALSE
  */
  if (type != Item_func::EQUAL_FUNC && field->is_real_null())
  {
    impossible_cond_cause= "comparison_with_null_always_false";
    tree= &null_element;
    goto end;
  }

  str= (uchar*) alloc_root(alloc, key_part->store_length+1);
  if (!str)
    goto end;
  if (maybe_null)
    *str= (uchar) field->is_real_null();        // Set to 1 if null
  field->get_key_image(str+maybe_null, key_part->length,
                       key_part->image_type);
  if (!(tree= new (alloc) SEL_ARG(field, str, str)))
    goto end;                                   // out of memory

  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and condition
    testing, and we also get correct comparison of unsigned integers with
    negative integers (which otherwise fails because at query execution time
    negative integers are cast to unsigned if compared with unsigned).
   */
  if (field->result_type() == INT_RESULT &&
      value->result_type() == INT_RESULT &&
      ((field->type() == FIELD_TYPE_BIT ||
       ((Field_num *) field)->unsigned_flag) &&
       !((Item_int*) value)->unsigned_flag))
  {
    longlong item_val= value->val_int();
    if (item_val < 0)
    {
      if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
      {
        impossible_cond_cause= "unsigned_int_cannot_be_negative";
        tree->type= SEL_ARG::IMPOSSIBLE;
        goto end;
      }
      if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
      {
        // Always true: drop the predicate (no tree means no restriction).
        tree= 0;
        goto end;
      }
    }
  }

  // Turn the single-point interval into the half-open/open interval
  // the operator calls for, by adjusting min/max flags.
  switch (type) {
  case Item_func::LT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        stored_field_cmp_to_item(param->thd, field, value) == 0)
      tree->max_flag=NEAR_MAX;
    /* fall through */
  case Item_func::LE_FUNC:
    if (!maybe_null)
      tree->min_flag=NO_MIN_RANGE;		/* From start */
    else
    {						// > NULL
      if (!(tree->min_value=
            static_cast<uchar*>(alloc_root(alloc, key_part->store_length+1))))
        goto end;
      TRASH(tree->min_value, key_part->store_length + 1);
      memcpy(tree->min_value, is_null_string, sizeof(is_null_string));
      tree->min_flag=NEAR_MIN;
    }
    break;
  case Item_func::GT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) <= 0))
      tree->min_flag=NEAR_MIN;
    tree->max_flag= NO_MAX_RANGE;
    break;
  case Item_func::GE_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) < 0))
      tree->min_flag= NEAR_MIN;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_EQUALS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_DISJOINT_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_INTERSECTS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_TOUCHES_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  case Item_func::SP_CROSSES_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_WITHIN_FUNC:
    /*
      Adjust the min_flag as MyISAM implements this function
      in reverse order.
    */
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  case Item_func::SP_CONTAINS_FUNC:
    /*
      Adjust the min_flag as MyISAM implements this function
      in reverse order.
    */
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_OVERLAPS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  default:
    break;
  }

end:
  // Common exit: emit the optimizer-trace note (if any) and restore
  // the mem_root swapped out at function entry.
  if (impossible_cond_cause != NULL)
  {
    Opt_trace_object wrapper (&param->thd->opt_trace);
    Opt_trace_object (&param->thd->opt_trace, "impossible_condition",
                      Opt_trace_context::RANGE_OPTIMIZER).
      add_alnum("cause", impossible_cond_cause);
  }
  param->thd->mem_root= alloc;
  DBUG_RETURN(tree);
}
7189 
7190 
/******************************************************************************
** Tree manipulation functions
** If tree is 0 it means that the condition can't be tested. It refers
** to a non-existent table or to a field in the current table which isn't a key.
** The different tree flags:
** IMPOSSIBLE:	 Condition is never TRUE
** ALWAYS:	 Condition is always TRUE
** MAYBE:	 Condition may exist when tables are read
** MAYBE_KEY:	 Condition refers to a key that may be used in join loop
** KEY_RANGE:	 Condition uses a key
******************************************************************************/
7202 
/*
  Add a new key test to a key when scanning through all keys.
  This will never be called for the same key parts.
*/
7207 
7208 static SEL_ARG *
sel_add(SEL_ARG * key1,SEL_ARG * key2)7209 sel_add(SEL_ARG *key1,SEL_ARG *key2)
7210 {
7211   SEL_ARG *root,**key_link;
7212 
7213   if (!key1)
7214     return key2;
7215   if (!key2)
7216     return key1;
7217 
7218   key_link= &root;
7219   while (key1 && key2)
7220   {
7221     if (key1->part < key2->part)
7222     {
7223       *key_link= key1;
7224       key_link= &key1->next_key_part;
7225       key1=key1->next_key_part;
7226     }
7227     else
7228     {
7229       *key_link= key2;
7230       key_link= &key2->next_key_part;
7231       key2=key2->next_key_part;
7232     }
7233   }
7234   *key_link=key1 ? key1 : key2;
7235   return root;
7236 }
7237 
/*
  Flags set in tree_and() when the corresponding key argument is not a
  simple_key() and therefore may need cloning with MAYBE semantics.
*/
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Swap the KEY1/KEY2 maybe-flags (exchange bit 0 and bit 1).
  Fully parenthesized: the original expansion
    ((A & 1) << 1) | ((A & 2) >> 1)
  lacked outer parentheses and argument parentheses, so e.g.
  "swap_clone_flag(f) & 1" or "swap_clone_flag(a|b)" would misparse
  due to operator precedence.
*/
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))
7241 
7242 
/*
  Compute the conjunction (AND) of two SEL_TREEs: the result describes
  the records matching both conditions. May reuse/modify tree1 or tree2
  and return it.
*/
static SEL_TREE *
tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  // AND with "no restriction" yields the other operand.
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  // IMPOSSIBLE dominates; ALWAYS is neutral.
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  dbug_print_tree("tree1", tree1, param);
  dbug_print_tree("tree2", tree2, param);

  key_map  result_keys;

  /* Join the trees key per key */
  SEL_ARG **key1,**key2,**end;
  for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
       key1 != end ; key1++,key2++)
  {
    uint flag=0;
    if (*key1 || *key2)
    {
      // Tell key_and() which side(s) are not simple keys and hence
      // need MAYBE-aware cloning.
      if (*key1 && !(*key1)->simple_key())
	flag|=CLONE_KEY1_MAYBE;
      if (*key2 && !(*key2)->simple_key())
	flag|=CLONE_KEY2_MAYBE;
      // The result of ANDing the per-key graphs replaces tree1's entry.
      *key1=key_and(param, *key1, *key2, flag);
      if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
      {
	// One impossible key makes the whole conjunction impossible.
	tree1->type= SEL_TREE::IMPOSSIBLE;
        DBUG_RETURN(tree1);
      }
      result_keys.set_bit(key1 - tree1->keys);
#ifndef DBUG_OFF
        if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
          (*key1)->test_use_count(*key1);
#endif
    }
  }
  tree1->keys_map= result_keys;

  /* ok, both trees are index_merge trees */
  imerge_list_and_list(&tree1->merges, &tree2->merges);
  DBUG_RETURN(tree1);
}
7303 
7304 
7305 /*
7306   Check if two SEL_TREES can be combined into one (i.e. a single key range
7307   read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
7308   using index_merge.
7309 */
7310 
sel_trees_can_be_ored(SEL_TREE * tree1,SEL_TREE * tree2,RANGE_OPT_PARAM * param)7311 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
7312                            RANGE_OPT_PARAM* param)
7313 {
7314   key_map common_keys= tree1->keys_map;
7315   DBUG_ENTER("sel_trees_can_be_ored");
7316   common_keys.intersect(tree2->keys_map);
7317 
7318   dbug_print_tree("tree1", tree1, param);
7319   dbug_print_tree("tree2", tree2, param);
7320 
7321   if (common_keys.is_clear_all())
7322     DBUG_RETURN(FALSE);
7323 
7324   /* trees have a common key, check if they refer to same key part */
7325   SEL_ARG **key1,**key2;
7326   for (uint key_no=0; key_no < param->keys; key_no++)
7327   {
7328     if (common_keys.is_set(key_no))
7329     {
7330       key1= tree1->keys + key_no;
7331       key2= tree2->keys + key_no;
7332       if ((*key1)->part == (*key2)->part)
7333         DBUG_RETURN(TRUE);
7334     }
7335   }
7336   DBUG_RETURN(FALSE);
7337 }
7338 
7339 
7340 /*
7341   Remove the trees that are not suitable for record retrieval.
7342   SYNOPSIS
7343     param  Range analysis parameter
7344     tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
7345 
7346   DESCRIPTION
7347     This function walks through tree->keys[] and removes the SEL_ARG* trees
7348     that are not "maybe" trees (*) and cannot be used to construct quick range
7349     selects.
7350     (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
7351           these types here as well.
7352 
7353     A SEL_ARG* tree cannot be used to construct quick select if it has
7354     tree->part != 0. (e.g. it could represent "keypart2 < const").
7355 
7356     WHY THIS FUNCTION IS NEEDED
7357 
7358     Normally we allow construction of SEL_TREE objects that have SEL_ARG
7359     trees that do not allow quick range select construction. For example for
7360     " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
7361     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
7362     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
7363                                                from this
7364     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
7365                                    tree.
7366 
7367     There is an exception though: when we construct index_merge SEL_TREE,
7368     any SEL_ARG* tree that cannot be used to construct quick range select can
7369     be removed, because current range analysis code doesn't provide any way
7370     that tree could be later combined with another tree.
7371     Consider an example: we should not construct
7372     st1 = SEL_TREE {
7373       merges = SEL_IMERGE {
7374                             SEL_TREE(t.key1part1 = 1),
7375                             SEL_TREE(t.key2part2 = 2)   -- (*)
7376                           }
7377                    };
7378     because
7379      - (*) cannot be used to construct quick range select,
7380      - There is no execution path that would cause (*) to be converted to
7381        a tree that could be used.
7382 
7383     The latter is easy to verify: first, notice that the only way to convert
7384     (*) into a usable tree is to call tree_and(something, (*)).
7385 
7386     Second look at what tree_and/tree_or function would do when passed a
    SEL_TREE that has the structure like st1 tree has, and conclude that
7388     tree_and(something, (*)) will not be called.
7389 
7390   RETURN
7391     0  Ok, some suitable trees left
7392     1  No tree->keys[] left.
7393 */
7394 
remove_nonrange_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree)7395 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
7396 {
7397   bool res= FALSE;
7398   for (uint i=0; i < param->keys; i++)
7399   {
7400     if (tree->keys[i])
7401     {
7402       if (tree->keys[i]->part)
7403       {
7404         tree->keys[i]= NULL;
7405         tree->keys_map.clear_bit(i);
7406       }
7407       else
7408         res= TRUE;
7409     }
7410   }
7411   return !res;
7412 }
7413 
7414 
7415 static SEL_TREE *
tree_or(RANGE_OPT_PARAM * param,SEL_TREE * tree1,SEL_TREE * tree2)7416 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
7417 {
7418   DBUG_ENTER("tree_or");
7419   if (!tree1 || !tree2)
7420     DBUG_RETURN(0);
7421   if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7422     DBUG_RETURN(tree2);
7423   if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7424     DBUG_RETURN(tree1);
7425   if (tree1->type == SEL_TREE::MAYBE)
7426     DBUG_RETURN(tree1);				// Can't use this
7427   if (tree2->type == SEL_TREE::MAYBE)
7428     DBUG_RETURN(tree2);
7429 
7430   /*
7431     It is possible that a tree contains both
7432     a) simple range predicates (in tree->keys[]) and
7433     b) index merge range predicates (in tree->merges)
7434 
7435     If a tree has both, they represent equally *valid* range
7436     predicate alternatives; both will return all relevant rows from
7437     the table but one may return more unnecessary rows than the
7438     other (additional rows will be filtered later). However, doing
7439     an OR operation on trees with both types of predicates is too
7440     complex at the time. We therefore remove the index merge
7441     predicates (if we have both types) before OR'ing the trees.
7442 
7443     TODO: enable tree_or() for trees with both simple and index
7444     merge range predicates.
7445   */
7446   if (!tree1->merges.is_empty())
7447   {
7448     for (uint i= 0; i < param->keys; i++)
7449       if (tree1->keys[i] != NULL && tree2->keys[i] != &null_element)
7450       {
7451         tree1->merges.empty();
7452         break;
7453       }
7454   }
7455   if (!tree2->merges.is_empty())
7456   {
7457     for (uint i= 0; i< param->keys; i++)
7458       if (tree2->keys[i] != NULL && tree2->keys[i] != &null_element)
7459       {
7460         tree2->merges.empty();
7461         break;
7462       }
7463   }
7464 
7465   SEL_TREE *result= 0;
7466   key_map  result_keys;
7467   if (sel_trees_can_be_ored(tree1, tree2, param))
7468   {
7469     /* Join the trees key per key */
7470     SEL_ARG **key1,**key2,**end;
7471     for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
7472          key1 != end ; key1++,key2++)
7473     {
7474       *key1=key_or(param, *key1, *key2);
7475       if (*key1)
7476       {
7477         result=tree1;				// Added to tree1
7478         result_keys.set_bit(key1 - tree1->keys);
7479 #ifndef DBUG_OFF
7480         if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
7481           (*key1)->test_use_count(*key1);
7482 #endif
7483       }
7484     }
7485     if (result)
7486       result->keys_map= result_keys;
7487   }
7488   else
7489   {
7490     /* ok, two trees have KEY type but cannot be used without index merge */
7491     if (tree1->merges.is_empty() && tree2->merges.is_empty())
7492     {
7493       if (param->remove_jump_scans)
7494       {
7495         bool no_trees= remove_nonrange_trees(param, tree1);
7496         no_trees= no_trees || remove_nonrange_trees(param, tree2);
7497         if (no_trees)
7498           DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7499       }
7500       SEL_IMERGE *merge;
7501       /* both trees are "range" trees, produce new index merge structure */
7502       if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
7503           (result->merges.push_back(merge)) ||
7504           (merge->or_sel_tree(param, tree1)) ||
7505           (merge->or_sel_tree(param, tree2)))
7506         result= NULL;
7507       else
7508         result->type= tree1->type;
7509     }
7510     else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
7511     {
7512       if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
7513         result= new SEL_TREE(SEL_TREE::ALWAYS);
7514       else
7515         result= tree1;
7516     }
7517     else
7518     {
7519       /* one tree is index merge tree and another is range tree */
7520       if (tree1->merges.is_empty())
7521         swap_variables(SEL_TREE*, tree1, tree2);
7522 
7523       if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
7524          DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7525       /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
7526       if (imerge_list_or_tree(param, &tree1->merges, tree2))
7527         result= new SEL_TREE(SEL_TREE::ALWAYS);
7528       else
7529         result= tree1;
7530     }
7531   }
7532   DBUG_RETURN(result);
7533 }
7534 
7535 
/*
  And key trees where key1->part < key2->part: attach key2 as the
  next_key_part of every range in key1 (AND:ing it with any existing
  next_key_part via key_and()).

  @param param       Range analysis context
  @param key1        Tree covering the smaller keypart; modified in place
  @param key2        Tree to AND onto every range of key1
  @param clone_flag  CLONE_KEY*_MAYBE flags forwarded to key_and()

  @return key1 with key2 attached, or &null_element if every range of
          key1 became impossible.
*/

static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
             uint clone_flag)
{
  SEL_ARG *next;
  ulong use_count=key1->use_count;

  if (key1->elements != 1)
  {
    /* key2 becomes referenced from every range in key1 */
    key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    // See todo for left/right pointers
    DBUG_ASSERT(!key1->left);
    DBUG_ASSERT(!key1->right);
    key1->next= key1->prev= 0;
  }
  /* Walk the ranges in key order; tree_delete() keeps next pointers valid. */
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
	/* This range can never match; drop it from key1 */
	key1=key1->tree_delete(next);
	continue;
      }
      next->next_key_part=tmp;
      if (use_count)
	next->increment_use_count(use_count);
      /* Too many SEL_ARGs allocated: stop refining, result stays valid */
      if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
        break;
    }
    else
      next->next_key_part=key2;
  }
  if (!key1)
    return &null_element;			// Impossible ranges
  key1->use_count++;
  return key1;
}
7581 
7582 
/*
  Produce a SEL_ARG graph that represents "key1 AND key2"

  SYNOPSIS
    key_and()
      param   Range analysis context (needed to track if we have allocated
              too many SEL_ARGs)
      key1    First argument, root of its RB-tree
      key2    Second argument, root of its RB-tree

  RETURN
    RB-tree root of the resulting SEL_ARG graph.
    NULL if the result of AND operation is an empty interval {0}.
*/

static SEL_ARG *
key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
  /* A missing operand imposes no restriction: return the other one */
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    /*
      The trees cover different key parts. Normalize so that key1 is the
      one with the smaller part, then attach key2 below every range of
      key1 via and_all_keys().
    */
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part
    key1->use_count--;
    /*
     Clone key1 if the use_count is greater than 0 otherwise use the
     "clone_flag" to determine if a key needs to be cloned.
     "clone_flag" is set to true if the conditions which need to be
     ANDed (in tree_and) are not simple (has many OR conditions within).
   */
    if (key1->use_count > 0 || (clone_flag & CLONE_KEY2_MAYBE))
      if (!(key1= key1->clone_tree(param)))
	return 0;				// OOM
    return and_all_keys(param, key1, key2, clone_flag);
  }

  /*
    Same key part in both trees. Normalize so that the "maybe" tree (the
    one more likely to need cloning) is key1 and the simple one is key2.
  */
  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {						// Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    /* key1 is shared with other graphs: work on a private copy */
    if (key1->use_count > 1)
    {
      key1->use_count--;
      if (!(key1=key1->clone_tree(param)))
	return 0;				// OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {						// Both are maybe key
      key1->next_key_part=key_and(param, key1->next_key_part,
                                  key2->next_key_part, clone_flag);
      if (key1->next_key_part &&
	  key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
	return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
	key1->use_count--;			// Incremented in and_all_keys
	return and_all_keys(param, key1, key2, clone_flag);
      }
      key2->use_count--;			// Key2 doesn't have a tree
    }
    return key1;
  }

  /* Spatial (GEOM) ranges cannot be intersected here */
  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    /* TODO: why not leave one of the trees? */
    key1->free_tree();
    key2->free_tree();
    return 0;					// Can't optimize this
  }

  /*
    General case: both trees are lists of ranges over the same key part.
    Compute the pairwise intersection of overlapping ranges, building a
    new tree; get_range() advances e1/e2 to the next overlapping pair.
  */
  key1->use_count--;
  key2->use_count--;
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;

  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      if (get_range(&e1,&e2,key1))
	continue;
    }
    else if (get_range(&e2,&e1,key2))
      continue;
    /* AND the subtrees covering the remaining key parts */
    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
                          clone_flag);
    e1->increment_use_count(1);
    e2->increment_use_count(1);
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      /* Intersection of e1 and e2, with "next" as the deeper key part */
      SEL_ARG *new_arg= e1->clone_and(e2);
      if (!new_arg)
	return &null_element;			// End of memory
      new_arg->next_key_part=next;
      if (!new_tree)
      {
	new_tree=new_arg;
      }
      else
	new_tree=new_tree->insert(new_arg);
    }
    /* Advance the range that ends first */
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;				// e1 can't overlap next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;			// Impossible range
  return new_tree;
}
7716 
7717 
7718 static bool
get_range(SEL_ARG ** e1,SEL_ARG ** e2,SEL_ARG * root1)7719 get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
7720 {
7721   (*e1)=root1->find_range(*e2);			// first e1->min < e2->min
7722   if ((*e1)->cmp_max_to_min(*e2) < 0)
7723   {
7724     if (!((*e1)=(*e1)->next))
7725       return 1;
7726     if ((*e1)->cmp_min_to_max(*e2) > 0)
7727     {
7728       (*e2)=(*e2)->next;
7729       return 1;
7730     }
7731   }
7732   return 0;
7733 }
7734 
7735 
7736 /**
7737    Combine two range expression under a common OR. On a logical level, the
7738    transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
7739 
7740    Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
   the format is reminiscent of DNF, since an expression such as the following
7742 
7743    ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
7744 
7745    where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
7746    and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
7747    SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
7748    the first range and ranges must not overlap. It follows that they are also
7749    ordered by maximum endpoints. Thus
7750 
7751    ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
7752 
   Is a valid SEL_ARG expression for a key of at least 2 keyparts.
7754 
7755    For simplicity, we will assume that expr2 is a single range predicate,
7756    i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
7757    disjunction of several predicates by subsequently call key_or for each
7758    disjunct.
7759 
7760    The algorithm iterates over each disjunct of expr1, and for each disjunct
7761    where the first keypart's range overlaps with the first keypart's range in
7762    expr2:
7763 
7764    If the predicates are equal for the rest of the keyparts, or if there are
7765    no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
7766    node in expr2 is deallocated. If more ranges became connected in expr1, the
   surplus is also deallocated. If they differ, two ranges are created.
7768 
7769    - The range leading up to the overlap. Empty if endpoints are equal.
7770 
7771    - The overlapping sub-range. May be the entire range if they are equal.
7772 
7773    Finally, there may be one more range if expr2's first keypart's range has a
7774    greater maximum endpoint than the last range in expr1.
7775 
7776    For the overlapping sub-range, we recursively call key_or. Thus in order to
7777    compute key_or of
7778 
7779      (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
7780 
7781      (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
7782 
7783    We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
7784    first one, we simply hook on the condition for the second keypart from (1)
7785    : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
7786    < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
7787    the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
7788 
7789    ( 1  <  kp1 <= 2 AND 1 < kp2 < 10 ) OR
7790    ( 2  <  kp1 < 10 AND 1 < kp2 < 20 ) OR
7791    ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
7792 
7793    @param param    PARAM from SQL_SELECT::test_quick_select
7794    @param key1     Root of RB-tree of SEL_ARGs to be ORed with key2
7795    @param key2     Root of RB-tree of SEL_ARGs to be ORed with key1
7796 */
7797 static SEL_ARG *
key_or(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2)7798 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
7799 {
7800   if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
7801   {
7802     if (key2)
7803     {
7804       key2->use_count--;
7805       key2->free_tree();
7806     }
7807     return key1;
7808   }
7809   if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
7810     // Case is symmetric to the one above, just flip parameters.
7811     return key_or(param, key2, key1);
7812 
7813   key1->use_count--;
7814   key2->use_count--;
7815 
7816   if (key1->part != key2->part ||
7817       (key1->min_flag | key2->min_flag) & GEOM_FLAG)
7818   {
7819     key1->free_tree();
7820     key2->free_tree();
7821     return 0;                                   // Can't optimize this
7822   }
7823 
7824   // If one of the key is MAYBE_KEY then the found region may be bigger
7825   if (key1->type == SEL_ARG::MAYBE_KEY)
7826   {
7827     key2->free_tree();
7828     key1->use_count++;
7829     return key1;
7830   }
7831   if (key2->type == SEL_ARG::MAYBE_KEY)
7832   {
7833     key1->free_tree();
7834     key2->use_count++;
7835     return key2;
7836   }
7837 
7838   if (key1->use_count > 0)
7839   {
7840     if (key2->use_count == 0 || key1->elements > key2->elements)
7841     {
7842       swap_variables(SEL_ARG *,key1,key2);
7843     }
7844     if (key1->use_count > 0 && (key1= key1->clone_tree(param)) == NULL)
7845       return 0;                                 // OOM
7846   }
7847 
7848   // Add tree at key2 to tree at key1
7849   const bool key2_shared= (key2->use_count != 0);
7850   key1->maybe_flag|= key2->maybe_flag;
7851 
7852   /*
7853     Notation for illustrations used in the rest of this function:
7854 
7855       Range: [--------]
7856              ^        ^
7857              start    stop
7858 
7859       Two overlapping ranges:
7860         [-----]               [----]            [--]
7861             [---]     or    [---]       or   [-------]
7862 
7863       Ambiguity: ***
7864         The range starts or stops somewhere in the "***" range.
7865         Example: a starts before b and may end before/the same place/after b
7866         a: [----***]
7867         b:   [---]
7868 
7869       Adjacent ranges:
7870         Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
7871         a: ----]
7872         b:      [----
7873    */
7874 
7875   SEL_ARG *cur_key2= key2->first();
7876   while (cur_key2)
7877   {
7878     /*
7879       key1 consists of one or more ranges. cur_key1 is the
7880       range currently being handled.
7881 
7882       initialize cur_key1 to the latest range in key1 that starts the
7883       same place or before the range in cur_key2 starts
7884 
7885       cur_key2:            [------]
7886       key1:      [---] [-----] [----]
7887                        ^
7888                        cur_key1
7889     */
7890     SEL_ARG *cur_key1= key1->find_range(cur_key2);
7891 
7892     /*
7893       Used to describe how two key values are positioned compared to
7894       each other. Consider key_value_a.<cmp_func>(key_value_b):
7895 
7896         -2: key_value_a is smaller than key_value_b, and they are adjacent
7897         -1: key_value_a is smaller than key_value_b (not adjacent)
7898          0: the key values are equal
7899          1: key_value_a is bigger than key_value_b (not adjacent)
7900          2: key_value_a is bigger than key_value_b, and they are adjacent
7901 
7902       Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"
7903 
7904       cur_key2:          [--------           (10 <= x ...  )
7905       cur_key1:    -----]                    (  ... x <  10) => cmp==-2
7906       cur_key1:    ----]                     (  ... x <   9) => cmp==-1
7907       cur_key1:    ------]                   (  ... x <= 10) => cmp== 0
7908       cur_key1:    --------]                 (  ... x <= 12) => cmp== 1
7909       (cmp == 2 does not make sense for cmp_max_to_min())
7910      */
7911     int cmp= 0;
7912 
7913     if (!cur_key1)
7914     {
7915       /*
7916         The range in cur_key2 starts before the first range in key1. Use
7917         the first range in key1 as cur_key1.
7918 
7919         cur_key2: [--------]
7920         key1:            [****--] [----]   [-------]
7921                          ^
7922                          cur_key1
7923       */
7924       cur_key1= key1->first();
7925       cmp= -1;
7926     }
7927     else if ((cmp= cur_key1->cmp_max_to_min(cur_key2)) < 0)
7928     {
7929       /*
7930         This is the case:
7931         cur_key2:           [-------]
7932         cur_key1:   [----**]
7933        */
7934       SEL_ARG *next_key1= cur_key1->next;
7935       if (cmp == -2 &&
7936           eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
7937       {
7938         /*
7939           Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
7940 
7941           This is the case:
7942           cur_key2:           [-------]
7943           cur_key1:     [----]
7944 
7945           Result:
7946           cur_key2:     [-------------]     => inserted into key1 below
7947           cur_key1:                         => deleted
7948         */
7949         SEL_ARG *next_key2= cur_key2->next;
7950         if (key2_shared)
7951         {
7952           if (!(cur_key2= new SEL_ARG(*cur_key2)))
7953             return 0;           // out of memory
7954           cur_key2->increment_use_count(key1->use_count+1);
7955           cur_key2->next= next_key2;                 // New copy of cur_key2
7956         }
7957 
7958         if (cur_key2->copy_min(cur_key1))
7959         {
7960           // cur_key2 is full range: [-inf <= cur_key2 <= +inf]
7961           key1->free_tree();
7962           key2->free_tree();
7963           key1->type= SEL_ARG::ALWAYS;
7964           key2->type= SEL_ARG::ALWAYS;
7965           if (key1->maybe_flag)
7966             return new SEL_ARG(SEL_ARG::MAYBE_KEY);
7967           return 0;
7968         }
7969 
7970         if (!(key1= key1->tree_delete(cur_key1)))
7971         {
7972           /*
7973             cur_key1 was the last range in key1; move the cur_key2
7974             range that was merged above to key1
7975           */
7976           key1= cur_key2;
7977           key1->make_root();
7978           cur_key2= next_key2;
7979           break;
7980         }
7981       }
7982       // Move to next range in key1. Now cur_key1.min > cur_key2.min
7983       if (!(cur_key1= next_key1))
7984         break;         // No more ranges in key1. Copy rest of key2
7985     }
7986 
7987     if (cmp < 0)
7988     {
7989       /*
7990         This is the case:
7991         cur_key2:   [--***]
7992         cur_key1:       [----]
7993       */
7994       int cur_key1_cmp;
7995       if ((cur_key1_cmp= cur_key1->cmp_min_to_max(cur_key2)) > 0)
7996       {
7997         /*
7998           This is the case:
7999           cur_key2:  [------**]
8000           cur_key1:            [----]
8001         */
8002         if (cur_key1_cmp == 2 &&
8003             eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8004         {
8005           /*
8006             Adjacent ranges with equal next_key_part. Merge like this:
8007 
8008             This is the case:
8009             cur_key2:    [------]
8010             cur_key1:            [-----]
8011 
8012             Result:
8013             cur_key2:    [------]
8014             cur_key1:    [-------------]
8015 
8016             Then move on to next key2 range.
8017           */
8018           cur_key1->copy_min_to_min(cur_key2);
8019           key1->merge_flags(cur_key2); //should be cur_key1->merge...() ?
8020           if (cur_key1->min_flag & NO_MIN_RANGE &&
8021               cur_key1->max_flag & NO_MAX_RANGE)
8022           {
8023             if (key1->maybe_flag)
8024               return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8025             return 0;
8026           }
8027           cur_key2->increment_use_count(-1);        // Free not used tree
8028           cur_key2=cur_key2->next;
8029           continue;
8030         }
8031         else
8032         {
8033           /*
8034             cur_key2 not adjacent to cur_key1 or has different next_key_part.
8035             Insert into key1 and move to next range in key2
8036 
8037             This is the case:
8038             cur_key2:   [------**]
8039             cur_key1:             [----]
8040 
8041             Result:
8042             key1:       [------**][----]
8043                         ^         ^
8044                         insert    cur_key1
8045           */
8046           SEL_ARG *next_key2= cur_key2->next;
8047           if (key2_shared)
8048           {
8049             SEL_ARG *cpy= new SEL_ARG(*cur_key2);   // Must make copy
8050             if (!cpy)
8051               return 0;                         // OOM
8052             key1= key1->insert(cpy);
8053             cur_key2->increment_use_count(key1->use_count+1);
8054           }
8055           else
8056             key1= key1->insert(cur_key2); // Will destroy key2_root
8057           cur_key2= next_key2;
8058           continue;
8059         }
8060       }
8061     }
8062 
8063     /*
8064       The ranges in cur_key1 and cur_key2 are overlapping:
8065 
8066       cur_key2:       [----------]
8067       cur_key1:    [*****-----*****]
8068 
8069       Corollary: cur_key1.min <= cur_key2.max
8070     */
8071     if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8072     {
8073       // Merge overlapping ranges with equal next_key_part
8074       if (cur_key1->is_same(cur_key2))
8075       {
8076         /*
8077           cur_key1 covers exactly the same range as cur_key2
8078           Use the relevant range in key1.
8079         */
8080         cur_key1->merge_flags(cur_key2);        // Copy maybe flags
8081         cur_key2->increment_use_count(-1);      // Free not used tree
8082       }
8083       else
8084       {
8085         SEL_ARG *last= cur_key1;
8086         SEL_ARG *first= cur_key1;
8087 
8088         /*
8089           Find the last range in key1 that overlaps cur_key2 and
8090           where all ranges first...last have the same next_key_part as
8091           cur_key2.
8092 
8093           cur_key2:  [****----------------------*******]
8094           key1:         [--]  [----] [---]  [-----] [xxxx]
8095                         ^                   ^       ^
8096                         first               last    different next_key_part
8097 
8098           Since cur_key2 covers them, the ranges between first and last
8099           are merged into one range by deleting first...last-1 from
8100           the key1 tree. In the figure, this applies to first and the
8101           two consecutive ranges. The range of last is then extended:
8102             * last.min: Set to min(cur_key2.min, first.min)
8103             * last.max: If there is a last->next that overlaps cur_key2
8104                         (i.e., last->next has a different next_key_part):
8105                                         Set adjacent to last->next.min
8106                         Otherwise:      Set to max(cur_key2.max, last.max)
8107 
8108           Result:
8109           cur_key2:  [****----------------------*******]
8110                         [--]  [----] [---]                 => deleted from key1
8111           key1:      [**------------------------***][xxxx]
8112                      ^                              ^
8113                      cur_key1=last                  different next_key_part
8114         */
8115         while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
8116                eq_tree(last->next->next_key_part, cur_key2->next_key_part))
8117         {
8118           /*
8119             last->next is covered by cur_key2 and has same next_key_part.
8120             last can be deleted
8121           */
8122           SEL_ARG *save=last;
8123           last=last->next;
8124           key1= key1->tree_delete(save);
8125         }
8126         // Redirect cur_key1 to last which will cover the entire range
8127         cur_key1= last;
8128 
8129         /*
8130           Extend last to cover the entire range of
8131           [min(first.min_value,cur_key2.min_value)...last.max_value].
8132           If this forms a full range (the range covers all possible
8133           values) we return no SEL_ARG RB-tree.
8134         */
8135         bool full_range= last->copy_min(first);
8136         if (!full_range)
8137           full_range= last->copy_min(cur_key2);
8138 
8139         if (!full_range)
8140         {
8141           if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0)
8142           {
8143             /*
8144               This is the case:
8145               cur_key2:   [-------------]
8146               key1:     [***------]  [xxxx]
8147                         ^            ^
8148                         last         different next_key_part
8149 
8150               Extend range of last up to last->next:
8151               cur_key2:   [-------------]
8152               key1:     [***--------][xxxx]
8153             */
8154             last->copy_min_to_max(last->next);
8155           }
8156           else
8157             /*
8158               This is the case:
8159               cur_key2:   [--------*****]
8160               key1:     [***---------]    [xxxx]
8161                         ^                 ^
8162                         last              different next_key_part
8163 
8164               Extend range of last up to max(last.max, cur_key2.max):
8165               cur_key2:   [--------*****]
8166               key1:     [***----------**] [xxxx]
8167              */
8168             full_range= last->copy_max(cur_key2);
8169         }
8170         if (full_range)
8171         {                                       // Full range
8172           key1->free_tree();
8173           key1->type= SEL_ARG::ALWAYS;
8174           key2->type= SEL_ARG::ALWAYS;
8175           for (; cur_key2 ; cur_key2= cur_key2->next)
8176             cur_key2->increment_use_count(-1);  // Free not used tree
8177           if (key1->maybe_flag)
8178             return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8179           return 0;
8180         }
8181       }
8182     }
8183 
8184     if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0)
8185     {
8186       /*
8187         This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
8188         cur_key2:                [-------]
8189         cur_key1:         [----------*******]
8190       */
8191 
8192       if (!cur_key1->next_key_part)
8193       {
8194         /*
8195           cur_key1->next_key_part is empty: cut the range that
8196           is covered by cur_key1 from cur_key2.
8197           Reason: (cur_key2->next_key_part OR
8198           cur_key1->next_key_part) will be empty and therefore
8199           equal to cur_key1->next_key_part. Thus, this part of
8200           the cur_key2 range is completely covered by cur_key1.
8201         */
8202         if (cur_key1->cmp_max_to_max(cur_key2) >= 0)
8203         {
8204           /*
8205             cur_key1 covers the entire range in cur_key2.
8206             cur_key2:            [-------]
8207             cur_key1:     [-----------------]
8208 
8209             Move on to next range in key2
8210           */
8211           cur_key2->increment_use_count(-1); // Free not used tree
8212           cur_key2= cur_key2->next;
8213           continue;
8214         }
8215         else
8216         {
8217           /*
8218             This is the case:
8219             cur_key2:            [-------]
8220             cur_key1:     [---------]
8221 
8222             Result:
8223             cur_key2:                [---]
8224             cur_key1:     [---------]
8225           */
8226           cur_key2->copy_max_to_min(cur_key1);
8227           continue;
8228         }
8229       }
8230 
8231       /*
8232         The ranges are overlapping but have not been merged because
8233         next_key_part of cur_key1 and cur_key2 differ.
8234         cur_key2:               [----]
8235         cur_key1:     [------------*****]
8236 
8237         Split cur_key1 in two where cur_key2 starts:
8238         cur_key2:               [----]
8239         key1:         [--------][--*****]
8240                       ^         ^
8241                       insert    cur_key1
8242       */
8243       SEL_ARG *new_arg= cur_key1->clone_first(cur_key2);
8244       if (!new_arg)
8245         return 0;                               // OOM
8246       if ((new_arg->next_key_part= cur_key1->next_key_part))
8247         new_arg->increment_use_count(key1->use_count+1);
8248       cur_key1->copy_min_to_min(cur_key2);
8249       key1= key1->insert(new_arg);
8250     } // cur_key1.min >= cur_key2.min due to this if()
8251 
8252     /*
8253       Now cur_key2.min <= cur_key1.min <= cur_key2.max:
8254       cur_key2:    [---------]
8255       cur_key1:    [****---*****]
8256      */
8257     SEL_ARG key2_cpy(*cur_key2); // Get copy we can modify
8258     for (;;)
8259     {
8260       if (cur_key1->cmp_min_to_min(&key2_cpy) > 0)
8261       {
8262         /*
8263           This is the case:
8264           key2_cpy:    [------------]
8265           key1:                 [-*****]
8266                                 ^
8267                                 cur_key1
8268 
8269           Result:
8270           key2_cpy:             [---]
8271           key1:        [-------][-*****]
8272                        ^        ^
8273                        insert   cur_key1
8274          */
8275         SEL_ARG *new_arg=key2_cpy.clone_first(cur_key1);
8276         if (!new_arg)
8277           return 0; // OOM
8278         if ((new_arg->next_key_part=key2_cpy.next_key_part))
8279           new_arg->increment_use_count(key1->use_count+1);
8280         key1= key1->insert(new_arg);
8281         key2_cpy.copy_min_to_min(cur_key1);
8282       }
8283       // Now key2_cpy.min == cur_key1.min
8284 
8285       if ((cmp= cur_key1->cmp_max_to_max(&key2_cpy)) <= 0)
8286       {
8287         /*
8288           cur_key1.max <= key2_cpy.max:
8289           key2_cpy:       a)  [-------]    or b)     [----]
8290           cur_key1:           [----]                 [----]
8291 
8292           Steps:
8293 
8294            1) Update next_key_part of cur_key1: OR it with
8295               key2_cpy->next_key_part.
8296            2) If case a: Insert range [cur_key1.max, key2_cpy.max]
8297               into key1 using next_key_part of key2_cpy
8298 
8299            Result:
8300            key1:          a)  [----][-]    or b)     [----]
8301          */
8302         cur_key1->maybe_flag|= key2_cpy.maybe_flag;
8303         key2_cpy.increment_use_count(key1->use_count+1);
8304         cur_key1->next_key_part=
8305           key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part);
8306 
8307         if (!cmp)
8308           break;                     // case b: done with this key2 range
8309 
8310         // Make key2_cpy the range [cur_key1.max, key2_cpy.max]
8311         key2_cpy.copy_max_to_min(cur_key1);
8312         if (!(cur_key1= cur_key1->next))
8313         {
8314           /*
8315             No more ranges in key1. Insert key2_cpy and go to "end"
8316             label to insert remaining ranges in key2 if any.
8317           */
8318           SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8319           if (!new_key1_range)
8320             return 0; // OOM
8321           key1= key1->insert(new_key1_range);
8322           cur_key2= cur_key2->next;
8323           goto end;
8324         }
8325         if (cur_key1->cmp_min_to_max(&key2_cpy) > 0)
8326         {
8327           /*
8328             The next range in key1 does not overlap with key2_cpy.
8329             Insert this range into key1 and move on to the next range
8330             in key2.
8331           */
8332           SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8333           if (!new_key1_range)
8334             return 0;                           // OOM
8335           key1= key1->insert(new_key1_range);
8336           break;
8337         }
8338         /*
8339           key2_cpy overlaps with the next range in key1 and the case
8340           is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
8341           to for(;;) to handle this situation.
8342         */
8343         continue;
8344       }
8345       else
8346       {
8347         /*
8348           This is the case:
8349           key2_cpy:        [-------]
8350           cur_key1:        [------------]
8351 
8352           Result:
8353           key1:            [-------][---]
8354                            ^        ^
8355                            new_arg  cur_key1
8356           Steps:
8357 
8358            0) If cur_key1->next_key_part is empty: do nothing.
8359               Reason: (key2_cpy->next_key_part OR
8360               cur_key1->next_key_part) will be empty and
8361               therefore equal to cur_key1->next_key_part. Thus,
8362               the range in key2_cpy is completely covered by
8363               cur_key1
8364            1) Make new_arg with range [cur_key1.min, key2_cpy.max].
8365               new_arg->next_key_part is OR between next_key_part of
8366               cur_key1 and key2_cpy
8367            2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
8368            3) Insert new_arg into key1
8369         */
8370         if (!cur_key1->next_key_part) // Step 0
8371         {
8372           key2_cpy.increment_use_count(-1);     // Free not used tree
8373           break;
8374         }
8375         SEL_ARG *new_arg= cur_key1->clone_last(&key2_cpy);
8376         if (!new_arg)
8377           return 0; // OOM
8378         cur_key1->copy_max_to_min(&key2_cpy);
8379         cur_key1->increment_use_count(key1->use_count+1);
8380         /* Increment key count as it may be used for next loop */
8381         key2_cpy.increment_use_count(1);
8382         new_arg->next_key_part= key_or(param, cur_key1->next_key_part,
8383                                        key2_cpy.next_key_part);
8384         key1= key1->insert(new_arg);
8385         break;
8386       }
8387     }
8388     // Move on to next range in key2
8389     cur_key2= cur_key2->next;
8390   }
8391 
8392 end:
8393   /*
8394     Add key2 ranges that are non-overlapping with and higher than the
8395     highest range in key1.
8396   */
8397   while (cur_key2)
8398   {
8399     SEL_ARG *next= cur_key2->next;
8400     if (key2_shared)
8401     {
8402       SEL_ARG *key2_cpy=new SEL_ARG(*cur_key2);  // Must make copy
8403       if (!key2_cpy)
8404         return 0;
8405       cur_key2->increment_use_count(key1->use_count+1);
8406       key1= key1->insert(key2_cpy);
8407     }
8408     else
8409       key1= key1->insert(cur_key2);   // Will destroy key2_root
8410     cur_key2= next;
8411   }
8412   key1->use_count++;
8413 
8414   return key1;
8415 }
8416 
8417 
8418 /* Compare if two trees are equal */
8419 
eq_tree(SEL_ARG * a,SEL_ARG * b)8420 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
8421 {
8422   if (a == b)
8423     return 1;
8424   if (!a || !b || !a->is_same(b))
8425     return 0;
8426   if (a->left != &null_element && b->left != &null_element)
8427   {
8428     if (!eq_tree(a->left,b->left))
8429       return 0;
8430   }
8431   else if (a->left != &null_element || b->left != &null_element)
8432     return 0;
8433   if (a->right != &null_element && b->right != &null_element)
8434   {
8435     if (!eq_tree(a->right,b->right))
8436       return 0;
8437   }
8438   else if (a->right != &null_element || b->right != &null_element)
8439     return 0;
8440   if (a->next_key_part != b->next_key_part)
8441   {						// Sub range
8442     if (!a->next_key_part != !b->next_key_part ||
8443 	!eq_tree(a->next_key_part, b->next_key_part))
8444       return 0;
8445   }
8446   return 1;
8447 }
8448 
8449 
/*
  Insert "key" into this SEL_ARG RB-tree, ordered by the minimum endpoint
  of each range, and link it into the doubly linked next/prev list of
  ranges. The tree is rebalanced via rb_insert(); the (possibly new) root
  is returned with use_count/elements/maybe_flag copied from the old root.
*/
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element);

  /*
    Standard binary-search descent: find the leaf position where "key"
    belongs. "par" ends up pointing at the child pointer that must be
    set to "key"; "last_element" is the would-be parent.
  */
  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
	/* Link in list */
  if (par == &last_element->left)
  {
    /* key became a left child: it precedes last_element in key order */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key became a right child: it follows last_element in key order */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);			// rebalance tree
  root->use_count=this->use_count;		// copy root info
  root->elements= this->elements+1;
  root->maybe_flag=this->maybe_flag;
  return root;
}
8491 
8492 
8493 /*
8494 ** Find best key with min <= given key
8495 ** Because the call context this should never return 0 to get_range
8496 */
8497 
8498 SEL_ARG *
find_range(SEL_ARG * key)8499 SEL_ARG::find_range(SEL_ARG *key)
8500 {
8501   SEL_ARG *element=this,*found=0;
8502 
8503   for (;;)
8504   {
8505     if (element == &null_element)
8506       return found;
8507     int cmp=element->cmp_min_to_min(key);
8508     if (cmp == 0)
8509       return element;
8510     if (cmp < 0)
8511     {
8512       found=element;
8513       element=element->right;
8514     }
8515     else
8516       element=element->left;
8517   }
8518 }
8519 
8520 
/*
  Remove a element from the tree

  SYNOPSIS
    tree_delete()
    key		Key that is to be deleted from tree (this)

  NOTE
    This also frees all sub trees that is used by the element

  RETURN
    root of new tree (with key deleted)
*/

SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_use_count(-1);
  /* "par" is the child pointer (or root pointer) that must be rewritten */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* No left child: splice key's right subtree into key's place */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* No right child: splice key's left subtree into key's place */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    /*
      Two children: key's in-order successor (key->next) has no left
      child; unlink it and move it into key's position, keeping key's
      color so the tree invariants are only disturbed where tmp was.
    */
    SEL_ARG *tmp=key->next;			// next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;	// unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;			// Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)				// key->right == key->next
      fix_par=tmp;				// new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);				// Maybe root later
  /* Removing a BLACK node can break the equal-black-height invariant */
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  root->use_count=this->use_count;		// Fix root counters
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}
8601 
8602 
8603 	/* Functions to fix up the tree after insert and delete */
8604 
left_rotate(SEL_ARG ** root,SEL_ARG * leaf)8605 static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
8606 {
8607   SEL_ARG *y=leaf->right;
8608   leaf->right=y->left;
8609   if (y->left != &null_element)
8610     y->left->parent=leaf;
8611   if (!(y->parent=leaf->parent))
8612     *root=y;
8613   else
8614     *leaf->parent_ptr()=y;
8615   y->left=leaf;
8616   leaf->parent=y;
8617 }
8618 
right_rotate(SEL_ARG ** root,SEL_ARG * leaf)8619 static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
8620 {
8621   SEL_ARG *y=leaf->left;
8622   leaf->left=y->right;
8623   if (y->right != &null_element)
8624     y->right->parent=leaf;
8625   if (!(y->parent=leaf->parent))
8626     *root=y;
8627   else
8628     *leaf->parent_ptr()=y;
8629   y->right=leaf;
8630   leaf->parent=y;
8631 }
8632 
8633 
/*
  Red-black insert fixup: "leaf" has just been linked into the tree by
  insert(); color it RED and restore the RB invariants by recoloring and
  rotating. Returns the (possibly new) root of the tree.
*/
SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  /* Loop while a red-red violation exists between leaf and its parent */
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
      y= par2->right;			// y is leaf's "uncle"
      if (y->color == RED)
      {
	/* Red uncle: recolor and continue fixup from the grandparent */
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	/* Black uncle: rotate into a line, recolor, rotate - then done */
	if (leaf == par->right)
	{
	  left_rotate(&root,leaf->parent);
	  par=leaf;			/* leaf is now parent to old leaf */
	}
	par->color=BLACK;
	par2->color=RED;
	right_rotate(&root,par2);
	break;
      }
    }
    else
    {
      /* Mirror image of the cases above (parent is a right child) */
      y= par2->left;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->left)
	{
	  right_rotate(&root,par);
	  par=leaf;
	}
	par->color=BLACK;
	par2->color=RED;
	left_rotate(&root,par2);
	break;
      }
    }
  }
  root->color=BLACK;			// The root is always black
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  return root;
}
8696 
8697 
/*
  Red-black delete fixup (the classic textbook algorithm): rebalance the
  tree after tree_delete() removed a BLACK node. "key" is the node that
  took the removed node's place (possibly &null_element) and "par" is its
  parent. Returns the (possibly new) root of the tree.
*/
SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  /* Push the "extra black" up the tree until it can be absorbed */
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      w=par->right;				// w is x's sibling
      if (w->color == SEL_ARG::RED)
      {
	/* Case 1: red sibling - rotate so x gets a black sibling */
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	left_rotate(&root,par);
	w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
	/* Case 2: sibling's children both black - recolor, move problem up */
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->right->color == SEL_ARG::BLACK)
	{
	  /* Case 3: make the sibling's far child red, fall into case 4 */
	  w->left->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  right_rotate(&root,w);
	  w=par->right;
	}
	/* Case 4: rotate and recolor - the tree is balanced afterwards */
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->right->color=SEL_ARG::BLACK;
	left_rotate(&root,par);
	x=root;
	break;
      }
    }
    else
    {
      /* Mirror image of the cases above (x is a right child) */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	right_rotate(&root,par);
	w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->left->color == SEL_ARG::BLACK)
	{
	  w->right->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  left_rotate(&root,w);
	  w=par->left;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->left->color=SEL_ARG::BLACK;
	right_rotate(&root,par);
	x=root;
	break;
      }
    }
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}
8775 
8776 
8777 #ifndef DBUG_OFF
8778 	/* Test that the properties for a red-black tree hold */
8779 
test_rb_tree(SEL_ARG * element,SEL_ARG * parent)8780 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
8781 {
8782   int count_l,count_r;
8783 
8784   if (element == &null_element)
8785     return 0;					// Found end of tree
8786   if (element->parent != parent)
8787   {
8788     sql_print_error("Wrong tree: Parent doesn't point at parent");
8789     return -1;
8790   }
8791   if (element->color == SEL_ARG::RED &&
8792       (element->left->color == SEL_ARG::RED ||
8793        element->right->color == SEL_ARG::RED))
8794   {
8795     sql_print_error("Wrong tree: Found two red in a row");
8796     return -1;
8797   }
8798   if (element->left == element->right && element->left != &null_element)
8799   {						// Dummy test
8800     sql_print_error("Wrong tree: Found right == left");
8801     return -1;
8802   }
8803   count_l=test_rb_tree(element->left,element);
8804   count_r=test_rb_tree(element->right,element);
8805   if (count_l >= 0 && count_r >= 0)
8806   {
8807     if (count_l == count_r)
8808       return count_l+(element->color == SEL_ARG::BLACK);
8809     sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
8810 	    count_l,count_r);
8811   }
8812   return -1;					// Error, no more warnings
8813 }
8814 
8815 
8816 /**
8817   Count how many times SEL_ARG graph "root" refers to its part "key" via
8818   transitive closure.
8819 
8820   @param root  An RB-Root node in a SEL_ARG graph.
8821   @param key   Another RB-Root node in that SEL_ARG graph.
8822 
8823   The passed "root" node may refer to "key" node via root->next_key_part,
8824   root->next->n
8825 
8826   This function counts how many times the node "key" is referred (via
8827   SEL_ARG::next_key_part) by
8828   - intervals of RB-tree pointed by "root",
8829   - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
8830   intervals of RB-tree pointed by "root",
8831   - and so on.
8832 
8833   Here is an example (horizontal links represent next_key_part pointers,
8834   vertical links - next/prev prev pointers):
8835 
8836          +----+               $
8837          |root|-----------------+
8838          +----+               $ |
8839            |                  $ |
8840            |                  $ |
8841          +----+       +---+   $ |     +---+    Here the return value
8842          |    |- ... -|   |---$-+--+->|key|    will be 4.
8843          +----+       +---+   $ |  |  +---+
8844            |                  $ |  |
8845           ...                 $ |  |
8846            |                  $ |  |
8847          +----+   +---+       $ |  |
8848          |    |---|   |---------+  |
8849          +----+   +---+       $    |
8850            |        |         $    |
8851           ...     +---+       $    |
8852                   |   |------------+
8853                   +---+       $
8854   @return
8855   Number of links to "key" from nodes reachable from "root".
8856 */
8857 
count_key_part_usage(SEL_ARG * root,SEL_ARG * key)8858 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
8859 {
8860   ulong count= 0;
8861   for (root=root->first(); root ; root=root->next)
8862   {
8863     if (root->next_key_part)
8864     {
8865       if (root->next_key_part == key)
8866 	count++;
8867       if (root->next_key_part->part < key->part)
8868 	count+=count_key_part_usage(root->next_key_part,key);
8869     }
8870   }
8871   return count;
8872 }
8873 
8874 
8875 /*
8876   Check if SEL_ARG::use_count value is correct
8877 
8878   SYNOPSIS
8879     SEL_ARG::test_use_count()
8880       root  The root node of the SEL_ARG graph (an RB-tree root node that
8881             has the least value of sel_arg->part in the entire graph, and
8882             thus is the "origin" of the graph)
8883 
8884   DESCRIPTION
8885     Check if SEL_ARG::use_count value is correct. See the definition of
8886     use_count for what is "correct".
8887 */
8888 
void SEL_ARG::test_use_count(SEL_ARG *root)
{
  uint e_count=0;
  /* The origin of the graph must be referenced exactly once */
  if (this == root && use_count != 1)
  {
    sql_print_information("Use_count: Wrong count %lu for root",use_count);
    // DBUG_ASSERT(false); // Todo - enable and clean up mess
    return;
  }
  if (this->type != SEL_ARG::KEY_RANGE)
    return;
  /* Count elements and check each next_key_part's use_count recursively */
  for (SEL_ARG *pos=first(); pos ; pos=pos->next)
  {
    e_count++;
    if (pos->next_key_part)
    {
      /* use_count must be at least the number of references found */
      ulong count=count_key_part_usage(root,pos->next_key_part);
      if (count > pos->next_key_part->use_count)
      {
        sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu "
                              "should be %lu", (long unsigned int)pos,
                              pos->next_key_part->use_count, count);
        // DBUG_ASSERT(false); // Todo - enable and clean up mess
	return;
      }
      pos->next_key_part->test_use_count(root);
    }
  }
  /* "elements" is maintained by insert()/tree_delete(); verify it matches */
  if (e_count != elements)
  {
    sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                      e_count, elements, (long unsigned int) this);
    // DBUG_ASSERT(false); // Todo - enable and clean up mess
  }
}
8924 #endif
8925 
8926 /****************************************************************************
8927   MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
8928  ****************************************************************************/
8929 
8930 /* MRR range sequence, SEL_ARG* implementation: stack entry */
typedef struct st_range_seq_entry
{
  /*
    Pointers in min and max keys. They point to right-after-end of key
    images. The 0-th entry has these pointing to key tuple start.
  */
  uchar *min_key, *max_key;

  /*
    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
    min_key_flag may have NULL_RANGE set.
  */
  uint min_key_flag, max_key_flag;

  /* Number of key parts accumulated in min_key and max_key, respectively */
  uint min_key_parts, max_key_parts;
  /**
    Pointer into the R-B tree for this keypart. It points to the
    currently active range for the keypart, so calling next on it will
    get to the next range. sel_arg_range_seq_next() uses this to avoid
    reparsing the R-B range trees each time a new range is fetched.
  */
  SEL_ARG *key_tree;
} RANGE_SEQ_ENTRY;
8955 
8956 
8957 /*
8958   MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
8959 */
class Sel_arg_range_sequence
{
private:

  /**
    Stack of ranges for the curr_kp first keyparts. Used by
    sel_arg_range_seq_next() so that if the next range is equal to the
    previous one for the first x keyparts, stack[x-1] can be
    accumulated with the new range in keyparts > x to quickly form
    the next range to return.

    Notation used below: "x:y" means a range where
    "column_in_keypart_0=x" and "column_in_keypart_1=y". For
    simplicity, only equality (no BETWEEN, < etc) is considered in the
    example but the same principle applies to other range predicate
    operators too.

    Consider a query with these range predicates:
      (kp0=1 and kp1=2 and kp2=3) or
      (kp0=1 and kp1=2 and kp2=4) or
      (kp0=1 and kp1=3 and kp2=5) or
      (kp0=1 and kp1=3 and kp2=6)

    1) sel_arg_range_seq_next() is called the first time
       - traverse the R-B tree (see SEL_ARG) to find the first range
       - returns range "1:2:3"
       - values in stack after this: stack[1, 1:2, 1:2:3]
    2) sel_arg_range_seq_next() is called second time
       - keypart 2 has another range, so the next range in
         keypart 2 is appended to stack[1] and saved
         in stack[2]
       - returns range "1:2:4"
       - values in stack after this: stack[1, 1:2, 1:2:4]
    3) sel_arg_range_seq_next() is called the third time
       - no more ranges in keypart 2, but keypart 1 has
         another range, so the next range in keypart 1 is
         appended to stack[0] and saved in stack[1]. The first
         range in keypart 2 is then appended to stack[1] and
         saved in stack[2]
       - returns range "1:3:5"
       - values in stack after this: stack[1, 1:3, 1:3:5]
    4) sel_arg_range_seq_next() is called the fourth time
       - keypart 2 has another range, see 2)
       - returns range "1:3:6"
       - values in stack after this: stack[1, 1:3, 1:3:6]
   */
  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
  /*
    Index of last used element in the above array. A value of -1 means
    that the stack is empty.
  */
  int curr_kp;

public:
  uint keyno;      /* index of used tree in SEL_TREE structure */
  uint real_keyno; /* Number of the index in tables */

  PARAM * const param;
  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */

  Sel_arg_range_sequence(PARAM *param_arg) : param(param_arg) { reset(); }

  /* Restore the sequence to its initial state: empty stack, keys rewound */
  void reset()
  {
    stack[0].key_tree= NULL;
    stack[0].min_key= (uchar*)param->min_key;
    stack[0].min_key_flag= 0;
    stack[0].min_key_parts= 0;

    stack[0].max_key= (uchar*)param->max_key;
    stack[0].max_key_flag= 0;
    stack[0].max_key_parts= 0;
    curr_kp= -1;
  }

  /* True if no keypart range has been pushed yet */
  bool stack_empty() const { return (curr_kp == -1); }

  void stack_push_range(SEL_ARG *key_tree);

  /* Pop the innermost keypart range; fully resets when the stack empties */
  void stack_pop_range()
  {
    DBUG_ASSERT(!stack_empty());
    if (curr_kp == 0)
      reset();
    else
      curr_kp--;
  }

  /* Number of keypart ranges currently on the stack */
  int stack_size() const { return curr_kp + 1; }

  /* Innermost stack entry, or NULL if the stack is empty */
  RANGE_SEQ_ENTRY *stack_top()
  {
    return stack_empty() ? NULL : &stack[curr_kp];
  }
};
9055 
9056 
9057 /*
9058   Range sequence interface, SEL_ARG* implementation: Initialize the traversal
9059 
9060   SYNOPSIS
9061     init()
9062       init_params  SEL_ARG tree traversal context
9063       n_ranges     [ignored] The number of ranges obtained
9064       flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
9065 
9066   RETURN
9067     Value of init_param
9068 */
9069 
sel_arg_range_seq_init(void * init_param,uint n_ranges,uint flags)9070 range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
9071 {
9072   Sel_arg_range_sequence *seq=
9073     static_cast<Sel_arg_range_sequence*>(init_param);
9074   seq->reset();
9075   return init_param;
9076 }
9077 
9078 
/*
  Push the range predicate for one more keypart onto the stack and append
  that keypart's lower/upper bound values to the min/max key buffers.
  key_tree must be the SEL_ARG node for the keypart directly following the
  current stack top (or for the first keypart if the stack is empty).
*/
void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree)
{

  DBUG_ASSERT((uint)curr_kp+1 < MAX_REF_PARTS);

  RANGE_SEQ_ENTRY *push_position= &stack[curr_kp + 1];
  RANGE_SEQ_ENTRY *last_added_kp= stack_top();
  if (stack_empty())
  {
    /*
       If we get here this is either
         a) the first time a range sequence is constructed for this
            range access method (in which case stack[0] has not been
            modified since the constructor was called), or
         b) there are multiple ranges for the first keypart in the
            condition (and we have called stack_pop_range() to empty
            the stack).
       In both cases, reset() has been called and all fields in
       push_position have been reset. All we need to do is to copy the
       min/max key flags from the predicate we're about to add to
       stack[0].
    */
    push_position->min_key_flag= key_tree->min_flag;
    push_position->max_key_flag= key_tree->max_flag;
  }
  else
  {
    // Continue the key prefix accumulated so far: inherit the previous
    // keypart's buffer positions, part counts and accumulated flags.
    push_position->min_key= last_added_kp->min_key;
    push_position->max_key= last_added_kp->max_key;
    push_position->min_key_parts= last_added_kp->min_key_parts;
    push_position->max_key_parts= last_added_kp->max_key_parts;
    push_position->min_key_flag= last_added_kp->min_key_flag |
                                 key_tree->min_flag;
    push_position->max_key_flag= last_added_kp->max_key_flag |
                                 key_tree->max_flag;
  }

  push_position->key_tree= key_tree;
  uint16 stor_length= param->key[keyno][key_tree->part].store_length;
  /* psergey-merge-done:
  key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
                  &cur->min_key, prev->min_key_flag,
                  &cur->max_key, prev->max_key_flag);
  */
  // store_min/store_max advance the min_key/max_key pointers past the
  // newly written keypart value and return the number of parts written.
  push_position->min_key_parts+=
    key_tree->store_min(stor_length, &push_position->min_key,
                        last_added_kp ? last_added_kp->min_key_flag : 0);
  push_position->max_key_parts+=
    key_tree->store_max(stor_length, &push_position->max_key,
                        last_added_kp ? last_added_kp->max_key_flag : 0);

  if (key_tree->is_null_interval())
    push_position->min_key_flag |= NULL_RANGE;
  curr_kp++;
}
9134 
9135 
9136 /*
9137   Range sequence interface, SEL_ARG* implementation: get the next interval
9138   in the R-B tree
9139 
9140   SYNOPSIS
9141     sel_arg_range_seq_next()
9142       rseq        Value returned from sel_arg_range_seq_init
9143       range  OUT  Store information about the range here
9144 
9145   DESCRIPTION
9146     This is "get_next" function for Range sequence interface implementation
9147     for SEL_ARG* tree.
9148 
9149   IMPLEMENTATION
9150     The traversal also updates those param members:
9151       - is_ror_scan
9152       - range_count
9153       - max_key_part
9154 
9155   RETURN
9156     0  Ok
9157     1  No more ranges in the sequence
9158 
9159   NOTE: append_range_all_keyparts(), which is used to e.g. print
9160   ranges to Optimizer Trace in a human readable format, mimics the
9161   behavior of this function.
9162 */
9163 
9164 //psergey-merge-todo: support check_quick_keys:max_keypart
uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
{
  SEL_ARG *key_tree;
  Sel_arg_range_sequence *seq= static_cast<Sel_arg_range_sequence*>(rseq);

  if (seq->stack_empty())
  {
    /*
      This is the first time sel_arg_range_seq_next is called.
      seq->start points to the root of the R-B tree for the first
      keypart
    */
    key_tree= seq->start;

    /*
      Move to the first range for the first keypart. Save this range
      in seq->stack[0] and carry on to ranges in the next keypart if
      any
    */
    key_tree= key_tree->first();
    seq->stack_push_range(key_tree);
  }
  else
  {
    /*
      This is not the first time sel_arg_range_seq_next is called, so
      seq->stack is populated with the range the last call to this
      function found. seq->stack[current_keypart].key_tree points to a
      leaf in the R-B tree of the last keypart that was part of the
      former range. This is the starting point for finding the next
      range. @see Sel_arg_range_sequence::stack
    */
    // See if there are more ranges in this or any of the previous keyparts
    while (true)
    {
      key_tree= seq->stack_top()->key_tree;
      seq->stack_pop_range();
      if (key_tree->next)
      {
        /* This keypart has more ranges */
        DBUG_ASSERT(key_tree->next != &null_element);
        key_tree= key_tree->next;

        /*
          save the next range for this keypart and carry on to ranges in
          the next keypart if any
        */
        seq->stack_push_range(key_tree);
        // Multiple disjoint ranges: rows can no longer come in rowid order
        seq->param->is_ror_scan= FALSE;
        break;
      }

      if (seq->stack_empty())
      {
        // There are no more ranges for the first keypart: we're done
        return 1;
      }
      /*
         There are no more ranges for the current keypart. Step back
         to the previous keypart and see if there are more ranges
         there.
      */
    }
  }

  DBUG_ASSERT(!seq->stack_empty());

  /*
    Add range info for the next keypart if
      1) there is a range predicate for a later keypart
      2) the range predicate is for the next keypart in the index: a
         range predicate on keypartX+1 can only be used if there is a
         range predicate on keypartX.
      3) the range predicate on the next keypart is usable
  */
  while (key_tree->next_key_part &&                              // 1)
         key_tree->next_key_part != &null_element &&             // 1)
         key_tree->next_key_part->part == key_tree->part + 1 &&  // 2)
         key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)    // 3)
  {
    {
      DBUG_PRINT("info", ("while(): key_tree->part %d",key_tree->part));
      RANGE_SEQ_ENTRY *cur= seq->stack_top();
      const uint min_key_total_length= cur->min_key - seq->param->min_key;
      const uint max_key_total_length= cur->max_key - seq->param->max_key;

      /*
        Check if more ranges can be added. This is the case if all
        predicates for keyparts handled so far are equality
        predicates. If either of the following apply, there are
        non-equality predicates in stack[]:

        1) min_key_total_length != max_key_total_length (because
           equality ranges are stored as "min_key = max_key = <value>")
        2) memcmp(<min_key_values>,<max_key_values>) != 0 (same argument as 1)
        3) A min or max flag has been set: Because flags denote ranges
           ('<', '<=' etc), any value but 0 indicates a non-equality
           predicate.
       */

      // Window of the key buffers covering only the newest keypart value,
      // so the memcmp below ignores keyparts already verified equal.
      uchar* min_key_start;
      uchar* max_key_start;
      uint cur_key_length;

      if (seq->stack_size() == 1)
      {
        min_key_start= seq->param->min_key;
        max_key_start= seq->param->max_key;
        cur_key_length= min_key_total_length;
      }
      else
      {
        const RANGE_SEQ_ENTRY prev= cur[-1];
        min_key_start= prev.min_key;
        max_key_start= prev.max_key;
        cur_key_length= cur->min_key - prev.min_key;
      }

      if ((min_key_total_length != max_key_total_length) ||         // 1)
          (memcmp(min_key_start, max_key_start, cur_key_length)) || // 2)
          (key_tree->min_flag || key_tree->max_flag))               // 3)
      {
        DBUG_PRINT("info", ("while(): inside if()"));
        /*
          The range predicate up to and including the one in key_tree
          is usable by range access but does not allow subranges made
          up from predicates in later keyparts. This may e.g. be
          because the predicate operator is "<". Since there are range
          predicates on more keyparts, we use those to more closely
          specify the start and stop locations for the range. Example:

                "SELECT * FROM t1 WHERE a >= 2 AND b >= 3":

                t1 content:
                -----------
                1 1
                2 1     <- 1)
                2 2
                2 3     <- 2)
                2 4
                3 1
                3 2
                3 3

          The predicate cannot be translated into something like
             "(a=2 and b>=3) or (a=3 and b>=3) or ..."
          I.e., it cannot be divided into subranges, but by storing
          min/max key below we can at least start the scan from 2)
          instead of 1)
        */
        SEL_ARG *store_key_part= key_tree->next_key_part;
        seq->param->is_ror_scan= FALSE;
        if (!key_tree->min_flag)
          cur->min_key_parts +=
            store_key_part->store_min_key(seq->param->key[seq->keyno],
                                          &cur->min_key,
                                          &cur->min_key_flag,
                                          MAX_KEY);
        if (!key_tree->max_flag)
          cur->max_key_parts +=
            store_key_part->store_max_key(seq->param->key[seq->keyno],
                                          &cur->max_key,
                                          &cur->max_key_flag,
                                          MAX_KEY);
        break;
      }
    }

    /*
      There are usable range predicates for the next keypart and the
      range predicate for the current keypart allows us to make use of
      them. Move to the first range predicate for the next keypart.
      Push this range predicate to seq->stack and move on to the next
      keypart (if any). @see Sel_arg_range_sequence::stack
    */
    key_tree= key_tree->next_key_part->first();
    seq->stack_push_range(key_tree);
  }

  DBUG_ASSERT(!seq->stack_empty() && (seq->stack_top() != NULL));

  // We now have a full range predicate in seq->stack_top()
  RANGE_SEQ_ENTRY *cur= seq->stack_top();
  PARAM *param= seq->param;
  uint min_key_length= cur->min_key - param->min_key;

  if (cur->min_key_flag & GEOM_FLAG)
  {
    range->range_flag= cur->min_key_flag;

    /* Here minimum contains also function code bits, and maximum is +inf */
    range->start_key.key=    param->min_key;
    range->start_key.length= min_key_length;
    range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
    range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
    /*
      Spatial operators are only allowed on spatial indexes, and no
      spatial index can at the moment return rows in ROWID order
    */
    DBUG_ASSERT(!param->is_ror_scan);
  }
  else
  {
    const KEY *cur_key_info= &param->table->key_info[seq->real_keyno];
    range->range_flag= cur->min_key_flag | cur->max_key_flag;

    range->start_key.key=    param->min_key;
    range->start_key.length= cur->min_key - param->min_key;
    range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
    range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
                                                           HA_READ_KEY_EXACT);

    range->end_key.key=    param->max_key;
    range->end_key.length= cur->max_key - param->max_key;
    range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
    range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
                                                         HA_READ_AFTER_KEY);

    /*
      This is an equality range (keypart_0=X and ... and keypart_n=Z) if
        1) There are no flags indicating open range (e.g.,
           "keypart_x > y") or GIS.
        2) The lower bound and the upper bound of the range has the
           same value (min_key == max_key).
     */
    const uint is_open_range= (NO_MIN_RANGE | NO_MAX_RANGE |
                               NEAR_MIN | NEAR_MAX | GEOM_FLAG);
    const bool is_eq_range_pred=
      !(cur->min_key_flag & is_open_range) &&                           // 1)
      !(cur->max_key_flag & is_open_range) &&                           // 1)
      range->start_key.length == range->end_key.length &&               // 2)
      !memcmp(param->min_key, param->max_key, range->start_key.length);

    if (is_eq_range_pred)
    {
      range->range_flag= EQ_RANGE;
      /*
        Use statistics instead of index dives for estimates of rows in
        this range if the user requested it
      */
      if (param->use_index_statistics)
        range->range_flag|= USE_INDEX_STATISTICS;

      /*
        An equality range is a unique range (0 or 1 rows in the range)
        if the index is unique (1) and all keyparts are used (2).
        Note that keys which are extended with PK parts have no
        HA_NOSAME flag. So we can use user_defined_key_parts.
      */
      if (cur_key_info->flags & HA_NOSAME &&                              // 1)
          (uint)key_tree->part+1 == cur_key_info->user_defined_key_parts) // 2)
        range->range_flag|= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
    }

    if (param->is_ror_scan)
    {
      const uint key_part_number= key_tree->part + 1;
      /*
        If we get here, the condition on the key was converted to form
        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
          somecond(keyXpart{key_tree->part})"
        Check if
          somecond is "keyXpart{key_tree->part} = const" and
          uncovered "tail" of KeyX parts is either empty or is identical to
          first members of clustered primary key.

        If last key part is PK part added to the key as an extension
        and is_key_scan_ror() result is TRUE then it's possible to
        use ROR scan.
      */
      if ((!is_eq_range_pred &&
           key_part_number <= cur_key_info->user_defined_key_parts) ||
          !is_key_scan_ror(param, seq->real_keyno, key_part_number))
        param->is_ror_scan= FALSE;
    }
  }

  // Bookkeeping used by check_quick_select() for cost/statistics updates.
  seq->param->range_count++;
  seq->param->max_key_part=max<uint>(seq->param->max_key_part,key_tree->part);

  return 0;
}
9447 
9448 
9449 /*
9450   Calculate estimate of number records that will be retrieved by a range
9451   scan on given index using given SEL_ARG intervals tree.
9452 
9453   SYNOPSIS
9454     check_quick_select()
9455       param             Parameter from test_quick_select
9456       idx               Number of index to use in PARAM::key SEL_TREE::key
9457       index_only        TRUE  - assume only index tuples will be accessed
9458                         FALSE - assume full table rows will be read
9459       tree              Transformed selection condition, tree->key[idx] holds
9460                         the intervals for the given index.
9461       update_tbl_stats  TRUE <=> update table->quick_* with information
9462                         about range scan we've evaluated.
9463       mrr_flags   INOUT MRR access flags
9464       cost        OUT   Scan cost
9465 
9466   NOTES
9467     param->is_ror_scan is set to reflect if the key scan is a ROR (see
9468     is_key_scan_ror function for more info)
9469     param->table->quick_*, param->range_count (and maybe others) are
9470     updated with data of given key scan, see quick_range_seq_next for details.
9471 
9472   RETURN
9473     Estimate # of records to be retrieved.
9474     HA_POS_ERROR if estimate calculation failed due to table handler problems.
9475 */
9476 
static
ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                           SEL_ARG *tree, bool update_tbl_stats,
                           uint *mrr_flags, uint *bufsize, Cost_estimate *cost)
{
  Sel_arg_range_sequence seq(param);
  RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
  handler *file= param->table->file;
  ha_rows rows;
  uint keynr= param->real_keynr[idx];
  DBUG_ENTER("check_quick_select");

  /* Handle cases when we don't have a valid non-empty list of ranges */
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree

  // Bind the range sequence to the index whose SEL_ARG tree we traverse.
  seq.keyno= idx;
  seq.real_keyno= keynr;
  seq.start= tree;

  param->range_count=0;
  param->max_key_part=0;

  /*
    If there are more equality ranges than specified by the
    eq_range_index_dive_limit variable, we switch from using index
    dives to using statistics.
  */
  uint range_count= 0;
  param->use_index_statistics=
    eq_ranges_exceeds_limit(tree, &range_count,
                            param->thd->variables.eq_range_index_dive_limit);

  // Assume rowid order until the traversal below proves otherwise.
  param->is_ror_scan= TRUE;
  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;

  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
  *mrr_flags|= HA_MRR_NO_ASSOCIATION;
  /*
    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
  */
  if (param->order_direction != ORDER::ORDER_NOT_RELEVANT)
    *mrr_flags|= HA_MRR_SORTED;

  bool pk_is_clustered= file->primary_key_is_clustered();
  // Index-only retrieval is not useful for a clustered PK: its "index"
  // already contains the full rows.
  if (index_only &&
      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
      !(pk_is_clustered && keynr == param->table->s->primary_key))
     *mrr_flags |= HA_MRR_INDEX_ONLY;

  if (current_thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del

  *bufsize= param->thd->variables.read_rnd_buff_size;
  // Sets is_ror_scan to false for some queries, e.g. multi-ranges
  rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                          bufsize, mrr_flags, cost);
  if (rows != HA_POS_ERROR)
  {
    param->table->quick_rows[keynr]=rows;
    if (update_tbl_stats)
    {
      param->table->quick_keys.set_bit(keynr);
      param->table->quick_key_parts[keynr]=param->max_key_part+1;
      param->table->quick_n_ranges[keynr]= param->range_count;
      param->table->quick_condition_rows=
        min(param->table->quick_condition_rows, rows);
    }
    param->table->possible_quick_keys.set_bit(keynr);
  }
  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /*
      All scans are non-ROR scans for those index types.
      TODO: Don't have this logic here, make table engines return
      appropriate flags instead.
    */
    param->is_ror_scan= FALSE;
  }
  else
  {
    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
    if (param->table->s->primary_key == keynr && pk_is_clustered)
      param->is_ror_scan= TRUE;
  }
  if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;
  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
  DBUG_RETURN(rows);
}
9574 
9575 
9576 /*
9577   Check if key scan on given index with equality conditions on first n key
9578   parts is a ROR scan.
9579 
9580   SYNOPSIS
9581     is_key_scan_ror()
9582       param  Parameter from test_quick_select
9583       keynr  Number of key in the table. The key must not be a clustered
9584              primary key.
9585       nparts Number of first key parts for which equality conditions
9586              are present.
9587 
9588   NOTES
9589     ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
9590     ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
9591 
9592     This function is needed to handle a practically-important special case:
9593     an index scan is a ROR scan if it is done using a condition in form
9594 
9595         "key1_1=c_1 AND ... AND key1_n=c_n"
9596 
9597     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
9598 
9599     and the table has a clustered Primary Key defined as
9600 
9601       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
9602 
    i.e. the first key parts of it are identical to uncovered parts of the
9604     key being scanned. This function assumes that the index flags do not
9605     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
9606 
9607     Check (1) is made in quick_range_seq_next()
9608 
9609   RETURN
9610     TRUE   The scan is ROR-scan
9611     FALSE  Otherwise
9612 */
9613 
is_key_scan_ror(PARAM * param,uint keynr,uint nparts)9614 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts)
9615 {
9616   KEY *table_key= param->table->key_info + keynr;
9617 
9618   /*
9619     Range predicates on hidden key parts do not change the fact
9620     that a scan is rowid ordered, so we only care about user
9621     defined keyparts
9622   */
9623   const uint user_defined_nparts=
9624     std::min<uint>(nparts, table_key->user_defined_key_parts);
9625 
9626   KEY_PART_INFO *key_part= table_key->key_part + user_defined_nparts;
9627   KEY_PART_INFO *key_part_end= (table_key->key_part +
9628                                 table_key->user_defined_key_parts);
9629   uint pk_number;
9630 
9631   for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
9632   {
9633     uint16 fieldnr= param->table->key_info[keynr].
9634                     key_part[kp - table_key->key_part].fieldnr - 1;
9635     if (param->table->field[fieldnr]->key_length() != kp->length)
9636       return FALSE;
9637   }
9638 
9639   if (key_part == key_part_end)
9640     return TRUE;
9641 
9642   key_part= table_key->key_part + user_defined_nparts;
9643   pk_number= param->table->s->primary_key;
9644   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
9645     return FALSE;
9646 
9647   KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
9648   KEY_PART_INFO *pk_part_end=
9649     pk_part + param->table->key_info[pk_number].user_defined_key_parts;
9650   for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
9651        ++key_part, ++pk_part)
9652   {
9653     if ((key_part->field != pk_part->field) ||
9654         (key_part->length != pk_part->length))
9655       return FALSE;
9656   }
9657   return (key_part == key_part_end);
9658 }
9659 
9660 
9661 /*
9662   Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
9663 
9664   SYNOPSIS
9665     get_quick_select()
9666       param
9667       idx            Index of used key in param->key.
9668       key_tree       SEL_ARG tree for the used key
9669       mrr_flags      MRR parameter for quick select
9670       mrr_buf_size   MRR parameter for quick select
9671       parent_alloc   If not NULL, use it to allocate memory for
9672                      quick select data. Otherwise use quick->alloc.
9673   NOTES
9674     The caller must call QUICK_SELECT::init for returned quick select.
9675 
9676     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
9677     deallocated when the returned quick select is deleted.
9678 
9679   RETURN
9680     NULL on error
9681     otherwise created quick select
9682 */
9683 
QUICK_RANGE_SELECT *
get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
                 uint mrr_buf_size, MEM_ROOT *parent_alloc)
{
  QUICK_RANGE_SELECT *quick;
  bool create_err= FALSE;
  DBUG_ENTER("get_quick_select");

  // Spatial indexes require the GEOM flavor of the quick select.
  if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
    quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                      param->real_keynr[idx],
                                      MY_TEST(parent_alloc),
                                      parent_alloc, &create_err);
  else
    /*
      NOTE(review): unlike the GEOM branch, parent_alloc is not forwarded
      here (NULL is passed) although MY_TEST(parent_alloc) is -- the
      memdup_root below does use parent_alloc directly. Confirm this
      asymmetry is intentional.
    */
    quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                 param->real_keynr[idx],
                                 MY_TEST(parent_alloc), NULL, &create_err);

  if (quick)
  {
    // Translate the SEL_ARG tree into QUICK_RANGEs; on any failure the
    // half-constructed quick select is discarded and NULL is returned.
    if (create_err ||
	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
		       param->max_key,0))
    {
      delete quick;
      quick=0;
    }
    else
    {
      quick->mrr_flags= mrr_flags;
      quick->mrr_buf_size= mrr_buf_size;
      // Copy the keypart descriptions into memory owned by parent_alloc
      // if given, otherwise by the quick select's own MEM_ROOT.
      quick->key_parts=(KEY_PART*)
        memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                    (char*) param->key[idx],
                    sizeof(KEY_PART) *
                    actual_key_parts(&param->
                                     table->key_info[param->real_keynr[idx]]));
    }
  }
  DBUG_RETURN(quick);
}
9725 
9726 
9727 /*
9728 ** Fix this to get all possible sub_ranges
9729 */
/*
  Recursively walk the SEL_ARG tree (in-order over the R-B tree of each
  keypart, descending via next_key_part for equality prefixes) and append
  the resulting QUICK_RANGEs to quick->ranges.

  RETURN
    0  OK
    1  error (out of memory)
*/
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
	       SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
	       uchar *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;
  int min_part= key_tree->part-1, // # of keypart values in min_key buffer
      max_part= key_tree->part-1; // # of keypart values in max_key buffer

  // In-order traversal: emit all ranges smaller than this node first.
  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
  // Append this keypart's lower/upper bound values to the key buffers.
  min_part+= key_tree->store_min(key[key_tree->part].store_length,
                                 &tmp_min_key,min_key_flag);
  max_part+= key_tree->store_max(key[key_tree->part].store_length,
                                 &tmp_max_key,max_key_flag);

  if (key_tree->next_key_part &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
      key_tree->next_key_part->part == key_tree->part+1)
  {						  // const key as prefix
    if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
         memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
	 key_tree->min_flag==0 && key_tree->max_flag==0)
    {
      // Equality on this keypart: recurse so predicates on the following
      // keyparts generate subranges with this value as a prefix.
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    {
      // Non-equality prefix: later keyparts cannot form subranges, but
      // they can tighten the endpoints of this single range.
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
        min_part+= key_tree->next_key_part->store_min_key(key,
                                                          &tmp_min_key,
                                                          &tmp_min_flag,
                                                          MAX_KEY);
      if (!tmp_max_flag)
        max_part+= key_tree->next_key_part->store_max_key(key,
                                                          &tmp_max_key,
                                                          &tmp_max_flag,
                                                          MAX_KEY);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
  {
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }

  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
  // flag == 0 means both endpoints are closed: check for equality range.
  if (flag == 0)
  {
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      const KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
      /*
        Note that keys which are extended with PK parts have no
        HA_NOSAME flag. So we can use user_defined_key_parts.
      */
      if ((table_key->flags & HA_NOSAME) &&
          key_tree->part == table_key->user_defined_key_parts - 1)
      {
        if ((table_key->flags & HA_NULL_PART_KEY) &&
            null_part_in_key(key,
                             param->min_key,
                             (uint) (tmp_min_key - param->min_key)))
          flag|= NULL_RANGE;
        else
          flag|= UNIQUE_RANGE;
      }
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
  if (!(range= new QUICK_RANGE(param->min_key,
			       (uint) (tmp_min_key - param->min_key),
                               min_part >=0 ? make_keypart_map(min_part) : 0,
			       param->max_key,
			       (uint) (tmp_max_key - param->max_key),
                               max_part >=0 ? make_keypart_map(max_part) : 0,
			       flag)))
    return 1;			// out of memory

  set_if_bigger(quick->max_used_key_length, range->min_length);
  set_if_bigger(quick->max_used_key_length, range->max_length);
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
  if (insert_dynamic(&quick->ranges, &range))
    return 1;

 end:
  // Continue the in-order traversal with ranges greater than this node.
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}
9851 
9852 /*
9853   Return 1 if there is only one range and this uses the whole unique key
9854 */
9855 
unique_key_range()9856 bool QUICK_RANGE_SELECT::unique_key_range()
9857 {
9858   if (ranges.elements == 1)
9859   {
9860     QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
9861     if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
9862     {
9863       KEY *key=head->key_info+index;
9864       return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
9865     }
9866   }
9867   return 0;
9868 }
9869 
9870 
9871 
9872 /*
9873   Return TRUE if any part of the key is NULL
9874 
9875   SYNOPSIS
9876     null_part_in_key()
9877       key_part  Array of key parts (index description)
9878       key       Key values tuple
9879       length    Length of key values tuple in bytes.
9880 
9881   RETURN
9882     TRUE   The tuple has at least one "keypartX is NULL"
9883     FALSE  Otherwise
9884 */
9885 
null_part_in_key(KEY_PART * key_part,const uchar * key,uint length)9886 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
9887 {
9888   for (const uchar *end=key+length ;
9889        key < end;
9890        key+= key_part++->store_length)
9891   {
9892     if (key_part->null_bit && *key)
9893       return 1;
9894   }
9895   return 0;
9896 }
9897 
9898 
/*
  Check whether any field in 'fields' belongs to the index scanned by this
  quick select (delegates the per-index check to is_key_used()).
*/
bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
{
  return is_key_used(head, index, fields);
}
9903 
is_keys_used(const MY_BITMAP * fields)9904 bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
9905 {
9906   QUICK_RANGE_SELECT *quick;
9907   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9908   while ((quick= it++))
9909   {
9910     if (is_key_used(head, quick->index, fields))
9911       return 1;
9912   }
9913   return 0;
9914 }
9915 
is_keys_used(const MY_BITMAP * fields)9916 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
9917 {
9918   QUICK_RANGE_SELECT *quick;
9919   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9920   while ((quick= it++))
9921   {
9922     if (is_key_used(head, quick->index, fields))
9923       return 1;
9924   }
9925   return 0;
9926 }
9927 
is_keys_used(const MY_BITMAP * fields)9928 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
9929 {
9930   QUICK_SELECT_I *quick;
9931   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
9932   while ((quick= it++))
9933   {
9934     if (quick->is_keys_used(fields))
9935       return 1;
9936   }
9937   return 0;
9938 }
9939 
9940 
get_ft_select(THD * thd,TABLE * table,uint key)9941 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
9942 {
9943   bool create_err= FALSE;
9944   FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
9945   if (create_err)
9946   {
9947     delete fts;
9948     return NULL;
9949   }
9950   else
9951     return fts;
9952 }
9953 
9954 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
9955 static bool
key_has_nulls(const KEY * key_info,const uchar * key,uint key_len)9956 key_has_nulls(const KEY* key_info, const uchar *key, uint key_len)
9957 {
9958   KEY_PART_INFO *curr_part, *end_part;
9959   const uchar* end_ptr= key + key_len;
9960   curr_part= key_info->key_part;
9961   end_part= curr_part + key_info->user_defined_key_parts;
9962 
9963   for (; curr_part != end_part && key < end_ptr; curr_part++)
9964   {
9965     if (curr_part->null_bit && *key)
9966       return TRUE;
9967 
9968     key += curr_part->store_length;
9969   }
9970   return FALSE;
9971 }
9972 #endif
9973 
/*
  Create quick select from ref/ref_or_null scan.

  SYNOPSIS
    get_quick_select_for_ref()
      thd      Thread handle
      table    Table to access
      ref      ref[_or_null] scan parameters
      records  Estimate of number of records (needed only to construct
               quick select)
  NOTES
    This allocates things in a new memory root, as this may be called many
    times during a query.

  RETURN
    Quick select that retrieves the same rows as passed ref scan
    NULL on error.
*/

QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                             TABLE_REF *ref, ha_rows records)
{
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
  QUICK_RANGE *range;
  uint part;
  bool create_err= FALSE;
  Cost_estimate cost;

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;

  if (!quick || create_err)
    return 0;			/* no ranges found */
  if (quick->init())
    goto err;
  quick->records= records;

  /* Copy the ref lookup value into the key buffer, then build the range. */
  if ((cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error) ||
      !(range= new(alloc) QUICK_RANGE()))
    goto err;                                   // out of memory

  /* The single range is an exact match on the whole ref key prefix. */
  range->min_key= range->max_key= ref->key_buff;
  range->min_length= range->max_length= ref->key_length;
  range->min_keypart_map= range->max_keypart_map=
    make_prev_keypart_map(ref->key_parts);
  /* Only a full-length key lookup qualifies as EQ_RANGE. */
  range->flag= (ref->key_length == key_info->key_length ? EQ_RANGE : 0);

  /* Copy descriptions of the used key parts into the quick select. */
  if (!(quick->key_parts=key_part=(KEY_PART *)
	alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
    goto err;

  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
    key_part->length=       key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
    key_part->null_bit=     key_info->key_part[part].null_bit;
    key_part->flag=         (uint8) key_info->key_part[part].key_part_flag;
  }
  if (insert_dynamic(&quick->ranges, &range))
    goto err;

  /*
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
    if (!(null_range= new (alloc)
          QUICK_RANGE(ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts),
                      ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts), EQ_RANGE)))
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
    if (insert_dynamic(&quick->ranges, &null_range))
      goto err;
  }

  /* Call multi_range_read_info() to get the MRR flags and buffer size */
  quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
  if (thd->lex->sql_command != SQLCOM_SELECT)
    quick->mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
  if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
                                           ref->key_length))
    quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
#endif

  quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
  if (table->file->multi_range_read_info(quick->index, 1, records,
                                         &quick->mrr_buf_size,
                                         &quick->mrr_flags, &cost))
    goto err;

  return quick;
err:
  delete quick;
  return 0;
}
10092 
10093 
/*
  Perform key scans for all used indexes (except CPK), get rowids and merge
  them into an ordered non-recurrent sequence of rowids.

  The merge/duplicate removal is performed using Unique class. We put all
  rowids into Unique, get the sorted sequence and destroy the Unique.

  If table has a clustered primary key that covers all rows (TRUE for bdb
  and innodb currently) and one of the index_merge scans is a scan on PK,
  then rows that will be retrieved by PK scan are not put into Unique and
  primary key scan is not performed here, it is performed later separately.

  RETURN
    0     OK
    other error
*/

int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
{
  List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
  QUICK_RANGE_SELECT* cur_quick;
  int result;
  handler *file= head->file;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");

  /* We're going to just read rowids. */
  head->set_keyread(TRUE);
  head->prepare_for_position();

  cur_quick_it.rewind();
  cur_quick= cur_quick_it++;
  DBUG_ASSERT(cur_quick != 0);

  DBUG_EXECUTE_IF("simulate_bug13919180",
                  {
                    my_error(ER_UNKNOWN_ERROR, MYF(0));
                    DBUG_RETURN(1);
                  });
  /*
    We reuse the same instance of handler so we need to call both init and
    reset here.
  */
  if (cur_quick->init() || cur_quick->reset())
    DBUG_RETURN(1);

  if (unique == NULL)
  {
    /* First execution: create the Unique that deduplicates rowids. */
    DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_ABORT(); );
    DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
                    DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );

    unique= new Unique(refpos_order_cmp, (void *)file,
                       file->ref_length,
                       thd->variables.sortbuff_size);
  }
  else
  {
    /* Re-execution (e.g. subquery): recycle the existing Unique. */
    unique->reset();
    filesort_free_buffers(head, false);
  }

  DBUG_ASSERT(file->ref_length == unique->get_size());
  DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());

  if (!unique)
    DBUG_RETURN(1);
  /* Drain each quick select in turn, feeding every rowid into 'unique'. */
  for (;;)
  {
    /* Current scan is exhausted: advance to the next quick select. */
    while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
    {
      cur_quick->range_end();
      cur_quick= cur_quick_it++;
      if (!cur_quick)
        break;

      if (cur_quick->file->inited)
        cur_quick->file->ha_index_end();
      if (cur_quick->init() || cur_quick->reset())
        DBUG_RETURN(1);
    }

    if (result)
    {
      if (result != HA_ERR_END_OF_FILE)
      {
        cur_quick->range_end();
        DBUG_RETURN(result);
      }
      break;  /* all scans exhausted */
    }

    if (thd->killed)
      DBUG_RETURN(1);

    /* skip row if it will be retrieved by clustered PK scan */
    if (pk_quick_select && pk_quick_select->row_in_ranges())
      continue;

    cur_quick->file->position(cur_quick->record);
    result= unique->unique_add((char*)cur_quick->file->ref);
    if (result)
      DBUG_RETURN(1);
  }

  /*
    Ok all rowids are in the Unique now. The next call will initialize
    head->sort structure so it can be used to iterate through the rowids
    sequence.
  */
  result= unique->get(head);
  doing_pk_scan= FALSE;
  /* index_merge currently doesn't support "using index" at all */
  head->set_keyread(FALSE);
  if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1, TRUE))
    DBUG_RETURN(1);
  DBUG_RETURN(result);
}
10211 
10212 
/*
  Get next row for index_merge.
  NOTES
    The rows are read from
      1. rowids stored in Unique.
      2. QUICK_RANGE_SELECT with clustered primary key (if any).
    The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
*/

int QUICK_INDEX_MERGE_SELECT::get_next()
{
  int result;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");

  /* Phase 2: rows come straight from the clustered PK quick select. */
  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  /* Phase 1: read rows via the sorted rowid sequence (-1 means EOF). */
  if ((result= read_record.read_record(&read_record)) == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
    free_io_cache(head);
    /* All rows from Unique have been retrieved, do a clustered PK scan */
    if (pk_quick_select)
    {
      doing_pk_scan= TRUE;
      if ((result= pk_quick_select->init()) ||
          (result= pk_quick_select->reset()))
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
}
10248 
10249 
/*
  Retrieve next record.
  SYNOPSIS
     QUICK_ROR_INTERSECT_SELECT::get_next()

  NOTES
    Invariant on enter/exit: all intersected selects have retrieved all index
    records with rowid <= some_rowid_val and no intersected select has
    retrieved any index records with rowid > some_rowid_val.
    We start fresh and loop until we have retrieved the same rowid in each of
    the key scans or we got an error.

    If a Clustered PK scan is present, it is used only to check if row
    satisfies its condition (and never used for row retrieval).

    Locking: to ensure that exclusive locks are only set on records that
    are included in the final result we must release the lock
    on all rows we read but do not include in the final result. This
    must be done on each index that reads the record and the lock
    must be released using the same handler (the same quick object) as
    used when reading the record.

  RETURN
   0     - Ok
   other - Error code if any error occurred.
*/

int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;

  /* quick that reads the given rowid first. This is needed in order
  to be able to unlock the row using the same handler object that locked
  it */
  QUICK_RANGE_SELECT* quick_with_last_rowid;

  int error, cmp;
  uint last_rowid_count=0;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  do
  {
    /* Get a rowid for first quick and save it as a 'candidate' */
    quick= quick_it++;
    error= quick->get_next();
    if (cpk_quick)
    {
      /* Only rows inside the CPK ranges can be part of the intersection. */
      while (!error && !cpk_quick->row_in_ranges())
      {
        quick->file->unlock_row(); /* row not in range; unlock */
        error= quick->get_next();
      }
    }
    if (error)
      DBUG_RETURN(error);

    quick->file->position(quick->record);
    memcpy(last_rowid, quick->file->ref, head->file->ref_length);
    last_rowid_count= 1;
    quick_with_last_rowid= quick;

    /* Make every other scan catch up to (or pass) the candidate rowid. */
    while (last_rowid_count < quick_selects.elements)
    {
      if (!(quick= quick_it++))
      {
        quick_it.rewind();
        quick= quick_it++;
      }

      /* Advance this scan until its rowid is >= the candidate rowid. */
      do
      {
        DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
                        DBUG_SET("+d,innodb_report_deadlock"););
        if ((error= quick->get_next()))
        {
          /* On certain errors like deadlock, trx might be rolled back.*/
          if (!current_thd->transaction_rollback_request)
            quick_with_last_rowid->file->unlock_row();
          DBUG_RETURN(error);
        }
        quick->file->position(quick->record);
        cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
        if (cmp < 0)
        {
          /* This row is being skipped.  Release lock on it. */
          quick->file->unlock_row();
        }
      } while (cmp < 0);

      /* Ok, current select 'caught up' and returned ref >= cur_ref */
      if (cmp > 0)
      {
        /* Found a row with ref > cur_ref. Make it a new 'candidate' */
        if (cpk_quick)
        {
          while (!cpk_quick->row_in_ranges())
          {
            quick->file->unlock_row(); /* row not in range; unlock */
            if ((error= quick->get_next()))
            {
              /* On certain errors like deadlock, trx might be rolled back.*/
              if (!current_thd->transaction_rollback_request)
                quick_with_last_rowid->file->unlock_row();
              DBUG_RETURN(error);
            }
          }
          quick->file->position(quick->record);
        }
        memcpy(last_rowid, quick->file->ref, head->file->ref_length);
        quick_with_last_rowid->file->unlock_row();
        last_rowid_count= 1;
        quick_with_last_rowid= quick;
      }
      else
      {
        /* current 'candidate' row confirmed by this select */
        last_rowid_count++;
      }
    }

    /* We get here if we got the same row ref in all scans. */
    if (need_to_fetch_row)
      error= head->file->ha_rnd_pos(head->record[0], last_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
10377 
10378 
/*
  Retrieve next record.
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::get_next()

  NOTES
    Enter/exit invariant:
    For each quick select in the queue a {key,rowid} tuple has been
    retrieved but the corresponding row hasn't been passed to output.

  RETURN
   0     - Ok
   other - Error code if any error occurred.
*/

int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  uchar *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");

  do
  {
    /* Pop smallest rowids until we see one different from the previous. */
    do
    {
      if (!queue.elements)
        DBUG_RETURN(HA_ERR_END_OF_FILE);
      /* Ok, we have a queue with >= 1 scans */

      quick= (QUICK_SELECT_I*)queue_top(&queue);
      memcpy(cur_rowid, quick->last_rowid, rowid_length);

      /* put into queue rowid from the same stream as top element */
      if ((error= quick->get_next()))
      {
        if (error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(error);
        queue_remove(&queue, 0);  /* this scan is exhausted */
      }
      else
      {
        quick->save_last_pos();
        queue_replaced(&queue);
      }

      if (!have_prev_rowid)
      {
        /* No rows have been returned yet */
        dup_row= FALSE;
        have_prev_rowid= TRUE;
      }
      else
        dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
    } while (dup_row);

    /* Swap buffers so the rowid just read becomes the "previous" one. */
    tmp= cur_rowid;
    cur_rowid= prev_rowid;
    prev_rowid= tmp;

    error= head->file->ha_rnd_pos(quick->record, prev_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
10443 
10444 
/*
  Prepare this quick select for a (re-)scan: reset range traversal state,
  set keyread mode, init the index handler if needed, (re)use the MRR
  buffer and initialize the multi-range read.

  RETURN
    0      OK
    other  Error code from handler initialization / buffer allocation
*/
int QUICK_RANGE_SELECT::reset()
{
  uint  buf_size;
  uchar *mrange_buff;
  int   error;
  HANDLER_BUFFER empty_buf;
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  last_range= NULL;
  cur_range= (QUICK_RANGE**) ranges.buffer;

  /* set keyread to TRUE if index is covering */
  if(!head->no_keyread && head->covering_keys.is_set(index))
    head->set_keyread(true);
  else
    head->set_keyread(false);

  if (!file->inited)
  {
    /* ROR-merged scans use their own column bitmap for both read/write. */
    if (in_ror_merged_scan)
      head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    DBUG_EXECUTE_IF("bug14365043_2",
                    DBUG_SET("+d,ha_index_init_fail"););
    if ((error= file->ha_index_init(index, sorted)))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(error);
    }
  }

  /* Allocate buffer if we need one but haven't allocated it yet */
  if (mrr_buf_size && !mrr_buf_desc)
  {
    buf_size= mrr_buf_size;
    /* Descriptor and data buffer are allocated in one my_multi_malloc. */
    while (buf_size && !my_multi_malloc(MYF(MY_WME),
                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
                                        &mrange_buff, buf_size,
                                        NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      buf_size/= 2;
    }
    if (!mrr_buf_desc)
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    /* Initialize the handler buffer. */
    mrr_buf_desc->buffer= mrange_buff;
    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
    mrr_buf_desc->end_of_used_area= mrange_buff;
#ifdef HAVE_purify
    /*
      We need this until ndb will use the buffer efficiently
      (Now ndb stores  complete row in here, instead of only the used fields
      which gives us valgrind warnings in compare_record[])
    */
    memset(mrange_buff, 0, buf_size);
#endif
  }

  /* With no buffer (mrr_buf_size == 0), pass an empty descriptor instead. */
  if (!mrr_buf_desc)
    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;

  RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
                                     mrr_flags, mrr_buf_desc? mrr_buf_desc:
                                                              &empty_buf);
  DBUG_RETURN(error);
}
10513 
10514 
10515 /*
10516   Range sequence interface implementation for array<QUICK_RANGE>: initialize
10517 
10518   SYNOPSIS
10519     quick_range_seq_init()
10520       init_param  Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
10521       n_ranges    Number of ranges in the sequence (ignored)
10522       flags       MRR flags (currently not used)
10523 
10524   RETURN
10525     Opaque value to be passed to quick_range_seq_next
10526 */
10527 
quick_range_seq_init(void * init_param,uint n_ranges,uint flags)10528 range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
10529 {
10530   QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
10531   quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
10532   quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
10533   quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur +
10534                                   quick->ranges.elements;
10535   return &quick->qr_traversal_ctx;
10536 }
10537 
10538 
10539 /*
10540   Range sequence interface implementation for array<QUICK_RANGE>: get next
10541 
10542   SYNOPSIS
10543     quick_range_seq_next()
10544       rseq        Value returned from quick_range_seq_init
10545       range  OUT  Store information about the range here
10546 
10547   RETURN
10548     0  Ok
10549     1  No more ranges in the sequence
10550 */
10551 
quick_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)10552 uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
10553 {
10554   QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
10555 
10556   if (ctx->cur == ctx->last)
10557     return 1; /* no more ranges */
10558 
10559   QUICK_RANGE *cur= *(ctx->cur);
10560   key_range *start_key= &range->start_key;
10561   key_range *end_key=   &range->end_key;
10562 
10563   start_key->key=    cur->min_key;
10564   start_key->length= cur->min_length;
10565   start_key->keypart_map= cur->min_keypart_map;
10566   start_key->flag=   ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
10567                       (cur->flag & EQ_RANGE) ?
10568                       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
10569   end_key->key=      cur->max_key;
10570   end_key->length=   cur->max_length;
10571   end_key->keypart_map= cur->max_keypart_map;
10572   /*
10573     We use HA_READ_AFTER_KEY here because if we are reading on a key
10574     prefix. We want to find all keys with this prefix.
10575   */
10576   end_key->flag=     (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
10577                       HA_READ_AFTER_KEY);
10578   range->range_flag= cur->flag;
10579   ctx->cur++;
10580   return 0;
10581 }
10582 
10583 
/*
  MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB

  SYNOPSIS
    mrr_persistent_flag_storage()
      seq  Range sequence being traversed
      idx  Number of range

  DESCRIPTION
    MRR/NDB implementation needs to store some bits for each range. This
    function returns a reference to the "range_flag" associated with the
    range number idx.

    This function should be removed when we get a proper MRR/NDB
    implementation.

  RETURN
    Reference to range_flag associated with range number #idx
*/

uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
{
  /* 'first' is the base of the QUICK_RANGE* array set up at seq init. */
  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
  return ctx->first[idx]->flag;
}
10609 
10610 
/*
  MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB

  SYNOPSIS
    mrr_get_ptr_by_idx()
      seq  Range sequence being traversed
      idx  Number of the range

  DESCRIPTION
    An extension of MRR range sequence interface needed by NDB: return the
    data associated with the given range.

    A proper MRR interface implementer is supposed to store and return
    range-associated data. NDB stores number of the range instead. So this
    is a helper function that translates range number to range associated
    data.

    This function does nothing, as currently there is only one user of the
    MRR interface - the quick range select code, and this user doesn't need
    to use range-associated data.

  RETURN
    Reference to range-associated data
*/

char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
{
  /* Intentionally a no-op: both parameters are ignored (see DESCRIPTION). */
  static char *dummy;
  return dummy;
}
10641 
10642 
10643 /*
10644   Get next possible record using quick-struct.
10645 
10646   SYNOPSIS
10647     QUICK_RANGE_SELECT::get_next()
10648 
10649   NOTES
10650     Record is read into table->record[0]
10651 
10652   RETURN
10653     0			Found row
10654     HA_ERR_END_OF_FILE	No (more) rows in range
10655     #			Error code
10656 */
10657 
get_next()10658 int QUICK_RANGE_SELECT::get_next()
10659 {
10660   char *dummy;
10661   MY_BITMAP * const save_read_set= head->read_set;
10662   MY_BITMAP * const save_write_set= head->write_set;
10663   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
10664 
10665   if (in_ror_merged_scan)
10666   {
10667     /*
10668       We don't need to signal the bitmap change as the bitmap is always the
10669       same for this head->file
10670     */
10671     head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
10672   }
10673 
10674   int result= file->multi_range_read_next(&dummy);
10675 
10676   if (in_ror_merged_scan)
10677   {
10678     /* Restore bitmaps set on entry */
10679     head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
10680   }
10681   DBUG_RETURN(result);
10682 }
10683 
10684 
/*
  Get the next record with a different prefix.

  @param prefix_length   length of cur_prefix
  @param group_key_parts The number of key parts in the group prefix
  @param cur_prefix      prefix of a key to be searched for

  Each subsequent call to the method retrieves the first record that has a
  prefix with length prefix_length and which is different from cur_prefix,
  such that the record with the new prefix is within the ranges described by
  this->ranges. The record found is stored into the buffer pointed by
  this->record. The method is useful for GROUP-BY queries with range
  conditions to discover the prefix of the next group that satisfies the range
  conditions.

  @todo

    This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
    methods should be unified into a more general one to reduce code
    duplication.

  @retval 0                  on success
  @retval HA_ERR_END_OF_FILE if returned all keys
  @retval other              if some error occurred
*/

int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
                                        uint group_key_parts,
                                        uchar *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
  const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);

  for (;;)
  {
    int result;
    if (last_range)
    {
      /* Read the next record in the same range with prefix after cur_prefix. */
      DBUG_ASSERT(cur_prefix != NULL);
      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
                                      HA_READ_AFTER_KEY);
      /* max_keypart_map == 0 means the range has no upper bound to check. */
      if (result || last_range->max_keypart_map == 0)
        DBUG_RETURN(result);

      /* Found a record; return it if it is still within the current range. */
      key_range previous_endpoint;
      last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
      if (file->compare_key(&previous_endpoint) <= 0)
        DBUG_RETURN(0);
    }

    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /* Start scanning the next range, clipped to the group prefix length. */
    key_range start_key, end_key;
    last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
    last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);

    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
				   last_range->max_keypart_map ? &end_key : 0,
                                   MY_TEST(last_range->flag & EQ_RANGE),
				   sorted);
    if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
      last_range= 0;			// Stop searching

    if (result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;			// No matching rows; go to next range
  }
}
10762 
10763 
/*
  Get next row for a geometrical (spatial) index scan.

  Works like a plain range scan, but the range's flag encodes the spatial
  search mode rather than an ordinary comparison operator.

  RETURN
    0                   Row was found and stored in 'record'
    HA_ERR_END_OF_FILE  No more rows in any range
    other               Handler error code
*/

int QUICK_RANGE_SELECT_GEOM::get_next()
{
  DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");

  for (;;)
  {
    int result;
    if (last_range)
    {
      // Already read through key
      /* Fetch the next row matching the current spatial search key. */
      result= file->ha_index_next_same(record, last_range->min_key,
                                       last_range->min_length);
      if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);                    /* Row found (0) or hard error */
    }

    /* Number of ranges this scan has not consumed yet. */
    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /*
      The range's flag stores the spatial search mode XOR-ed with GEOM_FLAG;
      XOR-ing with GEOM_FLAG again recovers the ha_rkey_function to use.
    */
    result= file->ha_index_read_map(record, last_range->min_key,
                                    last_range->min_keypart_map,
                                    (ha_rkey_function)(last_range->flag ^
                                                       GEOM_FLAG));
    if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;				// Not found, to next range
  }
}
10800 
10801 
10802 /*
10803   Check if current row will be retrieved by this QUICK_RANGE_SELECT
10804 
10805   NOTES
10806     It is assumed that currently a scan is being done on another index
10807     which reads all necessary parts of the index that is scanned by this
10808     quick select.
10809     The implementation does a binary search on sorted array of disjoint
10810     ranges, without taking size of range into account.
10811 
10812     This function is used to filter out clustered PK scan rows in
10813     index_merge quick select.
10814 
10815   RETURN
10816     TRUE  if current row will be retrieved by this quick select
10817     FALSE if not
10818 */
10819 
row_in_ranges()10820 bool QUICK_RANGE_SELECT::row_in_ranges()
10821 {
10822   QUICK_RANGE *res;
10823   uint min= 0;
10824   uint max= ranges.elements - 1;
10825   uint mid= (max + min)/2;
10826 
10827   while (min != max)
10828   {
10829     if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
10830     {
10831       /* current row value > mid->max */
10832       min= mid + 1;
10833     }
10834     else
10835       max= mid;
10836     mid= (min + max) / 2;
10837   }
10838   res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
10839   return (!cmp_next(res) && !cmp_prev(res));
10840 }
10841 
10842 /*
10843   This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
10844   get_next() interface, but we have to hold a pointer to the original
10845   QUICK_RANGE_SELECT because its data are used all over the place. What
10846   should be done is to factor out the data that is needed into a base
10847   class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
10848   which handle the ranges and implement the get_next() function.  But
10849   for now, this seems to work right at least.
10850  */
10851 
/*
  Build a descending scan from an existing ascending QUICK_RANGE_SELECT 'q':
  copy its state via the base-class copy constructor, reverse the order of
  its ranges, and drop EQ_RANGE flags that only hold for full-key matches.

  NOTE(review): the 'error' out-parameter is never written in this body;
  the visible caller (make_reverse) pre-initializes it to FALSE — confirm
  no other path is expected to set it.
*/
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts_arg,
                                     bool *error)
 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
  used_key_parts (used_key_parts_arg)
{
  QUICK_RANGE *r;
  /*
    Use default MRR implementation for reverse scans. No table engine
    currently can do an MRR scan with output in reverse index order.
  */
  mrr_buf_desc= NULL;
  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
  mrr_buf_size= 0;


  /* push_front() on each range in order yields rev_ranges reversed. */
  QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
  QUICK_RANGE **end_range= pr + ranges.elements;
  for (; pr!=end_range; pr++)
    rev_ranges.push_front(*pr);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  for (r = rev_it++; r; r = rev_it++)
  {
    if ((r->flag & EQ_RANGE) &&
	head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;				// Don't free shared mem
}
10884 
10885 
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
   *   - if it is an EQ_RANGE (which means that max key covers the entire
   *     key) and the query does not use any hidden key fields that are
   *     not considered when the range optimizer sets EQ_RANGE (e.g. the
   *     primary key added by InnoDB), then go directly to the key and
   *     read through it (sorting backwards is same as sorting forwards).
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (last_range)
    {						// Already read through key
      /*
        Full-key EQ_RANGE: all rows in the range compare equal on the used
        key parts, so a forward index_next_same() scan is acceptable;
        otherwise keep stepping backwards with index_prev().
      */
      result = ((last_range->flag & EQ_RANGE &&
                 used_key_parts <=
                 head->key_info[index].user_defined_key_parts) ?
                file->ha_index_next_same(record, last_range->min_key,
                                         last_range->min_length) :
                file->ha_index_prev(record));
      if (!result)
      {
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);			// Row is inside the range
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(last_range= rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    // Case where we can avoid descending scan, see comment above
    const bool eqrange_all_keyparts= (last_range->flag & EQ_RANGE) &&
      (used_key_parts <= head->key_info[index].user_defined_key_parts);

    /*
      If we have pushed an index condition (ICP) and this quick select
      will use ha_index_prev() to read data, we need to let the
      handler know where to end the scan in order to avoid that the
      ICP implementation continues to read past the range boundary.

      An addition for MyRocks:
      MyRocks needs to know both start of the range and end of the range
      in order to use its bloom filters. This is useful regardless of whether
      ICP is usable (e.g. it is used for index-only scans which do not use
      ICP). Because of that, we remove the following:
      //  //  if (file->pushed_idx_cond)
    */
    {
      if (!eqrange_all_keyparts)
      {
        key_range min_range;
        last_range->make_min_endpoint(&min_range);
        if(min_range.length > 0)
          file->set_end_range(&min_range, handler::RANGE_SCAN_DESC);
        else
          file->set_end_range(NULL, handler::RANGE_SCAN_DESC);
      }
      else
      {
        /*
          Will use ha_index_next_same() for reading records. In case we have
          set the end range for an earlier range, this needs to be cleared.
        */
        file->set_end_range(NULL, handler::RANGE_SCAN_ASC);
      }
    }

    /* Hand both endpoints to the handler before positioning (see above). */
    key_range prepare_range_start;
    key_range prepare_range_end;

    last_range->make_min_endpoint(&prepare_range_start);
    last_range->make_max_endpoint(&prepare_range_end);
    result = file->prepare_range_scan((last_range->flag & NO_MIN_RANGE)
                                       ? NULL : &prepare_range_start,
                                      (last_range->flag & NO_MAX_RANGE)
                                       ? NULL : &prepare_range_end);
    if (result)
      DBUG_RETURN(result);

    if (last_range->flag & NO_MAX_RANGE)        // Read last record
    {
      int local_error;
      if ((local_error= file->ha_index_last(record)))
      {
        /*
          HA_ERR_END_OF_FILE is returned both when the table is empty and when
          there are no qualifying records in the range (when using ICP).
          Interpret this return value as "no qualifying rows in the range" to
          avoid loss of records. If the error code truly meant "empty table"
          the next iteration of the loop will exit.
        */
        if (local_error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(local_error);
        last_range= NULL;                       // Go to next range
        continue;
      }

      if (cmp_prev(last_range) == 0)
	DBUG_RETURN(0);
      last_range= 0;                            // No match; go to next range
      continue;
    }

    if (eqrange_all_keyparts)

    {
      /* Position at the equality prefix; rows are then read forwards. */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      HA_READ_KEY_EXACT);
    }
    else
    {
      DBUG_ASSERT(last_range->flag & NEAR_MAX ||
                  (last_range->flag & EQ_RANGE &&
                   used_key_parts >
                   head->key_info[index].user_defined_key_parts) ||
                  range_reads_after_key(last_range));
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      ((last_range->flag & NEAR_MAX) ?
                                       HA_READ_BEFORE_KEY :
                                       HA_READ_PREFIX_LAST_OR_PREV));
    }
    if (result)
    {
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
      last_range= 0;                            // Not found, to next range
      continue;
    }
    if (cmp_prev(last_range) == 0)
    {
      if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
	last_range= 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    last_range= 0;                              // To next range
  }
}
11035 
11036 
11037 /**
11038   Create a compatible quick select with the result ordered in an opposite way
11039 
11040   @param used_key_parts_arg  Number of used key parts
11041 
11042   @retval NULL in case of errors (OOM etc)
11043   @retval pointer to a newly created QUICK_SELECT_DESC if success
11044 */
11045 
make_reverse(uint used_key_parts_arg)11046 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
11047 {
11048   bool error= FALSE;
11049   QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg,
11050                                                       &error);
11051   if (new_quick == NULL || error)
11052   {
11053     delete new_quick;
11054     return NULL;
11055   }
11056   return new_quick;
11057 }
11058 
11059 
/*
  Compare if found key is over max-value
  Returns 0 if key <= range->max_key
  TODO: Figure out why can't this function be as simple as cmp_prev().
*/

int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  KEY_PART *key_part=key_parts;
  uint store_length;

  /*
    Walk the stored max key part by part, comparing each stored part
    against the corresponding field value of the current row.
  */
  for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
       key < end;
       key+= store_length, key_part++)
  {
    int cmp;
    store_length= key_part->store_length;
    if (key_part->null_bit)
    {
      /* For nullable key parts the first stored byte is the NULL flag. */
      if (*key)
      {
        /* Max endpoint is NULL: only a NULL field value is not above it. */
        if (!key_part->field->is_null())
          return 1;
        continue;
      }
      else if (key_part->field->is_null())
        return 0;                               /* NULL sorts first: inside */
      key++;					// Skip null byte
      store_length--;
    }
    if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
      return 0;
    if (cmp > 0)
      return 1;
  }
  /* All parts equal: outside only if the upper bound is open (NEAR_MAX). */
  return (range_arg->flag & NEAR_MAX) ? 1 : 0;          // Exact match
}
11100 
11101 
11102 /*
11103   Returns 0 if found key is inside range (found key >= range->min_key).
11104 */
11105 
cmp_prev(QUICK_RANGE * range_arg)11106 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
11107 {
11108   int cmp;
11109   if (range_arg->flag & NO_MIN_RANGE)
11110     return 0;					/* key can't be to small */
11111 
11112   cmp= key_cmp(key_part_info, range_arg->min_key,
11113                range_arg->min_length);
11114   if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
11115     return 0;
11116   return 1;                                     // outside of range
11117 }
11118 
11119 
11120 /*
11121  * TRUE if this range will require using HA_READ_AFTER_KEY
11122    See comment in get_next() about this
11123  */
11124 
range_reads_after_key(QUICK_RANGE * range_arg)11125 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
11126 {
11127   return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
11128 	  !(range_arg->flag & EQ_RANGE) ||
11129 	  head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
11130 }
11131 
11132 
add_info_string(String * str)11133 void QUICK_RANGE_SELECT::add_info_string(String *str)
11134 {
11135   KEY *key_info= head->key_info + index;
11136   str->append(key_info->name);
11137 }
11138 
add_info_string(String * str)11139 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
11140 {
11141   QUICK_RANGE_SELECT *quick;
11142   bool first= TRUE;
11143   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11144   str->append(STRING_WITH_LEN("sort_union("));
11145   while ((quick= it++))
11146   {
11147     if (!first)
11148       str->append(',');
11149     else
11150       first= FALSE;
11151     quick->add_info_string(str);
11152   }
11153   if (pk_quick_select)
11154   {
11155     str->append(',');
11156     pk_quick_select->add_info_string(str);
11157   }
11158   str->append(')');
11159 }
11160 
add_info_string(String * str)11161 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
11162 {
11163   bool first= TRUE;
11164   QUICK_RANGE_SELECT *quick;
11165   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11166   str->append(STRING_WITH_LEN("intersect("));
11167   while ((quick= it++))
11168   {
11169     KEY *key_info= head->key_info + quick->index;
11170     if (!first)
11171       str->append(',');
11172     else
11173       first= FALSE;
11174     str->append(key_info->name);
11175   }
11176   if (cpk_quick)
11177   {
11178     KEY *key_info= head->key_info + cpk_quick->index;
11179     str->append(',');
11180     str->append(key_info->name);
11181   }
11182   str->append(')');
11183 }
11184 
add_info_string(String * str)11185 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
11186 {
11187   bool first= TRUE;
11188   QUICK_SELECT_I *quick;
11189   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11190   str->append(STRING_WITH_LEN("union("));
11191   while ((quick= it++))
11192   {
11193     if (!first)
11194       str->append(',');
11195     else
11196       first= FALSE;
11197     quick->add_info_string(str);
11198   }
11199   str->append(')');
11200 }
11201 
11202 
add_keys_and_lengths(String * key_names,String * used_lengths)11203 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
11204                                               String *used_lengths)
11205 {
11206   char buf[64];
11207   uint length;
11208   KEY *key_info= head->key_info + index;
11209   key_names->append(key_info->name);
11210   length= longlong2str(max_used_key_length, buf, 10) - buf;
11211   used_lengths->append(buf, length);
11212 }
11213 
add_keys_and_lengths(String * key_names,String * used_lengths)11214 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
11215                                                     String *used_lengths)
11216 {
11217   char buf[64];
11218   uint length;
11219   bool first= TRUE;
11220   QUICK_RANGE_SELECT *quick;
11221 
11222   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11223   while ((quick= it++))
11224   {
11225     if (first)
11226       first= FALSE;
11227     else
11228     {
11229       key_names->append(',');
11230       used_lengths->append(',');
11231     }
11232 
11233     KEY *key_info= head->key_info + quick->index;
11234     key_names->append(key_info->name);
11235     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11236     used_lengths->append(buf, length);
11237   }
11238   if (pk_quick_select)
11239   {
11240     KEY *key_info= head->key_info + pk_quick_select->index;
11241     key_names->append(',');
11242     key_names->append(key_info->name);
11243     length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
11244     used_lengths->append(',');
11245     used_lengths->append(buf, length);
11246   }
11247 }
11248 
add_keys_and_lengths(String * key_names,String * used_lengths)11249 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
11250                                                       String *used_lengths)
11251 {
11252   char buf[64];
11253   uint length;
11254   bool first= TRUE;
11255   QUICK_RANGE_SELECT *quick;
11256   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11257   while ((quick= it++))
11258   {
11259     KEY *key_info= head->key_info + quick->index;
11260     if (first)
11261       first= FALSE;
11262     else
11263     {
11264       key_names->append(',');
11265       used_lengths->append(',');
11266     }
11267     key_names->append(key_info->name);
11268     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11269     used_lengths->append(buf, length);
11270   }
11271 
11272   if (cpk_quick)
11273   {
11274     KEY *key_info= head->key_info + cpk_quick->index;
11275     key_names->append(',');
11276     key_names->append(key_info->name);
11277     length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
11278     used_lengths->append(',');
11279     used_lengths->append(buf, length);
11280   }
11281 }
11282 
add_keys_and_lengths(String * key_names,String * used_lengths)11283 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
11284                                                   String *used_lengths)
11285 {
11286   bool first= TRUE;
11287   QUICK_SELECT_I *quick;
11288   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11289   while ((quick= it++))
11290   {
11291     if (first)
11292       first= FALSE;
11293     else
11294     {
11295       used_lengths->append(',');
11296       key_names->append(',');
11297     }
11298     quick->add_keys_and_lengths(key_names, used_lengths);
11299   }
11300 }
11301 
11302 
11303 /*******************************************************************************
11304 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
11305 *******************************************************************************/
11306 
11307 static inline uint get_field_keypart(KEY *index, Field *field);
11308 static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
11309                                              PARAM *param, uint *param_idx);
11310 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
11311                                     SEL_ARG **cur_range);
11312 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
11313                        KEY_PART_INFO *first_non_group_part,
11314                        KEY_PART_INFO *min_max_arg_part,
11315                        KEY_PART_INFO *last_part, THD *thd,
11316                        uchar *key_infix, uint *key_infix_len,
11317                        KEY_PART_INFO **first_non_infix_part);
11318 static bool
11319 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
11320                                Field::imagetype image_type);
11321 
11322 static void
11323 cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
11324                    uint group_key_parts, SEL_TREE *range_tree,
11325                    SEL_ARG *index_tree, ha_rows quick_prefix_records,
11326                    bool have_min, bool have_max,
11327                    double *read_cost, ha_rows *records);
11328 
11329 
11330 /**
11331   Test if this access method is applicable to a GROUP query with MIN/MAX
11332   functions, and if so, construct a new TRP object.
11333 
11334   DESCRIPTION
11335     Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
11336     Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
11337     following conditions:
11338     A) Table T has at least one compound index I of the form:
11339        I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
11340     B) Query conditions:
11341     B0. Q is over a single table T.
11342     B1. The attributes referenced by Q are a subset of the attributes of I.
11343     B2. All attributes QA in Q can be divided into 3 overlapping groups:
11344         - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
11345           referenced by any number of MIN and/or MAX functions if present.
11346         - WA = {W_1, ..., W_p} - from the WHERE clause
11347         - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
11348              = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
11350         - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
11351           GROUP BY and not referenced by MIN/MAX functions.
11352         with the following properties specified below.
11353     B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
11354         applicable.
11355 
    SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions which, if present, is denoted as C.
11358     SA2. The position of the C attribute in the index is after the last A_k.
11359     SA3. The attribute C can be referenced in the WHERE clause only in
11360          predicates of the forms:
11361          - (C {< | <= | > | >= | =} const)
11362          - (const {< | <= | > | >= | =} C)
11363          - (C between const_i and const_j)
11364          - C IS NULL
11365          - C IS NOT NULL
11366          - C != const
11367     SA4. If Q has a GROUP BY clause, there are no other aggregate functions
11368          except MIN and MAX. For queries with DISTINCT, aggregate functions
11369          are allowed.
11370     SA5. The select list in DISTINCT queries should not contain expressions.
11371     SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
11372          for AGG_FUNC(DISTINCT ...) optimization because cursor position is
11373          never stored after a unique key lookup in the clustered index and
         further index_next/prev calls can not be used. So loose index scan
11375          optimization can not be used in this case.
11376     SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
11377          access method is not used.
11378          For above queries MIN/MAX() aggregation has to be done at
11379          nested_loops_join (end_send_group). But with current design MIN/MAX()
11380          is always set as part of loose index scan. Because of this mismatch
11381          MIN() and MAX() values will be set incorrectly. For such queries to
11382          work we need a new interface for loose index scan. This new interface
11383          should only fetch records with min and max values and let
11384          end_send_group to do aggregation. Until then do not use
11385          loose_index_scan.
11386     GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
11387          G_i = A_j => i = j.
11388     GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
11389          forms a prefix of I. This permutation is used as the GROUP clause
11390          when the DISTINCT query is converted to a GROUP query.
11391     GA3. The attributes in GA may participate in arbitrary predicates, divided
11392          into two groups:
11393          - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
11394            attributes of a prefix of GA
11395          - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
11396            of GA. Since P is applied to only GROUP attributes it filters some
11397            groups, and thus can be applied after the grouping.
11398     GA4. There are no expressions among G_i, just direct column references.
11399     NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
11400          and the MIN/MAX attribute C, then NGA must consist of exactly the
11401          index attributes that constitute the gap. As a result there is a
11402          permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
11403          in the index.
11404     NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
11405          equality conditions for all NG_i of the form (NG_i = const) or
11406          (const = NG_i), such that each NG_i is referenced in exactly one
11407          conjunct. Informally, the predicates provide constants to fill the
11408          gap in the index.
11409     NGA3.If BA <> {}, there can only be one range. TODO: This is a code
11410          limitation and is not strictly needed. See BUG#15947433
11411     WA1. There are no other attributes in the WHERE clause except the ones
11412          referenced in predicates RNG, PA, PC, EQ defined above. Therefore
11413          WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
11414          above tests. By transitivity then it also follows that each WA_i
11415          participates in the index I (if this was already tested for GA, NGA
11416          and C).
11417     WA2. If there is a predicate on C, then it must be in conjunction
11418          to all predicates on all earlier keyparts in I.
11419 
11420     C) Overall query form:
11421        SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
11422          FROM T
11423         WHERE [RNG(A_1,...,A_p ; where p <= k)]
11424          [AND EQ(B_1,...,B_m)]
11425          [AND PC(C)]
11426          [AND PA(A_i1,...,A_iq)]
11427        GROUP BY A_1,...,A_k
11428        [HAVING PH(A_1, ..., B_1,..., C)]
11429     where EXPR(...) is an arbitrary expression over some or all SELECT fields,
11430     or:
11431        SELECT DISTINCT A_i1,...,A_ik
11432          FROM T
11433         WHERE [RNG(A_1,...,A_p ; where p <= k)]
11434          [AND PA(A_i1,...,A_iq)];
11435 
11436   NOTES
11437     If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
11439     object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
11440     If (mem_root == NULL), then the function only tests whether the current
11441     query satisfies the conditions above, and, if so, sets
11442     is_applicable = TRUE.
11443 
11444     Queries with DISTINCT for which index access can be used are transformed
11445     into equivalent group-by queries of the form:
11446 
11447     SELECT A_1,...,A_k FROM T
11448      WHERE [RNG(A_1,...,A_p ; where p <= k)]
11449       [AND PA(A_i1,...,A_iq)]
11450     GROUP BY A_1,...,A_k;
11451 
11452     The group-by list is a permutation of the select attributes, according
11453     to their order in the index.
11454 
11455   TODO
11456   - What happens if the query groups by the MIN/MAX field, and there is no
11457     other field as in: "select min(a) from t1 group by a" ?
11458   - We assume that the general correctness of the GROUP-BY query was checked
11459     before this point. Is this correct, or do we have to check it completely?
11460   - Lift the limitation in condition (B3), that is, make this access method
11461     applicable to ROLLUP queries.
11462 
11463  @param  param     Parameter from test_quick_select
11464  @param  sel_tree  Range tree generated by get_mm_tree
11465  @param  read_time Best read time so far (=table/index scan time)
11466  @return table read plan
11467    @retval NULL  Loose index scan not applicable or mem_root == NULL
11468    @retval !NULL Loose index scan table read plan
11469 */
11470 
11471 static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM * param,SEL_TREE * tree,double read_time)11472 get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
11473 {
11474   THD *thd= param->thd;
11475   JOIN *join= thd->lex->current_select->join;
11476   TABLE *table= param->table;
11477   bool have_min= FALSE;              /* TRUE if there is a MIN function. */
11478   bool have_max= FALSE;              /* TRUE if there is a MAX function. */
11479   Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
11480   KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
11481   uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
11482   KEY *index_info= NULL;    /* The index chosen for data access. */
11483   uint index= 0;            /* The id of the chosen index. */
11484   uint group_key_parts= 0;  // Number of index key parts in the group prefix.
11485   uint used_key_parts= 0;   /* Number of index key parts used for access. */
11486   uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
11487   uint key_infix_len= 0;          /* Length of key_infix. */
11488   TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
11489   uint key_part_nr;
11490   ORDER *tmp_group;
11491   Item *item;
11492   Item_field *item_field;
11493   bool is_agg_distinct;
11494   List<Item_field> agg_distinct_flds;
11495   /* Cost-related variables for the best index so far. */
11496   double best_read_cost= DBL_MAX;
11497   ha_rows best_records= 0;
11498   SEL_ARG *best_index_tree= NULL;
11499   ha_rows best_quick_prefix_records= 0;
11500   uint best_param_idx= 0;
11501   List_iterator<Item> select_items_it;
11502   Opt_trace_context * const trace= &param->thd->opt_trace;
11503 
11504   DBUG_ENTER("get_best_group_min_max");
11505 
11506   Opt_trace_object trace_group(trace, "group_index_range",
11507                                Opt_trace_context::RANGE_OPTIMIZER);
11508   const char* cause= NULL;
11509 
11510   /* Perform few 'cheap' tests whether this access method is applicable. */
11511   if (!join)
11512     cause= "no_join";
11513   else if (join->primary_tables != 1)  /* Query must reference one table. */
11514     cause= "not_single_table";
11515   else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
11516     cause= "rollup";
11517   else if (table->s->keys == 0)        /* There are no indexes to use. */
11518     cause= "no_index";
11519   else if (param->order_direction == ORDER::ORDER_DESC)
11520     cause= "cannot_do_reverse_ordering";
11521   if (cause != NULL)
11522   {
11523     trace_group.add("chosen", false).add_alnum("cause", cause);
11524     DBUG_RETURN(NULL);
11525   }
11526 
11527   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
11528   is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
11529 
11530   if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
11531       (!join->select_distinct) &&
11532       !is_agg_distinct)
11533   {
11534     trace_group.add("chosen", false).
11535       add_alnum("cause", "not_group_by_or_distinct");
11536     DBUG_RETURN(NULL);
11537   }
11538   /* Analyze the query in more detail. */
11539 
11540   if (join->sum_funcs[0])
11541   {
11542     Item_sum *min_max_item;
11543     Item_sum **func_ptr= join->sum_funcs;
11544     while ((min_max_item= *(func_ptr++)))
11545     {
11546       if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
11547         have_min= TRUE;
11548       else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
11549         have_max= TRUE;
11550       else if (is_agg_distinct &&
11551                (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
11552                 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
11553                 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
11554         continue;
11555       else
11556       {
11557         trace_group.add("chosen", false).
11558           add_alnum("cause", "not_applicable_aggregate_function");
11559         DBUG_RETURN(NULL);
11560       }
11561 
11562       /* The argument of MIN/MAX. */
11563       Item *expr= min_max_item->get_arg(0)->real_item();
11564       if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
11565       {
11566         if (! min_max_arg_item)
11567           min_max_arg_item= (Item_field*) expr;
11568         else if (! min_max_arg_item->eq(expr, 1))
11569           DBUG_RETURN(NULL);
11570       }
11571       else
11572         DBUG_RETURN(NULL);
11573     }
11574   }
11575 
11576   /* Check (SA7). */
11577   if (is_agg_distinct && (have_max || have_min))
11578   {
11579     trace_group.add("chosen", false).
11580       add_alnum("cause", "have_both_agg_distinct_and_min_max");
11581     DBUG_RETURN(NULL);
11582   }
11583 
11584   select_items_it= List_iterator<Item>(join->fields_list);
11585   /* Check (SA5). */
11586   if (join->select_distinct)
11587   {
11588     trace_group.add("distinct_query", true);
11589     while ((item= select_items_it++))
11590     {
11591       if (item->real_item()->type() != Item::FIELD_ITEM)
11592         DBUG_RETURN(NULL);
11593     }
11594   }
11595 
11596   /* Check (GA4) - that there are no expressions among the group attributes. */
11597   for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
11598   {
11599     if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
11600     {
11601       trace_group.add("chosen", false).
11602         add_alnum("cause", "group_field_is_expression");
11603       DBUG_RETURN(NULL);
11604     }
11605   }
11606 
11607   /*
11608     Check that table has at least one compound index such that the conditions
11609     (GA1,GA2) are all TRUE. If there is more than one such index, select the
11610     first one. Here we set the variables: group_prefix_len and index_info.
11611   */
11612 
11613   const uint pk= param->table->s->primary_key;
11614   KEY *cur_index_info= table->key_info;
11615   KEY *cur_index_info_end= cur_index_info + table->s->keys;
11616   SEL_ARG *cur_index_tree= NULL;
11617   ha_rows cur_quick_prefix_records= 0;
11618   uint cur_param_idx= MAX_KEY;
11619   Opt_trace_array trace_indices(trace, "potential_group_range_indices");
11620   for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
11621        cur_index_info++, cur_index++)
11622   {
11623     Opt_trace_object trace_idx(trace);
11624     trace_idx.add_utf8("index", cur_index_info->name);
11625     KEY_PART_INFO *cur_part;
11626     KEY_PART_INFO *end_part; /* Last part for loops. */
11627     /* Last index part. */
11628     KEY_PART_INFO *last_part;
11629     KEY_PART_INFO *first_non_group_part;
11630     KEY_PART_INFO *first_non_infix_part;
11631     uint key_infix_parts;
11632     uint cur_group_key_parts= 0;
11633     uint cur_group_prefix_len= 0;
11634     double cur_read_cost;
11635     ha_rows cur_records;
11636     key_map used_key_parts_map;
11637     uint max_key_part= 0;
11638     uint cur_key_infix_len= 0;
11639     uchar cur_key_infix[MAX_KEY_LENGTH];
11640     uint cur_used_key_parts;
11641 
11642     /* Check (B1) - if current index is covering. */
11643     if (!table->covering_keys.is_set(cur_index))
11644     {
11645       cause= "not_covering";
11646       goto next_index;
11647     }
11648 
11649     /*
11650       If the current storage manager is such that it appends the primary key to
11651       each index, then the above condition is insufficient to check if the
11652       index is covering. In such cases it may happen that some fields are
11653       covered by the PK index, but not by the current index. Since we can't
11654       use the concatenation of both indexes for index lookup, such an index
11655       does not qualify as covering in our case. If this is the case, below
11656       we check that all query fields are indeed covered by 'cur_index'.
11657     */
11658     if (pk < MAX_KEY && cur_index != pk &&
11659         (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
11660     {
11661       /* For each table field */
11662       for (uint i= 0; i < table->s->fields; i++)
11663       {
11664         Field *cur_field= table->field[i];
11665         /*
11666           If the field is used in the current query ensure that it's
11667           part of 'cur_index'
11668         */
11669         if (bitmap_is_set(table->read_set, cur_field->field_index) &&
11670             !cur_field->is_part_of_actual_key(thd, cur_index, cur_index_info))
11671         {
11672           cause= "not_covering";
11673           goto next_index;                  // Field was not part of key
11674         }
11675       }
11676     }
11677     trace_idx.add("covering", true);
11678 
11679     /*
11680       Check (GA1) for GROUP BY queries.
11681     */
11682     if (join->group_list)
11683     {
11684       cur_part= cur_index_info->key_part;
11685       end_part= cur_part + actual_key_parts(cur_index_info);
11686       /* Iterate in parallel over the GROUP list and the index parts. */
11687       for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
11688            tmp_group= tmp_group->next, cur_part++)
11689       {
11690         /*
11691           TODO:
11692           tmp_group::item is an array of Item, is it OK to consider only the
11693           first Item? If so, then why? What is the array for?
11694         */
11695         /* Above we already checked that all group items are fields. */
11696         DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
11697         Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
11698         if (group_field->field->eq(cur_part->field))
11699         {
11700           cur_group_prefix_len+= cur_part->store_length;
11701           ++cur_group_key_parts;
11702           max_key_part= cur_part - cur_index_info->key_part + 1;
11703           used_key_parts_map.set_bit(max_key_part);
11704         }
11705         else
11706         {
11707           cause= "group_attribute_not_prefix_in_index";
11708           goto next_index;
11709         }
11710       }
11711     }
11712 
11713     /*
11714       Check (GA2) if this is a DISTINCT query.
11715       If GA2, then Store a new ORDER object in group_fields_array at the
11716       position of the key part of item_field->field. Thus we get the ORDER
11717       objects for each field ordered as the corresponding key parts.
11718       Later group_fields_array of ORDER objects is used to convert the query
11719       to a GROUP query.
11720     */
11721     if ((!join->group_list && join->select_distinct) ||
11722         is_agg_distinct)
11723     {
11724       if (!is_agg_distinct)
11725       {
11726         select_items_it.rewind();
11727       }
11728 
11729       List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
11730       while (NULL !=
11731              (item= (is_agg_distinct ?
11732                      (Item *) agg_distinct_flds_it++ : select_items_it++)))
11733       {
11734         /* (SA5) already checked above. */
11735         item_field= (Item_field*) item->real_item();
11736         DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
11737 
11738         /* not doing loose index scan for derived tables */
11739         if (!item_field->field)
11740         {
11741           cause= "derived_table";
11742           goto next_index;
11743         }
11744 
11745         /* Find the order of the key part in the index. */
11746         key_part_nr= get_field_keypart(cur_index_info, item_field->field);
11747         /*
11748           Check if this attribute was already present in the select list.
          If it was present, then its corresponding key part was already used.
11750         */
11751         if (used_key_parts_map.is_set(key_part_nr))
11752           continue;
11753         if (key_part_nr < 1 ||
11754             (!is_agg_distinct && key_part_nr > join->fields_list.elements))
11755         {
11756           cause= "select_attribute_not_prefix_in_index";
11757           goto next_index;
11758         }
11759         cur_part= cur_index_info->key_part + key_part_nr - 1;
11760         cur_group_prefix_len+= cur_part->store_length;
11761         used_key_parts_map.set_bit(key_part_nr);
11762         ++cur_group_key_parts;
11763         max_key_part= max(max_key_part,key_part_nr);
11764       }
11765       /*
11766         Check that used key parts forms a prefix of the index.
11767         To check this we compare bits in all_parts and cur_parts.
11768         all_parts have all bits set from 0 to (max_key_part-1).
11769         cur_parts have bits set for only used keyparts.
11770       */
11771       ulonglong all_parts, cur_parts;
11772       all_parts= (1ULL << max_key_part) - 1;
11773       cur_parts= used_key_parts_map.to_ulonglong() >> 1;
11774       if (all_parts != cur_parts)
11775         goto next_index;
11776     }
11777 
11778     /* Check (SA2). */
11779     if (min_max_arg_item)
11780     {
11781       key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
11782       if (key_part_nr <= cur_group_key_parts)
11783       {
11784         cause= "aggregate_column_not_suffix_in_idx";
11785         goto next_index;
11786       }
11787       min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
11788     }
11789 
11790     /* Check (SA6) if clustered key is used. */
11791     if (is_agg_distinct && cur_index == table->s->primary_key &&
11792         table->file->primary_key_is_clustered())
11793     {
11794       cause= "primary_key_is_clustered";
11795       goto next_index;
11796     }
11797 
11798     /*
11799       Check (NGA1, NGA2) and extract a sequence of constants to be used as part
11800       of all search keys.
11801     */
11802 
11803     /*
11804       If there is MIN/MAX, each keypart between the last group part and the
11805       MIN/MAX part must participate in one equality with constants, and all
11806       keyparts after the MIN/MAX part must not be referenced in the query.
11807 
11808       If there is no MIN/MAX, the keyparts after the last group part can be
11809       referenced only in equalities with constants, and the referenced keyparts
11810       must form a sequence without any gaps that starts immediately after the
11811       last group keypart.
11812     */
11813     last_part= cur_index_info->key_part + actual_key_parts(cur_index_info);
11814     first_non_group_part=
11815       (cur_group_key_parts < actual_key_parts(cur_index_info)) ?
11816       cur_index_info->key_part + cur_group_key_parts :
11817       NULL;
11818     first_non_infix_part= min_max_arg_part ?
11819       (min_max_arg_part < last_part) ?
11820       min_max_arg_part :
11821       NULL :
11822       NULL;
11823     if (first_non_group_part &&
11824         (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
11825     {
11826       if (tree)
11827       {
11828         uint dummy;
11829         SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11830                                                         &dummy);
11831         if (!get_constant_key_infix(cur_index_info, index_range_tree,
11832                                     first_non_group_part, min_max_arg_part,
11833                                     last_part, thd, cur_key_infix,
11834                                     &cur_key_infix_len,
11835                                     &first_non_infix_part))
11836         {
11837           cause= "nonconst_equality_gap_attribute";
11838           goto next_index;
11839         }
11840       }
11841       else if (min_max_arg_part &&
11842                (min_max_arg_part - first_non_group_part > 0))
11843       {
11844         /*
11845           There is a gap but no range tree, thus no predicates at all for the
11846           non-group keyparts.
11847         */
11848         cause= "no_nongroup_keypart_predicate";
11849         goto next_index;
11850       }
11851       else if (first_non_group_part && join->conds)
11852       {
11853         /*
11854           If there is no MIN/MAX function in the query, but some index
11855           key part is referenced in the WHERE clause, then this index
11856           cannot be used because the WHERE condition over the keypart's
11857           field cannot be 'pushed' to the index (because there is no
11858           range 'tree'), and the WHERE clause must be evaluated before
11859           GROUP BY/DISTINCT.
11860         */
11861         /*
11862           Store the first and last keyparts that need to be analyzed
11863           into one array that can be passed as parameter.
11864         */
11865         KEY_PART_INFO *key_part_range[2];
11866         key_part_range[0]= first_non_group_part;
11867         key_part_range[1]= last_part;
11868 
11869         /* Check if cur_part is referenced in the WHERE clause. */
11870         if (join->conds->walk(&Item::find_item_in_field_list_processor, 1,
11871                               (uchar*) key_part_range))
11872         {
11873           cause= "keypart_reference_from_where_clause";
11874           goto next_index;
11875         }
11876       }
11877     }
11878 
11879     /*
11880       Test (WA1) partially - that no other keypart after the last infix part is
11881       referenced in the query.
11882     */
11883     if (first_non_infix_part)
11884     {
11885       cur_part= first_non_infix_part +
11886         (min_max_arg_part && (min_max_arg_part < last_part));
11887       for (; cur_part != last_part; cur_part++)
11888       {
11889         if (bitmap_is_set(table->read_set, cur_part->field->field_index))
11890         {
11891           cause= "keypart_after_infix_in_query";
11892           goto next_index;
11893         }
11894       }
11895     }
11896 
11897     /**
11898       Test WA2:If there are conditions on a column C participating in
11899       MIN/MAX, those conditions must be conjunctions to all earlier
11900       keyparts. Otherwise, Loose Index Scan cannot be used.
11901     */
11902     if (tree && min_max_arg_item)
11903     {
11904       uint dummy;
11905       SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11906                                                       &dummy);
11907       SEL_ARG *cur_range= NULL;
11908       if (get_sel_arg_for_keypart(min_max_arg_part->field,
11909                                   index_range_tree, &cur_range) ||
11910           (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
11911       {
11912         cause= "minmax_keypart_in_disjunctive_query";
11913         goto next_index;
11914       }
11915     }
11916 
11917     /* If we got to this point, cur_index_info passes the test. */
11918     key_infix_parts= cur_key_infix_len ? (uint)
11919       (first_non_infix_part - first_non_group_part) : 0;
11920     cur_used_key_parts= cur_group_key_parts + key_infix_parts;
11921 
11922     /* Compute the cost of using this index. */
11923     if (tree)
11924     {
11925       /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
11926       cur_index_tree= get_index_range_tree(cur_index, tree, param,
11927                                            &cur_param_idx);
11928       /* Check if this range tree can be used for prefix retrieval. */
11929       Cost_estimate dummy_cost;
11930       uint mrr_flags= HA_MRR_SORTED;
11931       uint mrr_bufsize=0;
11932       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
11933                                                    FALSE /*don't care*/,
11934                                                    cur_index_tree, TRUE,
11935                                                    &mrr_flags, &mrr_bufsize,
11936                                                    &dummy_cost);
11937 #ifdef OPTIMIZER_TRACE
11938       if (unlikely(cur_index_tree && trace->is_started()))
11939       {
11940         trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics);
11941         Opt_trace_array trace_range(trace, "ranges");
11942 
11943         const KEY_PART_INFO *key_part= cur_index_info->key_part;
11944 
11945         String range_info;
11946         range_info.set_charset(system_charset_info);
11947         append_range_all_keyparts(&trace_range, NULL, &range_info,
11948                                   cur_index_tree, key_part);
11949       }
11950 #endif
11951     }
11952     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
11953                        cur_group_key_parts, tree, cur_index_tree,
11954                        cur_quick_prefix_records, have_min, have_max,
11955                        &cur_read_cost, &cur_records);
11956     /*
11957       If cur_read_cost is lower than best_read_cost use cur_index.
11958       Do not compare doubles directly because they may have different
11959       representations (64 vs. 80 bits).
11960     */
11961     trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
11962     if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
11963     {
11964       index_info= cur_index_info;
11965       index= cur_index;
11966       best_read_cost= cur_read_cost;
11967       best_records= cur_records;
11968       best_index_tree= cur_index_tree;
11969       best_quick_prefix_records= cur_quick_prefix_records;
11970       best_param_idx= cur_param_idx;
11971       group_key_parts= cur_group_key_parts;
11972       group_prefix_len= cur_group_prefix_len;
11973       key_infix_len= cur_key_infix_len;
11974       if (key_infix_len)
11975         memcpy (key_infix, cur_key_infix, sizeof (key_infix));
11976       used_key_parts= cur_used_key_parts;
11977     }
11978 
11979   next_index:
11980     if (cause)
11981     {
11982       trace_idx.add("usable", false).add_alnum("cause", cause);
11983       cause= NULL;
11984     }
11985   }
11986   trace_indices.end();
11987 
11988   if (!index_info) /* No usable index found. */
11989     DBUG_RETURN(NULL);
11990 
11991   /* Check (SA3) for the where clause. */
11992   if (join->conds && min_max_arg_item &&
11993       !check_group_min_max_predicates(join->conds, min_max_arg_item,
11994                                       (index_info->flags & HA_SPATIAL) ?
11995                                       Field::itMBR : Field::itRAW))
11996   {
11997     trace_group.add("usable", false).
11998       add_alnum("cause", "unsupported_predicate_on_agg_attribute");
11999     DBUG_RETURN(NULL);
12000   }
12001 
12002   /* The query passes all tests, so construct a new TRP object. */
12003   read_plan= new (param->mem_root)
12004                  TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
12005                                    min_max_arg_part,
12006                                    group_prefix_len, used_key_parts,
12007                                    group_key_parts, index_info, index,
12008                                    key_infix_len,
12009                                    (key_infix_len > 0) ? key_infix : NULL,
12010                                    tree, best_index_tree, best_param_idx,
12011                                    best_quick_prefix_records);
12012   if (read_plan)
12013   {
12014     if (tree && read_plan->quick_prefix_records == 0)
12015       DBUG_RETURN(NULL);
12016 
12017     read_plan->read_cost= best_read_cost;
12018     read_plan->records=   best_records;
12019     if (read_time < best_read_cost && is_agg_distinct)
12020     {
12021       trace_group.add("index_scan", true);
12022       read_plan->read_cost= 0;
12023       read_plan->use_index_scan();
12024     }
12025 
12026     DBUG_PRINT("info",
12027                ("Returning group min/max plan: cost: %g, records: %lu",
12028                 read_plan->read_cost, (ulong) read_plan->records));
12029   }
12030 
12031   DBUG_RETURN(read_plan);
12032 }
12033 
12034 
12035 /*
12036   Check that the MIN/MAX attribute participates only in range predicates
12037   with constants.
12038 
12039   SYNOPSIS
12040     check_group_min_max_predicates()
12041     cond              tree (or subtree) describing all or part of the WHERE
12042                       clause being analyzed
12043     min_max_arg_item  the field referenced by the MIN/MAX function(s)
12044     min_max_arg_part  the keypart of the MIN/MAX argument if any
12045 
12046   DESCRIPTION
12047     The function walks recursively over the cond tree representing a WHERE
12048     clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
12049     aggregate function, it is referenced only by one of the following
    predicates: {=, <=>, !=, <, <=, >, >=, between, is null, is not null}.
12051 
12052   RETURN
12053     TRUE  if cond passes the test
12054     FALSE o/w
12055 */
12056 
12057 static bool
check_group_min_max_predicates(Item * cond,Item_field * min_max_arg_item,Field::imagetype image_type)12058 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
12059                                Field::imagetype image_type)
12060 {
12061   DBUG_ENTER("check_group_min_max_predicates");
12062   DBUG_ASSERT(cond && min_max_arg_item);
12063 
12064   cond= cond->real_item();
12065   Item::Type cond_type= cond->type();
12066   if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
12067   {
12068     DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
12069     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
12070     Item *and_or_arg;
12071     while ((and_or_arg= li++))
12072     {
12073       if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
12074                                          image_type))
12075         DBUG_RETURN(FALSE);
12076     }
12077     DBUG_RETURN(TRUE);
12078   }
12079 
12080   /*
12081     TODO:
12082     This is a very crude fix to handle sub-selects in the WHERE clause
12083     (Item_subselect objects). With the test below we rule out from the
12084     optimization all queries with subselects in the WHERE clause. What has to
12085     be done, is that here we should analyze whether the subselect references
12086     the MIN/MAX argument field, and disallow the optimization only if this is
12087     so.
12088   */
12089   if (cond_type == Item::SUBSELECT_ITEM)
12090     DBUG_RETURN(FALSE);
12091 
12092   /*
12093     Condition of the form 'field' is equivalent to 'field <> 0' and thus
12094     satisfies the SA3 condition.
12095   */
12096   if (cond_type == Item::FIELD_ITEM)
12097   {
12098     DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
12099     DBUG_RETURN(TRUE);
12100   }
12101 
12102   /* We presume that at this point there are no other Items than functions. */
12103   DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
12104 
12105   /* Test if cond references only group-by or non-group fields. */
12106   Item_func *pred= (Item_func*) cond;
12107   Item *cur_arg;
12108   DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
12109   for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
12110   {
12111     Item **arguments= pred->arguments();
12112     cur_arg= arguments[arg_idx]->real_item();
12113     DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
12114     if (cur_arg->type() == Item::FIELD_ITEM)
12115     {
12116       if (min_max_arg_item->eq(cur_arg, 1))
12117       {
12118        /*
12119          If pred references the MIN/MAX argument, check whether pred is a range
12120          condition that compares the MIN/MAX argument with a constant.
12121        */
12122         Item_func::Functype pred_type= pred->functype();
12123         if (pred_type != Item_func::EQUAL_FUNC     &&
12124             pred_type != Item_func::LT_FUNC        &&
12125             pred_type != Item_func::LE_FUNC        &&
12126             pred_type != Item_func::GT_FUNC        &&
12127             pred_type != Item_func::GE_FUNC        &&
12128             pred_type != Item_func::BETWEEN        &&
12129             pred_type != Item_func::ISNULL_FUNC    &&
12130             pred_type != Item_func::ISNOTNULL_FUNC &&
12131             pred_type != Item_func::EQ_FUNC        &&
12132             pred_type != Item_func::NE_FUNC)
12133           DBUG_RETURN(FALSE);
12134 
12135         /* Check that pred compares min_max_arg_item with a constant. */
12136         Item *args[3];
12137         memset(args, 0, 3 * sizeof(Item*));
12138         bool inv;
12139         /* Test if this is a comparison of a field and a constant. */
12140         if (!simple_pred(pred, args, &inv))
12141           DBUG_RETURN(FALSE);
12142 
12143         /* Check for compatible string comparisons - similar to get_mm_leaf. */
12144         if (args[0] && args[1] && !args[2] && // this is a binary function
12145             min_max_arg_item->result_type() == STRING_RESULT &&
12146             /*
12147               Don't use an index when comparing strings of different collations.
12148             */
12149             ((args[1]->result_type() == STRING_RESULT &&
12150               image_type == Field::itRAW &&
12151               min_max_arg_item->field->charset() != pred->compare_collation())
12152              ||
12153              /*
12154                We can't always use indexes when comparing a string index to a
12155                number.
12156              */
12157              (args[1]->result_type() != STRING_RESULT &&
12158               min_max_arg_item->field->cmp_type() != args[1]->result_type())))
12159           DBUG_RETURN(FALSE);
12160       }
12161     }
12162     else if (cur_arg->type() == Item::FUNC_ITEM)
12163     {
12164       if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
12165                                          image_type))
12166         DBUG_RETURN(FALSE);
12167     }
12168     else if (cur_arg->const_item())
12169     {
12170       /*
12171         For predicates of the form "const OP expr" we also have to check 'expr'
12172         to make a decision.
12173       */
12174       continue;
12175     }
12176     else
12177       DBUG_RETURN(FALSE);
12178   }
12179 
12180   DBUG_RETURN(TRUE);
12181 }
12182 
12183 
12184 /*
12185   Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
12186   any. 'tree' must be a unique conjunction to ALL predicates in earlier
12187   keyparts of 'keypart_tree'.
12188 
12189   E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
  covers 'field', all these conditions satisfy the requirement:
12191 
12192    1. "(kp1=2 OR kp1=3) AND kp2=10"    => returns "kp2=10"
12193    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)"  => returns "kp2=10"
12194    3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12195                                        => returns "kp2=10  OR kp2=11"
12196 
12197    whereas these do not
12198    1. "(kp1=2 AND kp2=10) OR kp1=3"
12199    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
12200    3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12201 
12202    This function effectively tests requirement WA2. In combination with
12203    a test that the returned tree has no more than one range it is also
12204    a test of NGA3.
12205 
12206   @param[in]   field          The field we want the SEL_ARG tree for
12207   @param[in]   keypart_tree   Root node of the SEL_ARG* tree for the index
12208   @param[out]  cur_range      The SEL_ARG tree, if any, for the keypart
12209                               covering field 'keypart_field'
12210   @retval true   'keypart_tree' contained a predicate for 'field' that
12211                   is not conjunction to all predicates on earlier keyparts
12212   @retval false  otherwise
12213 */
12214 
12215 static bool
get_sel_arg_for_keypart(Field * field,SEL_ARG * keypart_tree,SEL_ARG ** cur_range)12216 get_sel_arg_for_keypart(Field *field,
12217                         SEL_ARG *keypart_tree,
12218                         SEL_ARG **cur_range)
12219 {
12220   if (keypart_tree == NULL)
12221     return false;
12222   if (keypart_tree->type != SEL_ARG::KEY_RANGE)
12223   {
12224     /*
12225       A range predicate not usable by Loose Index Scan is found.
12226       Predicates for keypart 'keypart_tree->part' and later keyparts
12227       cannot be used.
12228     */
12229     *cur_range= keypart_tree;
12230     return false;
12231   }
12232   if (keypart_tree->field->eq(field))
12233   {
12234     *cur_range= keypart_tree;
12235     return false;
12236   }
12237 
12238   SEL_ARG *tree_first_range= NULL;
12239   SEL_ARG *first_kp=  keypart_tree->first();
12240 
12241   for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
12242   {
12243     SEL_ARG *curr_tree= NULL;
12244     if (cur_kp->next_key_part)
12245     {
12246       if (get_sel_arg_for_keypart(field,
12247                                   cur_kp->next_key_part,
12248                                   &curr_tree))
12249         return true;
12250     }
12251     /**
12252       Check if the SEL_ARG tree for 'field' is identical for all ranges in
12253       'keypart_tree
12254      */
12255     if (cur_kp == first_kp)
12256       tree_first_range= curr_tree;
12257     else if (!all_same(tree_first_range, curr_tree))
12258       return true;
12259   }
12260   *cur_range= tree_first_range;
12261   return false;
12262 }
12263 
12264 /*
12265   Extract a sequence of constants from a conjunction of equality predicates.
12266 
12267   SYNOPSIS
12268     get_constant_key_infix()
12269     index_info             [in]  Descriptor of the chosen index.
12270     index_range_tree       [in]  Range tree for the chosen index
12271     first_non_group_part   [in]  First index part after group attribute parts
12272     min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
12273     last_part              [in]  Last keypart of the index
12274     thd                    [in]  Current thread
12275     key_infix              [out] Infix of constants to be used for index lookup
    key_infix_len          [out] Length of the infix
12277     first_non_infix_part   [out] The first keypart after the infix (if any)
12278 
12279   DESCRIPTION
12280     Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
12281     for each keypart field NGF_i not in GROUP-BY, check that there is a
12282     constant equality predicate among conds with the form (NGF_i = const_ci) or
12283     (const_ci = NGF_i).
12284     Thus all the NGF_i attributes must fill the 'gap' between the last group-by
12285     attribute and the MIN/MAX attribute in the index (if present).  Also ensure
12286     that there is only a single range on NGF_i (NGA3). If these
12287     conditions hold, copy each constant from its corresponding predicate into
12288     key_infix, in the order its NG_i attribute appears in the index, and update
12289     key_infix_len with the total length of the key parts in key_infix.
12290 
12291   RETURN
12292     TRUE  if the index passes the test
12293     FALSE o/w
12294 */
12295 static bool
get_constant_key_infix(KEY * index_info,SEL_ARG * index_range_tree,KEY_PART_INFO * first_non_group_part,KEY_PART_INFO * min_max_arg_part,KEY_PART_INFO * last_part,THD * thd,uchar * key_infix,uint * key_infix_len,KEY_PART_INFO ** first_non_infix_part)12296 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
12297                        KEY_PART_INFO *first_non_group_part,
12298                        KEY_PART_INFO *min_max_arg_part,
12299                        KEY_PART_INFO *last_part, THD *thd,
12300                        uchar *key_infix, uint *key_infix_len,
12301                        KEY_PART_INFO **first_non_infix_part)
12302 {
12303   SEL_ARG       *cur_range;
12304   KEY_PART_INFO *cur_part;
12305   /* End part for the first loop below. */
12306   KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
12307 
12308   *key_infix_len= 0;
12309   uchar *key_ptr= key_infix;
12310   for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
12311   {
12312     cur_range= NULL;
12313     /*
12314       Check NGA3:
12315       1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
12316          checks for a unique conjunction of this tree with all the predicates
12317          on the earlier keyparts in the index.
12318       2. Check for multiple ranges on the found keypart tree.
12319 
12320       We assume that index_range_tree points to the leftmost keypart in
12321       the index.
12322     */
12323     if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
12324                                 &cur_range))
12325       return false;
12326 
12327     if (cur_range && cur_range->elements > 1)
12328       return false;
12329 
12330     if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
12331     {
12332       if (min_max_arg_part)
12333         return false; /* The current keypart has no range predicates at all. */
12334       else
12335       {
12336         *first_non_infix_part= cur_part;
12337         return true;
12338       }
12339     }
12340 
12341     if ((cur_range->min_flag & NO_MIN_RANGE) ||
12342         (cur_range->max_flag & NO_MAX_RANGE) ||
12343         (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
12344       return false;
12345 
12346     uint field_length= cur_part->store_length;
12347     if (cur_range->maybe_null &&
12348          cur_range->min_value[0] && cur_range->max_value[0])
12349     {
12350       /*
12351         cur_range specifies 'IS NULL'. In this case the argument points
12352         to a "null value" (a copy of is_null_string) that we do not
12353         memcmp(), or memcpy to a field.
12354       */
12355       DBUG_ASSERT (field_length > 0);
12356       *key_ptr= 1;
12357       key_ptr+= field_length;
12358       *key_infix_len+= field_length;
12359     }
12360     else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
12361     { /* cur_range specifies an equality condition. */
12362       memcpy(key_ptr, cur_range->min_value, field_length);
12363       key_ptr+= field_length;
12364       *key_infix_len+= field_length;
12365     }
12366     else
12367       return false;
12368   }
12369 
12370   if (!min_max_arg_part && (cur_part == last_part))
12371     *first_non_infix_part= last_part;
12372 
12373   return TRUE;
12374 }
12375 
12376 
12377 /*
12378   Find the key part referenced by a field.
12379 
12380   SYNOPSIS
12381     get_field_keypart()
12382     index  descriptor of an index
12383     field  field that possibly references some key part in index
12384 
12385   NOTES
12386     The return value can be used to get a KEY_PART_INFO pointer by
12387     part= index->key_part + get_field_keypart(...) - 1;
12388 
12389   RETURN
12390     Positive number which is the consecutive number of the key part, or
12391     0 if field does not reference any index field.
12392 */
12393 
12394 static inline uint
get_field_keypart(KEY * index,Field * field)12395 get_field_keypart(KEY *index, Field *field)
12396 {
12397   KEY_PART_INFO *part, *end;
12398 
12399   for (part= index->key_part, end= part + actual_key_parts(index) ;
12400        part < end; part++)
12401   {
12402     if (field->eq(part->field))
12403       return part - index->key_part + 1;
12404   }
12405   return 0;
12406 }
12407 
12408 
12409 /*
12410   Find the SEL_ARG sub-tree that corresponds to the chosen index.
12411 
12412   SYNOPSIS
12413     get_index_range_tree()
12414     index     [in]  The ID of the index being looked for
12415     range_tree[in]  Tree of ranges being searched
12416     param     [in]  PARAM from SQL_SELECT::test_quick_select
12417     param_idx [out] Index in the array PARAM::key that corresponds to 'index'
12418 
12419   DESCRIPTION
12420 
12421     A SEL_TREE contains range trees for all usable indexes. This procedure
12422     finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
12423     ordered in the same way as the members of PARAM::key, thus we first find
12424     the corresponding index in the array PARAM::key. This index is returned
12425     through the variable param_idx, to be used later as argument of
12426     check_quick_select().
12427 
12428   RETURN
12429     Pointer to the SEL_ARG subtree that corresponds to index.
12430 */
12431 
get_index_range_tree(uint index,SEL_TREE * range_tree,PARAM * param,uint * param_idx)12432 SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
12433                                uint *param_idx)
12434 {
12435   uint idx= 0; /* Index nr in param->key_parts */
12436   while (idx < param->keys)
12437   {
12438     if (index == param->real_keynr[idx])
12439       break;
12440     idx++;
12441   }
12442   *param_idx= idx;
12443   return(range_tree->keys[idx]);
12444 }
12445 
12446 
12447 /*
12448   Compute the cost of a quick_group_min_max_select for a particular index.
12449 
12450   SYNOPSIS
12451     cost_group_min_max()
12452     table                [in] The table being accessed
12453     index_info           [in] The index used to access the table
12454     used_key_parts       [in] Number of key parts used to access the index
12455     group_key_parts      [in] Number of index key parts in the group prefix
12456     range_tree           [in] Tree of ranges for all indexes
12457     index_tree           [in] The range tree for the current index
12458     quick_prefix_records [in] Number of records retrieved by the internally
12459 			      used quick range select if any
12460     have_min             [in] True if there is a MIN function
12461     have_max             [in] True if there is a MAX function
12462     read_cost           [out] The cost to retrieve rows via this quick select
12463     records             [out] The number of rows retrieved
12464 
12465   DESCRIPTION
12466     This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
12467     the number of rows returned.
12468 
12469   NOTES
12470     The cost computation distinguishes several cases:
12471     1) No equality predicates over non-group attributes (thus no key_infix).
12472        If groups are bigger than blocks on the average, then we assume that it
12473        is very unlikely that block ends are aligned with group ends, thus even
12474        if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
12475        keys, except for the first MIN and the last MAX keys, will be in the
12476        same block.  If groups are smaller than blocks, then we are going to
12477        read all blocks.
12478     2) There are equality predicates over non-group attributes.
12479        In this case the group prefix is extended by additional constants, and
12480        as a result the min/max values are inside sub-groups of the original
12481        groups. The number of blocks that will be read depends on whether the
12482        ends of these sub-groups will be contained in the same or in different
12483        blocks. We compute the probability for the two ends of a subgroup to be
12484        in two different blocks as the ratio of:
12485        - the number of positions of the left-end of a subgroup inside a group,
12486          such that the right end of the subgroup is past the end of the buffer
12487          containing the left-end, and
12488        - the total number of possible positions for the left-end of the
12489          subgroup, which is the number of keys in the containing group.
12490        We assume it is very unlikely that two ends of subsequent subgroups are
12491        in the same block.
    3) There are range predicates over the group attributes.
12493        Then some groups may be filtered by the range predicates. We use the
12494        selectivity of the range predicates to decide how many groups will be
12495        filtered.
12496 
12497   TODO
12498      - Take into account the optional range predicates over the MIN/MAX
12499        argument.
12500      - Check if we have a PK index and we use all cols - then each key is a
12501        group, and it will be better to use an index scan.
12502 
12503   RETURN
12504     None
12505 */
12506 
cost_group_min_max(TABLE * table,KEY * index_info,uint used_key_parts,uint group_key_parts,SEL_TREE * range_tree,SEL_ARG * index_tree,ha_rows quick_prefix_records,bool have_min,bool have_max,double * read_cost,ha_rows * records)12507 void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
12508                         uint group_key_parts, SEL_TREE *range_tree,
12509                         SEL_ARG *index_tree, ha_rows quick_prefix_records,
12510                         bool have_min, bool have_max,
12511                         double *read_cost, ha_rows *records)
12512 {
12513   ha_rows table_records;
12514   uint num_groups;
12515   uint num_blocks;
12516   uint keys_per_block;
12517   uint keys_per_group;
12518   uint keys_per_subgroup; /* Average number of keys in sub-groups */
12519                           /* formed by a key infix. */
12520   double p_overlap; /* Probability that a sub-group overlaps two blocks. */
12521   double quick_prefix_selectivity;
12522   double io_cost;
12523   DBUG_ENTER("cost_group_min_max");
12524 
12525   table_records= table->file->stats.records;
12526   keys_per_block= (table->file->stats.block_size / 2 /
12527                    (index_info->key_length + table->file->ref_length)
12528                         + 1);
12529   num_blocks= (uint)(table_records / keys_per_block) + 1;
12530 
12531   /* Compute the number of keys in a group. */
12532   keys_per_group= index_info->rec_per_key[group_key_parts - 1];
12533   if (keys_per_group == 0) /* If there is no statistics try to guess */
12534     /* each group contains 10% of all records */
12535     keys_per_group= (uint)(table_records / 10) + 1;
12536   num_groups= (uint)(table_records / keys_per_group) + 1;
12537 
12538   /* Apply the selectivity of the quick select for group prefixes. */
12539   if (range_tree && (quick_prefix_records != HA_POS_ERROR))
12540   {
12541     quick_prefix_selectivity= (double) quick_prefix_records /
12542                               (double) table_records;
12543     num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
12544     set_if_bigger(num_groups, 1);
12545   }
12546 
12547   if (used_key_parts > group_key_parts)
12548   { /*
12549       Compute the probability that two ends of a subgroup are inside
12550       different blocks.
12551     */
12552     keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1];
12553     if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
12554       p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
12555     else
12556     {
12557       double blocks_per_group= (double) num_blocks / (double) num_groups;
12558       p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
12559       p_overlap= min(p_overlap, 1.0);
12560     }
12561     io_cost= min<double>(num_groups * (1 + p_overlap), num_blocks);
12562   }
12563   else
12564     io_cost= (keys_per_group > keys_per_block) ?
12565              (have_min && have_max) ? (double) (num_groups + 1) :
12566                                       (double) num_groups :
12567              (double) num_blocks;
12568 
12569   /*
12570     CPU cost must be comparable to that of an index scan as computed
12571     in SQL_SELECT::test_quick_select(). When the groups are small,
12572     e.g. for a unique index, using index scan will be cheaper since it
12573     reads the next record without having to re-position to it on every
12574     group. To make the CPU cost reflect this, we estimate the CPU cost
12575     as the sum of:
12576     1. Cost for evaluating the condition (similarly as for index scan).
12577     2. Cost for navigating the index structure (assuming a b-tree).
12578        Note: We only add the cost for one comparision per block. For a
12579              b-tree the number of comparisons will be larger.
12580        TODO: This cost should be provided by the storage engine.
12581   */
12582   const double tree_traversal_cost=
12583     ceil(log(static_cast<double>(table_records))/
12584          log(static_cast<double>(keys_per_block))) * ROWID_COMPARE_COST;
12585 
12586   const double cpu_cost= num_groups * (tree_traversal_cost + ROW_EVALUATE_COST);
12587 
12588   *read_cost= io_cost + cpu_cost;
12589   *records= num_groups;
12590 
12591   DBUG_PRINT("info",
12592              ("table rows: %lu  keys/block: %u  keys/group: %u  result rows: %lu  blocks: %u",
12593               (ulong)table_records, keys_per_block, keys_per_group,
12594               (ulong) *records, num_blocks));
12595   DBUG_VOID_RETURN;
12596 }
12597 
12598 
12599 /*
12600   Construct a new quick select object for queries with group by with min/max.
12601 
12602   SYNOPSIS
12603     TRP_GROUP_MIN_MAX::make_quick()
12604     param              Parameter from test_quick_select
12605     retrieve_full_rows ignored
12606     parent_alloc       Memory pool to use, if any.
12607 
12608   NOTES
12609     Make_quick ignores the retrieve_full_rows parameter because
12610     QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
12611     The other parameter are ignored as well because all necessary
12612     data to create the QUICK object is computed at this TRP creation
12613     time.
12614 
12615   RETURN
12616     New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
12617     NULL otherwise.
12618 */
12619 
12620 QUICK_SELECT_I *
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)12621 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
12622                               MEM_ROOT *parent_alloc)
12623 {
12624   QUICK_GROUP_MIN_MAX_SELECT *quick;
12625   DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
12626 
12627   quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
12628                                         param->thd->lex->current_select->join,
12629                                         have_min, have_max,
12630                                         have_agg_distinct, min_max_arg_part,
12631                                         group_prefix_len, group_key_parts,
12632                                         used_key_parts, index_info, index,
12633                                         read_cost, records, key_infix_len,
12634                                         key_infix, parent_alloc, is_index_scan);
12635   if (!quick)
12636     DBUG_RETURN(NULL);
12637 
12638   if (quick->init())
12639   {
12640     delete quick;
12641     DBUG_RETURN(NULL);
12642   }
12643 
12644   if (range_tree)
12645   {
12646     DBUG_ASSERT(quick_prefix_records > 0);
12647     if (quick_prefix_records == HA_POS_ERROR)
12648       quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
12649     else
12650     {
12651       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
12652       quick->quick_prefix_select= get_quick_select(param, param_idx,
12653                                                    index_tree,
12654                                                    HA_MRR_SORTED,
12655                                                    0,
12656                                                    &quick->alloc);
12657       if (!quick->quick_prefix_select)
12658       {
12659         delete quick;
12660         DBUG_RETURN(NULL);
12661       }
12662     }
12663     /*
12664       Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
12665       attribute, and create an array of QUICK_RANGES to be used by the
12666       new quick select.
12667     */
12668     if (min_max_arg_part)
12669     {
12670       SEL_ARG *min_max_range= index_tree;
12671       while (min_max_range) /* Find the tree for the MIN/MAX key part. */
12672       {
12673         if (min_max_range->field->eq(min_max_arg_part->field))
12674           break;
12675         min_max_range= min_max_range->next_key_part;
12676       }
12677       /* Scroll to the leftmost interval for the MIN/MAX argument. */
12678       while (min_max_range && min_max_range->prev)
12679         min_max_range= min_max_range->prev;
12680       /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
12681       while (min_max_range)
12682       {
12683         if (quick->add_range(min_max_range))
12684         {
12685           delete quick;
12686           quick= NULL;
12687           DBUG_RETURN(NULL);
12688         }
12689         min_max_range= min_max_range->next;
12690       }
12691     }
12692   }
12693   else
12694     quick->quick_prefix_select= NULL;
12695 
12696   quick->update_key_stat();
12697   quick->adjust_prefix_ranges();
12698 
12699   DBUG_RETURN(quick);
12700 }
12701 
12702 
12703 /*
12704   Construct new quick select for group queries with min/max.
12705 
12706   SYNOPSIS
12707     QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
12708     table             The table being accessed
12709     join              Descriptor of the current query
12710     have_min          TRUE if the query selects a MIN function
12711     have_max          TRUE if the query selects a MAX function
12712     min_max_arg_part  The only argument field of all MIN/MAX functions
12713     group_prefix_len  Length of all key parts in the group prefix
12714     prefix_key_parts  All key parts in the group prefix
12715     index_info        The index chosen for data access
12716     use_index         The id of index_info
12717     read_cost         Cost of this access method
12718     records           Number of records returned
12719     key_infix_len     Length of the key infix appended to the group prefix
12720     key_infix         Infix of constants from equality predicates
12721     parent_alloc      Memory pool for this and quick_prefix_select data
12722     is_index_scan     get the next different key not by jumping on it via
12723                       index read, but by scanning until the end of the
12724                       rows with equal key value.
12725 
12726   RETURN
12727     None
12728 */
12729 
12730 QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE * table,JOIN * join_arg,bool have_min_arg,bool have_max_arg,bool have_agg_distinct_arg,KEY_PART_INFO * min_max_arg_part_arg,uint group_prefix_len_arg,uint group_key_parts_arg,uint used_key_parts_arg,KEY * index_info_arg,uint use_index,double read_cost_arg,ha_rows records_arg,uint key_infix_len_arg,uchar * key_infix_arg,MEM_ROOT * parent_alloc,bool is_index_scan_arg)12731 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
12732                            bool have_max_arg, bool have_agg_distinct_arg,
12733                            KEY_PART_INFO *min_max_arg_part_arg,
12734                            uint group_prefix_len_arg, uint group_key_parts_arg,
12735                            uint used_key_parts_arg, KEY *index_info_arg,
12736                            uint use_index, double read_cost_arg,
12737                            ha_rows records_arg, uint key_infix_len_arg,
12738                            uchar *key_infix_arg, MEM_ROOT *parent_alloc,
12739                            bool is_index_scan_arg)
12740   :join(join_arg), index_info(index_info_arg),
12741    group_prefix_len(group_prefix_len_arg),
12742    group_key_parts(group_key_parts_arg), have_min(have_min_arg),
12743    have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
12744    seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
12745    key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
12746    min_functions_it(NULL), max_functions_it(NULL),
12747    is_index_scan(is_index_scan_arg)
12748 {
12749   head=       table;
12750   index=      use_index;
12751   record=     head->record[0];
12752   tmp_record= head->record[1];
12753   read_time= read_cost_arg;
12754   records= records_arg;
12755   used_key_parts= used_key_parts_arg;
12756   real_key_parts= used_key_parts_arg;
12757   real_prefix_len= group_prefix_len + key_infix_len;
12758   group_prefix= NULL;
12759   min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
12760 
12761   /*
12762     We can't have parent_alloc set as the init function can't handle this case
12763     yet.
12764   */
12765   DBUG_ASSERT(!parent_alloc);
12766   if (!parent_alloc)
12767   {
12768     init_sql_alloc(&alloc, join->thd->variables.range_alloc_block_size, 0);
12769     join->thd->mem_root= &alloc;
12770   }
12771   else
12772     memset(&alloc, 0, sizeof(MEM_ROOT));  // ensure that it's not used
12773 }
12774 
12775 
12776 /*
12777   Do post-constructor initialization.
12778 
12779   SYNOPSIS
12780     QUICK_GROUP_MIN_MAX_SELECT::init()
12781 
12782   DESCRIPTION
12783     The method performs initialization that cannot be done in the constructor
12784     such as memory allocations that may fail. It allocates memory for the
    group prefix and infix buffers, and for the lists of MIN/MAX item to be
12786     updated during execution.
12787 
12788   RETURN
12789     0      OK
12790     other  Error code
12791 */
12792 
init()12793 int QUICK_GROUP_MIN_MAX_SELECT::init()
12794 {
12795   if (group_prefix) /* Already initialized. */
12796     return 0;
12797 
12798   if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len)))
12799       return 1;
12800   /*
12801     We may use group_prefix to store keys with all select fields, so allocate
12802     enough space for it.
12803   */
12804   if (!(group_prefix= (uchar*) alloc_root(&alloc,
12805                                          real_prefix_len + min_max_arg_len)))
12806     return 1;
12807 
12808   if (key_infix_len > 0)
12809   {
12810     /*
12811       The memory location pointed to by key_infix will be deleted soon, so
12812       allocate a new buffer and copy the key_infix into it.
12813     */
12814     uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
12815     if (!tmp_key_infix)
12816       return 1;
12817     memcpy(tmp_key_infix, this->key_infix, key_infix_len);
12818     this->key_infix= tmp_key_infix;
12819   }
12820 
12821   if (min_max_arg_part)
12822   {
12823     if (my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
12824       return 1;
12825 
12826     if (have_min)
12827     {
12828       if (!(min_functions= new List<Item_sum>))
12829         return 1;
12830     }
12831     else
12832       min_functions= NULL;
12833     if (have_max)
12834     {
12835       if (!(max_functions= new List<Item_sum>))
12836         return 1;
12837     }
12838     else
12839       max_functions= NULL;
12840 
12841     Item_sum *min_max_item;
12842     Item_sum **func_ptr= join->sum_funcs;
12843     while ((min_max_item= *(func_ptr++)))
12844     {
12845       if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
12846         min_functions->push_back(min_max_item);
12847       else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
12848         max_functions->push_back(min_max_item);
12849     }
12850 
12851     if (have_min)
12852     {
12853       if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
12854         return 1;
12855     }
12856 
12857     if (have_max)
12858     {
12859       if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
12860         return 1;
12861     }
12862   }
12863   else
12864     min_max_ranges.elements= 0;
12865 
12866   return 0;
12867 }
12868 
12869 
~QUICK_GROUP_MIN_MAX_SELECT()12870 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
12871 {
12872   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
12873   if (head->file->inited)
12874     /*
12875       We may have used this object for index access during
12876       create_sort_index() and then switched to rnd access for the rest
12877       of execution. Since we don't do cleanup until now, we must call
12878       ha_*_end() for whatever is the current access method.
12879     */
12880     head->file->ha_index_or_rnd_end();
12881   if (min_max_arg_part)
12882     delete_dynamic(&min_max_ranges);
12883   free_root(&alloc,MYF(0));
12884   delete min_functions_it;
12885   delete max_functions_it;
12886   delete quick_prefix_select;
12887   DBUG_VOID_RETURN;
12888 }
12889 
12890 
12891 /*
12892   Eventually create and add a new quick range object.
12893 
12894   SYNOPSIS
12895     QUICK_GROUP_MIN_MAX_SELECT::add_range()
    sel_range  Range object from which a QUICK_RANGE object is created
12897 
12898   NOTES
12899     Construct a new QUICK_RANGE object from a SEL_ARG object, and
12900     add it to the array min_max_ranges. If sel_arg is an infinite
12901     range, e.g. (x < 5 or x > 4), then skip it and do not construct
12902     a quick range.
12903 
12904   RETURN
12905     FALSE on success
12906     TRUE  otherwise
12907 */
12908 
add_range(SEL_ARG * sel_range)12909 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
12910 {
12911   QUICK_RANGE *range;
12912   uint range_flag= sel_range->min_flag | sel_range->max_flag;
12913 
12914   /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
12915   if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
12916     return FALSE;
12917 
12918   if (!(sel_range->min_flag & NO_MIN_RANGE) &&
12919       !(sel_range->max_flag & NO_MAX_RANGE))
12920   {
12921     if (sel_range->maybe_null &&
12922         sel_range->min_value[0] && sel_range->max_value[0])
12923       range_flag|= NULL_RANGE; /* IS NULL condition */
12924     /*
12925       Do not perform comparison if one of the argiment is NULL value.
12926     */
12927     else if (!sel_range->min_value[0] &&
12928              !sel_range->max_value[0] &&
12929              memcmp(sel_range->min_value, sel_range->max_value,
12930                     min_max_arg_len) == 0)
12931       range_flag|= EQ_RANGE;  /* equality condition */
12932   }
12933   range= new QUICK_RANGE(sel_range->min_value, min_max_arg_len,
12934                          make_keypart_map(sel_range->part),
12935                          sel_range->max_value, min_max_arg_len,
12936                          make_keypart_map(sel_range->part),
12937                          range_flag);
12938   if (!range)
12939     return TRUE;
12940   if (insert_dynamic(&min_max_ranges, &range))
12941     return TRUE;
12942   return FALSE;
12943 }
12944 
12945 
12946 /*
12947   Opens the ranges if there are more conditions in quick_prefix_select than
12948   the ones used for jumping through the prefixes.
12949 
12950   SYNOPSIS
12951     QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
12952 
12953   NOTES
12954     quick_prefix_select is made over the conditions on the whole key.
12955     It defines a number of ranges of length x.
    However when jumping through the prefixes we use only the first
12957     few most significant keyparts in the range key. However if there
12958     are more keyparts to follow the ones we are using we must make the
12959     condition on the key inclusive (because x < "ab" means
12960     x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b').
    To achieve the above we must turn off the NEAR_MIN/NEAR_MAX flags.
12962 */
adjust_prefix_ranges()12963 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
12964 {
12965   if (quick_prefix_select &&
12966       group_prefix_len < quick_prefix_select->max_used_key_length)
12967   {
12968     DYNAMIC_ARRAY *arr;
12969     uint inx;
12970 
12971     for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
12972     {
12973       QUICK_RANGE *range;
12974 
12975       get_dynamic(arr, (uchar*)&range, inx);
12976       range->flag &= ~(NEAR_MIN | NEAR_MAX);
12977     }
12978   }
12979 }
12980 
12981 
12982 /*
12983   Determine the total number and length of the keys that will be used for
12984   index lookup.
12985 
12986   SYNOPSIS
12987     QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
12988 
12989   DESCRIPTION
12990     The total length of the keys used for index lookup depends on whether
12991     there are any predicates referencing the min/max argument, and/or if
12992     the min/max argument field can be NULL.
12993     This function does an optimistic analysis whether the search key might
12994     be extended by a constant for the min/max keypart. It is 'optimistic'
12995     because during actual execution it may happen that a particular range
12996     is skipped, and then a shorter key will be used. However this is data
12997     dependent and can't be easily estimated here.
12998 
12999   RETURN
13000     None
13001 */
13002 
update_key_stat()13003 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13004 {
13005   max_used_key_length= real_prefix_len;
13006   if (min_max_ranges.elements > 0)
13007   {
13008     QUICK_RANGE *cur_range;
13009     if (have_min)
13010     { /* Check if the right-most range has a lower boundary. */
13011       get_dynamic(&min_max_ranges, (uchar*)&cur_range,
13012                   min_max_ranges.elements - 1);
13013       if (!(cur_range->flag & NO_MIN_RANGE))
13014       {
13015         max_used_key_length+= min_max_arg_len;
13016         used_key_parts++;
13017         return;
13018       }
13019     }
13020     if (have_max)
13021     { /* Check if the left-most range has an upper boundary. */
13022       get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
13023       if (!(cur_range->flag & NO_MAX_RANGE))
13024       {
13025         max_used_key_length+= min_max_arg_len;
13026         used_key_parts++;
13027         return;
13028       }
13029     }
13030   }
13031   else if (have_min && min_max_arg_part &&
13032            min_max_arg_part->field->real_maybe_null())
13033   {
13034     /*
13035       If a MIN/MAX argument value is NULL, we can quickly determine
13036       that we're in the beginning of the next group, because NULLs
13037       are always < any other value. This allows us to quickly
13038       determine the end of the current group and jump to the next
13039       group (see next_min()) and thus effectively increases the
13040       usable key length.
13041     */
13042     max_used_key_length+= min_max_arg_len;
13043     used_key_parts++;
13044   }
13045 }
13046 
13047 
13048 /*
13049   Initialize a quick group min/max select for key retrieval.
13050 
13051   SYNOPSIS
13052     QUICK_GROUP_MIN_MAX_SELECT::reset()
13053 
13054   DESCRIPTION
13055     Initialize the index chosen for access and find and store the prefix
13056     of the last group. The method is expensive since it performs disk access.
13057 
13058   RETURN
13059     0      OK
13060     other  Error code
13061 */
13062 
reset(void)13063 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
13064 {
13065   int result;
13066   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
13067 
13068   seen_first_key= false;
13069   head->set_keyread(TRUE); /* We need only the key attributes */
13070   /*
13071     Request ordered index access as usage of ::index_last(),
13072     ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
13073   */
13074   if ((result= head->file->ha_index_init(index, true)))
13075   {
13076     head->file->print_error(result, MYF(0));
13077     DBUG_RETURN(result);
13078   }
13079   if (quick_prefix_select && quick_prefix_select->reset())
13080     DBUG_RETURN(1);
13081 
13082   result= head->file->ha_index_last(record);
13083   if (result != 0)
13084   {
13085     if (result == HA_ERR_END_OF_FILE)
13086       DBUG_RETURN(0);
13087     else
13088       DBUG_RETURN(result);
13089   }
13090 
13091   /* Save the prefix of the last group. */
13092   key_copy(last_prefix, record, index_info, group_prefix_len);
13093 
13094   DBUG_RETURN(0);
13095 }
13096 
13097 
13098 
13099 /*
13100   Get the next key containing the MIN and/or MAX key for the next group.
13101 
13102   SYNOPSIS
13103     QUICK_GROUP_MIN_MAX_SELECT::get_next()
13104 
13105   DESCRIPTION
13106     The method finds the next subsequent group of records that satisfies the
13107     query conditions and finds the keys that contain the MIN/MAX values for
13108     the key part referenced by the MIN/MAX function(s). Once a group and its
13109     MIN/MAX values are found, store these values in the Item_sum objects for
13110     the MIN/MAX functions. The rest of the values in the result row are stored
13111     in the Item_field::result_field of each select field. If the query does
13112     not contain MIN and/or MAX functions, then the function only finds the
13113     group prefix, which is a query answer itself.
13114 
13115   NOTES
13116     If both MIN and MAX are computed, then we use the fact that if there is
13117     no MIN key, there can't be a MAX key as well, so we can skip looking
13118     for a MAX key in this case.
13119 
13120   RETURN
13121     0                  on success
13122     HA_ERR_END_OF_FILE if returned all keys
13123     other              if some error occurred
13124 */
13125 
int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
  int result;
#endif
  /* key_cmp() result for (current prefix, last prefix); 0 == last group. */
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      /* No rows matched this prefix: try the next group prefix. */
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue;
      break;
    }

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= head->file->ha_index_read_map(record, group_prefix,
                                            make_prev_keypart_map(real_key_parts),
                                            HA_READ_KEY_EXACT);

    /*
      Report the MIN lookup status if MIN was computed, otherwise the MAX
      lookup status, otherwise the status of the prefix/infix read above.
    */
    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  /* Translate "no matching key in the last group" into end of scan. */
  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}
13203 
13204 
13205 /*
13206   Retrieve the minimal key in the next group.
13207 
13208   SYNOPSIS
13209     QUICK_GROUP_MIN_MAX_SELECT::next_min()
13210 
13211   DESCRIPTION
13212     Find the minimal key within this group such that the key satisfies the query
13213     conditions and NULL semantics. The found key is loaded into this->record.
13214 
13215   IMPLEMENTATION
13216     Depending on the values of min_max_ranges.elements, key_infix_len, and
    whether there is a NULL in the MIN field, this function may directly
13218     return without any data access. In this case we use the key loaded into
13219     this->record by the call to this->next_prefix() just before this call.
13220 
13221   RETURN
13222     0                    on success
13223     HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
13224     HA_ERR_END_OF_FILE   - "" -
13225     other                if some error occurred
13226 */
13227 
int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result= head->file->ha_index_read_map(record, group_prefix,
                                                 make_prev_keypart_map(real_key_parts),
                                                 HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar key_buf[MAX_KEY_LENGTH];

      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(key_buf, record, index_info, max_used_key_length);
      result= head->file->ha_index_read_map(record, key_buf,
                                            make_keypart_map(real_key_parts),
                                            HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        /* Restore the saved NULL key if the read jumped past this group. */
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, key_buf, index_info, 0);
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}
13292 
13293 
13294 /*
13295   Retrieve the maximal key in the next group.
13296 
13297   SYNOPSIS
13298     QUICK_GROUP_MIN_MAX_SELECT::next_max()
13299 
13300   DESCRIPTION
13301     Lookup the maximal key of the group, and store it into this->record.
13302 
13303   RETURN
13304     0                    on success
13305     HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
13306     HA_ERR_END_OF_FILE	 - "" -
13307     other                if some error occurred
13308 */
13309 
next_max()13310 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
13311 {
13312   int result;
13313 
13314   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
13315 
13316   /* Get the last key in the (possibly extended) group. */
13317   if (min_max_ranges.elements > 0)
13318     result= next_max_in_range();
13319   else
13320     result= head->file->ha_index_read_map(record, group_prefix,
13321                                           make_prev_keypart_map(real_key_parts),
13322                                           HA_READ_PREFIX_LAST);
13323   DBUG_RETURN(result);
13324 }
13325 
13326 
13327 /**
  Find the next different key value by skipping all the rows with the same key
13329   value.
13330 
13331   Implements a specialized loose index access method for queries
13332   containing aggregate functions with distinct of the form:
13333     SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
13334   This method comes to replace the index scan + Unique class
13335   (distinct selection) for loose index scan that visits all the rows of a
  covering index instead of jumping to the beginning of each group.
13337   TODO: Placeholder function. To be replaced by a handler API call
13338 
13339   @param is_index_scan     hint to use index scan instead of random index read
13340                            to find the next different value.
13341   @param file              table handler
13342   @param key_part          group key to compare
13343   @param record            row data
13344   @param group_prefix      current key prefix data
13345   @param group_prefix_len  length of the current key prefix data
13346   @param group_key_parts   number of the current key prefix columns
13347   @return status
13348     @retval  0  success
13349     @retval !0  failure
13350 */
13351 
index_next_different(bool is_index_scan,handler * file,KEY_PART_INFO * key_part,uchar * record,const uchar * group_prefix,uint group_prefix_len,uint group_key_parts)13352 static int index_next_different (bool is_index_scan, handler *file,
13353                                 KEY_PART_INFO *key_part, uchar * record,
13354                                 const uchar * group_prefix,
13355                                 uint group_prefix_len,
13356                                 uint group_key_parts)
13357 {
13358   if (is_index_scan)
13359   {
13360     int result= 0;
13361 
13362     while (!key_cmp (key_part, group_prefix, group_prefix_len))
13363     {
13364       result= file->ha_index_next(record);
13365       if (result)
13366         return(result);
13367     }
13368     return result;
13369   }
13370   else
13371     return file->ha_index_read_map(record, group_prefix,
13372                                    make_prev_keypart_map(group_key_parts),
13373                                    HA_READ_AFTER_KEY);
13374 }
13375 
13376 
13377 /*
13378   Determine the prefix of the next group.
13379 
13380   SYNOPSIS
13381     QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13382 
13383   DESCRIPTION
13384     Determine the prefix of the next group that satisfies the query conditions.
13385     If there is a range condition referencing the group attributes, use a
13386     QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
13387     condition. If there is a key infix of constants, append this infix
13388     immediately after the group attributes. The possibly extended prefix is
13389     stored in this->group_prefix. The first key of the found group is stored in
13390     this->record, on which relies this->next_min().
13391 
13392   RETURN
13393     0                    on success
13394     HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
13395     HA_ERR_END_OF_FILE   if there are no more keys
13396     other                if some error occurred
13397 */
next_prefix()13398 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13399 {
13400   int result;
13401   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
13402 
13403   if (quick_prefix_select)
13404   {
13405     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
13406     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
13407                                                       group_key_parts,
13408                                                       cur_prefix)))
13409       DBUG_RETURN(result);
13410     seen_first_key= TRUE;
13411   }
13412   else
13413   {
13414     if (!seen_first_key)
13415     {
13416       result= head->file->ha_index_first(record);
13417       if (result)
13418         DBUG_RETURN(result);
13419       seen_first_key= TRUE;
13420     }
13421     else
13422     {
13423       /* Load the first key in this group into record. */
13424       result= index_next_different (is_index_scan, head->file,
13425                                     index_info->key_part,
13426                                     record, group_prefix, group_prefix_len,
13427                                     group_key_parts);
13428       if (result)
13429         DBUG_RETURN(result);
13430     }
13431   }
13432 
13433   /* Save the prefix of this group for subsequent calls. */
13434   key_copy(group_prefix, record, index_info, group_prefix_len);
13435   /* Append key_infix to group_prefix. */
13436   if (key_infix_len > 0)
13437     memcpy(group_prefix + group_prefix_len,
13438            key_infix, key_infix_len);
13439 
13440   DBUG_RETURN(0);
13441 }
13442 
13443 
13444 /*
13445   Find the minimal key in a group that satisfies some range conditions for the
13446   min/max argument field.
13447 
13448   SYNOPSIS
13449     QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
13450 
13451   DESCRIPTION
13452     Given the sequence of ranges min_max_ranges, find the minimal key that is
13453     in the left-most possible range. If there is no such key, then the current
13454     group does not have a MIN key that satisfies the WHERE clause. If a key is
13455     found, its value is stored in this->record.
13456 
13457   RETURN
13458     0                    on success
13459     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13460                          the ranges
13461     HA_ERR_END_OF_FILE   - "" -
13462     other                if some error
13463 */
13464 
int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  bool found_null= FALSE;
  /* Default: an empty/unsuccessful scan reports "no matching key". */
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
                 min_max_arg_len) == 1))
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      /* No lower bound: look for the first key with the group prefix. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_KEY_EXACT;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
                                          find_flag);
    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
      break;
    }

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
      result= HA_ERR_KEY_NOT_FOUND;
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      /* Compose the MAX key for the range. */
      uchar *max_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(max_key, group_prefix, real_prefix_len);
      memcpy(max_key + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      /* Compare the found key with max_key. */
      int cmp_res= key_cmp(index_info->key_part, max_key,
                           real_prefix_len + min_max_arg_len);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the maximum boundary
        or
        the key is greater than the maximum
      */
      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
          cmp_res > 0)
      {
        result= HA_ERR_KEY_NOT_FOUND;
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
    memcpy(record, tmp_record, head->s->rec_buff_length);
    result= 0;
  }
  return result;
}
13582 
13583 
13584 /*
13585   Find the maximal key in a group that satisfies some range conditions for the
13586   min/max argument field.
13587 
13588   SYNOPSIS
13589     QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
13590 
13591   DESCRIPTION
13592     Given the sequence of ranges min_max_ranges, find the maximal key that is
13593     in the right-most possible range. If there is no such key, then the current
13594     group does not have a MAX key that satisfies the WHERE clause. If a key is
13595     found, its value is stored in this->record.
13596 
13597   RETURN
13598     0                    on success
13599     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13600                          the ranges
13601     HA_ERR_END_OF_FILE   - "" -
13602     other                if some error
13603 */
13604 
int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
                 min_max_arg_len) == -1))
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      /* No upper bound: look for the last key with the group prefix. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_PREFIX_LAST;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
                                          find_flag);

    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

      /*
        If no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
    }
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      return 0; /* No need to perform the checks below for equal keys. */

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
      continue;                                 // Row not found

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      /* Compose the MIN key for the range. */
      uchar *min_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(min_key, group_prefix, real_prefix_len);
      memcpy(min_key + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      /* Compare the found key with min_key. */
      int cmp_res= key_cmp(index_info->key_part, min_key,
                           real_prefix_len + min_max_arg_len);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the minimum boundary
        or
        the key is less than the minimum
      */
      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
          cmp_res < 0)
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  /* No range produced a qualifying key for this group. */
  return HA_ERR_KEY_NOT_FOUND;
}
13693 
13694 
13695 /*
13696   Update all MIN function results with the newly found value.
13697 
13698   SYNOPSIS
13699     QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13700 
13701   DESCRIPTION
13702     The method iterates through all MIN functions and updates the result value
13703     of each function by calling Item_sum::reset(), which in turn picks the new
13704     result value from this->head->record[0], previously updated by
13705     next_min(). The updated value is stored in a member variable of each of the
13706     Item_sum objects, depending on the value type.
13707 
13708   IMPLEMENTATION
13709     The update must be done separately for MIN and MAX, immediately after
13710     next_min() was called and before next_max() is called, because both MIN and
13711     MAX take their result value from the same buffer this->head->record[0]
13712     (i.e.  this->record).
13713 
13714   RETURN
13715     None
13716 */
13717 
update_min_result()13718 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13719 {
13720   Item_sum *min_func;
13721 
13722   min_functions_it->rewind();
13723   while ((min_func= (*min_functions_it)++))
13724     min_func->reset_and_add();
13725 }
13726 
13727 
13728 /*
13729   Update all MAX function results with the newly found value.
13730 
13731   SYNOPSIS
13732     QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13733 
13734   DESCRIPTION
13735     The method iterates through all MAX functions and updates the result value
13736     of each function by calling Item_sum::reset(), which in turn picks the new
13737     result value from this->head->record[0], previously updated by
13738     next_max(). The updated value is stored in a member variable of each of the
13739     Item_sum objects, depending on the value type.
13740 
13741   IMPLEMENTATION
13742     The update must be done separately for MIN and MAX, immediately after
13743     next_max() was called, because both MIN and MAX take their result value
13744     from the same buffer this->head->record[0] (i.e.  this->record).
13745 
13746   RETURN
13747     None
13748 */
13749 
update_max_result()13750 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13751 {
13752   Item_sum *max_func;
13753 
13754   max_functions_it->rewind();
13755   while ((max_func= (*max_functions_it)++))
13756     max_func->reset_and_add();
13757 }
13758 
13759 
13760 /*
13761   Append comma-separated list of keys this quick select uses to key_names;
13762   append comma-separated list of corresponding used lengths to used_lengths.
13763 
13764   SYNOPSIS
13765     QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
13766     key_names    [out] Names of used indexes
13767     used_lengths [out] Corresponding lengths of the index names
13768 
13769   DESCRIPTION
13770     This method is used by select_describe to extract the names of the
13771     indexes used by a quick select.
13772 
13773 */
13774 
add_keys_and_lengths(String * key_names,String * used_lengths)13775 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
13776                                                       String *used_lengths)
13777 {
13778   char buf[64];
13779   uint length;
13780   key_names->append(index_info->name);
13781   length= longlong2str(max_used_key_length, buf, 10) - buf;
13782   used_lengths->append(buf, length);
13783 }
13784 
13785 
13786 
13787 /**
13788   Traverse the R-B range tree for this and later keyparts to see if
13789   there are at least as many equality ranges as defined by the limit.
13790 
13791   @param keypart_root   The root of a R-B tree of ranges for a given keypart.
13792   @param count[in,out]  The number of equality ranges found so far
13793   @param limit          The number of ranges
13794 
13795   @retval true if limit > 0 and 'limit' or more equality ranges have been
13796           found in the range R-B trees
13797   @retval false otherwise
13798 
13799 */
static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count, uint limit)
{
  // "Statistics instead of index dives" feature is turned off
  if (limit == 0)
    return false;

  /*
    Optimization: if there is at least one equality range, index
    statistics will be used when limit is 1. It's safe to return true
    even without checking that there is an equality range because if
    there are none, index statistics will not be used anyway.
  */
  if (limit == 1)
    return true;

  for(SEL_ARG *keypart_range= keypart_root->first();
      keypart_range; keypart_range= keypart_range->next)
  {
    /*
      This is an equality range predicate and should be counted if:
      1) the range for this keypart does not have a min/max flag
         (which indicates <, <= etc), and
      2) the lower and upper range boundaries have the same value
         (it's not a "x BETWEEN a AND b")

      Note, however, that if this is an "x IS NULL" condition we don't
      count it because the number of NULL-values is likely to be off
      the index statistics we plan to use.
    */
    if (!keypart_range->min_flag && !keypart_range->max_flag && // 1)
        !keypart_range->cmp_max_to_min(keypart_range) &&        // 2)
        !keypart_range->is_null_interval())                     // "x IS NULL"
    {
      /*
         Count predicates in the next keypart, but only if that keypart
         is the next in the index. The recursive call updates *count;
         its return value is subsumed by the limit check below.
      */
      if (keypart_range->next_key_part &&
          keypart_range->next_key_part->part == keypart_range->part + 1)
        eq_ranges_exceeds_limit(keypart_range->next_key_part, count, limit);
      else
        // We've found a path of equality predicates down to a keypart leaf
        (*count)++;

      if (*count >= limit)
        return true;
    }
  }
  return false;
}
13850 
13851 #ifndef DBUG_OFF
13852 
print_sel_tree(PARAM * param,SEL_TREE * tree,key_map * tree_map,const char * msg)13853 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
13854                            const char *msg)
13855 {
13856   SEL_ARG **key,**end;
13857   int idx;
13858   char buff[1024];
13859   DBUG_ENTER("print_sel_tree");
13860 
13861   String tmp(buff,sizeof(buff),&my_charset_bin);
13862   tmp.length(0);
13863   for (idx= 0,key=tree->keys, end=key+param->keys ;
13864        key != end ;
13865        key++,idx++)
13866   {
13867     if (tree_map->is_set(idx))
13868     {
13869       uint keynr= param->real_keynr[idx];
13870       if (tmp.length())
13871         tmp.append(',');
13872       tmp.append(param->table->key_info[keynr].name);
13873     }
13874   }
13875   if (!tmp.length())
13876     tmp.append(STRING_WITH_LEN("(empty)"));
13877 
13878   DBUG_PRINT("info", ("SEL_TREE: %p (%s)  scans: %s", tree, msg, tmp.ptr()));
13879   DBUG_VOID_RETURN;
13880 }
13881 
13882 
print_ror_scans_arr(TABLE * table,const char * msg,struct st_ror_scan_info ** start,struct st_ror_scan_info ** end)13883 static void print_ror_scans_arr(TABLE *table, const char *msg,
13884                                 struct st_ror_scan_info **start,
13885                                 struct st_ror_scan_info **end)
13886 {
13887   DBUG_ENTER("print_ror_scans_arr");
13888 
13889   char buff[1024];
13890   String tmp(buff,sizeof(buff),&my_charset_bin);
13891   tmp.length(0);
13892   for (;start != end; start++)
13893   {
13894     if (tmp.length())
13895       tmp.append(',');
13896     tmp.append(table->key_info[(*start)->keynr].name);
13897   }
13898   if (!tmp.length())
13899     tmp.append(STRING_WITH_LEN("(empty)"));
13900   DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
13901   fprintf(DBUG_FILE,"ROR key scans (%s): %s", msg, tmp.ptr());
13902 
13903   DBUG_VOID_RETURN;
13904 }
13905 
13906 
13907 #endif /* !DBUG_OFF */
13908 
13909 /**
13910   Print a key to a string
13911 
13912   @param[out] out          String the key is appended to
13913   @param[in]  key_part     Index components description
13914   @param[in]  key          Key tuple
13915   @param[in]  used_length  Key tuple length
13916 */
13917 static void
print_key_value(String * out,const KEY_PART_INFO * key_part,const uchar * key)13918 print_key_value(String *out, const KEY_PART_INFO *key_part, const uchar *key)
13919 {
13920   Field *field= key_part->field;
13921 
13922   if (field->flags & BLOB_FLAG)
13923   {
13924     // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
13925     if (field->real_maybe_null() && *key)
13926       out->append(STRING_WITH_LEN("NULL"));
13927     else
13928       out->append(STRING_WITH_LEN("unprintable_blob_value"));
13929     return;
13930   }
13931 
13932   char buff[128];
13933   String tmp(buff, sizeof(buff), system_charset_info);
13934   tmp.length(0);
13935 
13936   TABLE *table= field->table;
13937   my_bitmap_map *old_sets[2];
13938 
13939   dbug_tmp_use_all_columns(table, old_sets, table->read_set,
13940                            table->write_set);
13941 
13942   uint store_length= key_part->store_length;
13943 
13944   if (field->real_maybe_null())
13945   {
13946     /*
13947       Byte 0 of key is the null-byte. If set, key is NULL.
13948       Otherwise, print the key value starting immediately after the
13949       null-byte
13950     */
13951     if (*key)
13952     {
13953       out->append(STRING_WITH_LEN("NULL"));
13954       goto restore_col_map;
13955     }
13956     key++;                                    // Skip null byte
13957     store_length--;
13958   }
13959   field->set_key_image(key, key_part->length);
13960   if (field->type() == MYSQL_TYPE_BIT)
13961     (void) field->val_int_as_str(&tmp, 1); // may change tmp's charset
13962   else
13963     field->val_str(&tmp); // may change tmp's charset
13964   out->append(tmp.ptr(), tmp.length(), tmp.charset());
13965 
13966 restore_col_map:
13967   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
13968 }
13969 
13970 /**
13971   Append range info for a key part to a string
13972 
13973   @param[in,out] out          String the range info is appended to
13974   @param[in]     key_part     Indexed column used in a range select
13975   @param[in]     min_key      Key tuple describing lower bound of range
13976   @param[in]     max_key      Key tuple describing upper bound of range
13977   @param[in]     flag         Key range flags defining what min_key
13978                               and max_key represent @see my_base.h
13979  */
append_range(String * out,const KEY_PART_INFO * key_part,const uchar * min_key,const uchar * max_key,const uint flag)13980 void append_range(String *out,
13981                   const KEY_PART_INFO *key_part,
13982                   const uchar *min_key, const uchar *max_key,
13983                   const uint flag)
13984 {
13985   if (out->length() > 0)
13986     out->append(STRING_WITH_LEN(" AND "));
13987 
13988   if (!(flag & NO_MIN_RANGE))
13989   {
13990     print_key_value(out, key_part, min_key);
13991     if (flag & NEAR_MIN)
13992       out->append(STRING_WITH_LEN(" < "));
13993     else
13994       out->append(STRING_WITH_LEN(" <= "));
13995   }
13996 
13997   out->append(key_part->field->field_name);
13998 
13999   if (!(flag & NO_MAX_RANGE))
14000   {
14001     if (flag & NEAR_MAX)
14002       out->append(STRING_WITH_LEN(" < "));
14003     else
14004       out->append(STRING_WITH_LEN(" <= "));
14005     print_key_value(out, key_part, max_key);
14006   }
14007 }
14008 
14009 /**
14010   Traverse an R-B tree of range conditions and append all ranges for
14011   this keypart and consecutive keyparts to range_trace (if non-NULL)
14012   or to range_string (if range_trace is NULL). See description of R-B
14013   trees/SEL_ARG for details on how ranges are linked.
14014 
14015   @param[in,out] range_trace   Optimizer trace array ranges are appended to
14016   @param[in,out] range_string  The string where range predicates are
14017                                appended when the last keypart has
14018                                been reached.
14019   @param[in]     range_so_far  String containing ranges for keyparts prior
14020                                to this keypart.
14021   @param[in]     keypart_root  The root of the R-B tree containing intervals
14022                                for this keypart.
14023   @param[in]     key_parts     Index components description, used when adding
14024                                information to the optimizer trace
14025 
14026   @note This function mimics the behavior of sel_arg_range_seq_next()
14027 */
static void append_range_all_keyparts(Opt_trace_array *range_trace,
                                      String *range_string,
                                      String *range_so_far,
                                      SEL_ARG *keypart_root,
                                      const KEY_PART_INFO *key_parts)
{
  DBUG_ASSERT(keypart_root && keypart_root != &null_element);

  // If a trace array was supplied, output goes there; otherwise to the string
  const bool append_to_trace= (range_trace != NULL);

  // Either add info to range_string or to range_trace
  DBUG_ASSERT(append_to_trace ? !range_string : (range_string != NULL));

  // Navigate to first interval in red-black tree
  const KEY_PART_INFO *cur_key_part= key_parts + keypart_root->part;
  const SEL_ARG *keypart_range= keypart_root->first();

  // Length of the prefix built by earlier keyparts; restored per sibling range
  const uint save_range_so_far_length= range_so_far->length();

  // Iterate over all intervals for this keypart, in key order
  while (keypart_range)
  {
    /*
      Skip the rest of condition printing to avoid OOM if appending to
      range_string and the string becomes too long. Printing very long
      range conditions normally doesn't make sense either.
     */
    if (!append_to_trace && range_string->length() > 500)
    {
      range_string->append(STRING_WITH_LEN("..."));
      break;
    }

    // Append the current range predicate to the range String
    append_range(range_so_far, cur_key_part,
                 keypart_range->min_value, keypart_range->max_value,
                 keypart_range->min_flag | keypart_range->max_flag);

    /*
      Print range predicates for consecutive keyparts if
      1) There are predicates for later keyparts
      2) There are no "holes" in the used keyparts (keypartX can only
         be used if there is a range predicate on keypartX-1)
      3) The current range is an equality range
     */
    if (keypart_range->next_key_part &&
        keypart_range->next_key_part->part == keypart_range->part + 1 &&
        keypart_range->is_singlepoint())
    {
      // Recurse: the next keypart's ranges extend the current prefix
      append_range_all_keyparts(range_trace, range_string, range_so_far,
                                keypart_range->next_key_part, key_parts);
    }
    else
    {
      /*
        This is the last keypart with a usable range predicate. Print
        full range info to the optimizer trace or to the string
      */
      if (append_to_trace)
        range_trace->add_utf8(range_so_far->ptr(),
                              range_so_far->length());
      else
      {
        // Parenthesize each complete range; OR-separate from earlier ones
        if (range_string->length() == 0)
          range_string->append(STRING_WITH_LEN("("));
        else
          range_string->append(STRING_WITH_LEN(" OR ("));

        range_string->append(range_so_far->ptr(), range_so_far->length());
        range_string->append(STRING_WITH_LEN(")"));
      }
    }
    keypart_range= keypart_range->next;
    /*
      Now moving to next range for this keypart, so "reset"
      range_so_far to include only range description of earlier
      keyparts
    */
    range_so_far->length(save_range_so_far_length);
  }
}
14108 
14109 /**
14110   Print the ranges in a SEL_TREE to debug log.
14111 
14112   @param tree_name   Descriptive name of the tree
14113   @param tree        The SEL_TREE that will be printed to debug log
14114   @param param       PARAM from SQL_SELECT::test_quick_select
14115 */
dbug_print_tree(const char * tree_name,SEL_TREE * tree,const RANGE_OPT_PARAM * param)14116 static inline void dbug_print_tree(const char *tree_name,
14117                                    SEL_TREE *tree,
14118                                    const RANGE_OPT_PARAM *param)
14119 {
14120 #ifndef DBUG_OFF
14121   if (!param->using_real_indexes)
14122   {
14123     DBUG_PRINT("info",
14124                ("sel_tree: "
14125                 "%s uses a partitioned index and cannot be printed",
14126                 tree_name));
14127     return;
14128   }
14129 
14130   if (!tree)
14131   {
14132     DBUG_PRINT("info", ("sel_tree: %s is NULL", tree_name));
14133     return;
14134   }
14135 
14136   if (tree->type == SEL_TREE::IMPOSSIBLE)
14137   {
14138     DBUG_PRINT("info", ("sel_tree: %s is IMPOSSIBLE", tree_name));
14139     return;
14140   }
14141 
14142   if (tree->type == SEL_TREE::ALWAYS)
14143   {
14144     DBUG_PRINT("info", ("sel_tree: %s is ALWAYS", tree_name));
14145     return;
14146   }
14147 
14148   if (tree->type == SEL_TREE::MAYBE)
14149   {
14150     DBUG_PRINT("info", ("sel_tree: %s is MAYBE", tree_name));
14151     return;
14152   }
14153 
14154   if (!tree->merges.is_empty())
14155   {
14156     DBUG_PRINT("info",
14157                ("sel_tree: "
14158                 "%s contains the following merges", tree_name));
14159 
14160     List_iterator<SEL_IMERGE> it(tree->merges);
14161     int i= 0;
14162     for (SEL_IMERGE *el= it++; el; el= it++, i++)
14163     {
14164       for (SEL_TREE** current= el->trees;
14165            current != el->trees_next;
14166            current++)
14167         dbug_print_tree("  merge_tree", *current, param);
14168     }
14169   }
14170 
14171   for (uint i= 0; i< param->keys; i++)
14172   {
14173     if (tree->keys[i] == NULL || tree->keys[i] == &null_element)
14174       continue;
14175 
14176     uint real_key_nr= param->real_keynr[i];
14177 
14178     const KEY &cur_key= param->table->key_info[real_key_nr];
14179     const KEY_PART_INFO *key_part= cur_key.key_part;
14180 
14181     /*
14182       String holding the final range description from
14183       append_range_all_keyparts()
14184     */
14185     char buff1[512];
14186     String range_result(buff1, sizeof(buff1), system_charset_info);
14187     range_result.length(0);
14188 
14189     /*
14190       Range description up to a certain keypart - used internally in
14191       append_range_all_keyparts()
14192     */
14193     char buff2[128];
14194     String range_so_far(buff2, sizeof(buff2), system_charset_info);
14195     range_so_far.length(0);
14196 
14197     append_range_all_keyparts(NULL, &range_result, &range_so_far,
14198                               tree->keys[i], key_part);
14199 
14200     DBUG_PRINT("info",
14201                ("sel_tree: %s->keys[%d(real_keynr: %d)]: %s",
14202                 tree_name, i, real_key_nr, range_result.ptr()));
14203   }
14204 #endif
14205 }
14206 
14207 /*****************************************************************************
14208 ** Print a quick range for debugging
14209 ** TODO:
14210 ** This should be changed to use a String to store each row instead
14211 ** of locking the DEBUG stream !
14212 *****************************************************************************/
14213 
14214 #ifndef DBUG_OFF
14215 
static void
print_multiple_key_values(KEY_PART *key_part, const uchar *key,
                          uint used_length)
{
  // Print the keypart values of a key tuple to DBUG_FILE, '/'-separated.
  // Caller is expected to hold DBUG_LOCK_FILE around this call.
  char buff[1024];
  const uchar *key_end= key+used_length;
  String tmp(buff,sizeof(buff),&my_charset_bin);
  uint store_length;
  TABLE *table= key_part->field->table;
  my_bitmap_map *old_sets[2];

  // Allow reading/writing all columns while fields are unpacked below
  dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);

  // Walk the tuple one keypart at a time; store_length is set in the body
  for (; key < key_end; key+=store_length, key_part++)
  {
    Field *field=      key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      // Byte 0 of a nullable keypart is the null-byte
      if (*key)
      {
        fwrite("NULL",sizeof(char),4,DBUG_FILE);
        continue;
      }
      key++;                                    // Skip null byte
      store_length--;                           // value is one byte shorter
    }
    field->set_key_image(key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);                     // separator between keyparts
  }
  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
}
14255 
print_quick(QUICK_SELECT_I * quick,const key_map * needed_reg)14256 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
14257 {
14258   char buf[MAX_KEY/8+1];
14259   TABLE *table;
14260   my_bitmap_map *old_sets[2];
14261   DBUG_ENTER("print_quick");
14262   if (!quick)
14263     DBUG_VOID_RETURN;
14264   DBUG_LOCK_FILE;
14265 
14266   table= quick->head;
14267   dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
14268   quick->dbug_dump(0, TRUE);
14269   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14270 
14271   fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
14272 
14273   DBUG_UNLOCK_FILE;
14274   DBUG_VOID_RETURN;
14275 }
14276 
dbug_dump(int indent,bool verbose)14277 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
14278 {
14279   /* purecov: begin inspected */
14280   fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
14281           indent, "", head->key_info[index].name, max_used_key_length);
14282 
14283   if (verbose)
14284   {
14285     QUICK_RANGE *range;
14286     QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
14287     QUICK_RANGE **end_range= pr + ranges.elements;
14288     for (; pr != end_range; ++pr)
14289     {
14290       fprintf(DBUG_FILE, "%*s", indent + 2, "");
14291       range= *pr;
14292       if (!(range->flag & NO_MIN_RANGE))
14293       {
14294         print_multiple_key_values(key_parts, range->min_key,
14295                                   range->min_length);
14296         if (range->flag & NEAR_MIN)
14297           fputs(" < ",DBUG_FILE);
14298         else
14299           fputs(" <= ",DBUG_FILE);
14300       }
14301       fputs("X",DBUG_FILE);
14302 
14303       if (!(range->flag & NO_MAX_RANGE))
14304       {
14305         if (range->flag & NEAR_MAX)
14306           fputs(" < ",DBUG_FILE);
14307         else
14308           fputs(" <= ",DBUG_FILE);
14309         print_multiple_key_values(key_parts, range->max_key,
14310                                   range->max_length);
14311       }
14312       fputs("\n",DBUG_FILE);
14313     }
14314   }
14315   /* purecov: end */
14316 }
14317 
dbug_dump(int indent,bool verbose)14318 void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
14319 {
14320   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14321   QUICK_RANGE_SELECT *quick;
14322   fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
14323   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14324   while ((quick= it++))
14325     quick->dbug_dump(indent+2, verbose);
14326   if (pk_quick_select)
14327   {
14328     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14329     pk_quick_select->dbug_dump(indent+2, verbose);
14330   }
14331   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14332 }
14333 
dbug_dump(int indent,bool verbose)14334 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
14335 {
14336   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14337   QUICK_RANGE_SELECT *quick;
14338   fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
14339           indent, "", need_to_fetch_row? "":"non-");
14340   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14341   while ((quick= it++))
14342     quick->dbug_dump(indent+2, verbose);
14343   if (cpk_quick)
14344   {
14345     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14346     cpk_quick->dbug_dump(indent+2, verbose);
14347   }
14348   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14349 }
14350 
dbug_dump(int indent,bool verbose)14351 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
14352 {
14353   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
14354   QUICK_SELECT_I *quick;
14355   fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
14356   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14357   while ((quick= it++))
14358     quick->dbug_dump(indent+2, verbose);
14359   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14360 }
14361 
14362 /*
14363   Print quick select information to DBUG_FILE.
14364 
14365   SYNOPSIS
14366     QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
14367     indent  Indentation offset
14368     verbose If TRUE show more detailed output.
14369 
14370   DESCRIPTION
14371     Print the contents of this quick select to DBUG_FILE. The method also
14372     calls dbug_dump() for the used quick select if any.
14373 
14374   IMPLEMENTATION
14375     Caller is responsible for locking DBUG_FILE before this call and unlocking
14376     it afterwards.
14377 
14378   RETURN
14379     None
14380 */
14381 
dbug_dump(int indent,bool verbose)14382 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
14383 {
14384   fprintf(DBUG_FILE,
14385           "%*squick_group_min_max_select: index %s (%d), length: %d\n",
14386           indent, "", index_info->name, index, max_used_key_length);
14387   if (key_infix_len > 0)
14388   {
14389     fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
14390             indent, "", key_infix_len);
14391   }
14392   if (quick_prefix_select)
14393   {
14394     fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
14395     quick_prefix_select->dbug_dump(indent + 2, verbose);
14396   }
14397   if (min_max_ranges.elements > 0)
14398   {
14399     fprintf(DBUG_FILE, "%*susing %lu quick_ranges for MIN/MAX:\n",
14400             indent, "", min_max_ranges.elements);
14401   }
14402 }
14403 
14404 
14405 #endif /* !DBUG_OFF */
14406