1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /*
24   TODO:
25   Fix that MAYBE_KEY are stored in the tree so that we can detect use
26   of full hash keys for queries like:
27 
28   select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
29 
30 */
31 
32 // Needed by the unit tests
33 #ifndef OPT_RANGE_CC_INCLUDED
34 #define OPT_RANGE_CC_INCLUDED
35 
36 /*
37   This file contains:
38 
39   RangeAnalysisModule
40     A module that accepts a condition, index (or partitioning) description,
41     and builds lists of intervals (in index/partitioning space), such that
42     all possible records that match the condition are contained within the
43     intervals.
44     The entry point for the range analysis module is get_mm_tree()
45     (mm=min_max) function.
46 
47     The lists are returned in form of complicated structure of interlinked
48     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
49     See quick_range_seq_next, find_used_partitions for examples of how to walk
50     this structure.
51     All direct "users" of this module are located within this file, too.
52 
53 
54   PartitionPruningModule
55     A module that accepts a partitioned table, condition, and finds which
56     partitions we will need to use in query execution. Search down for
57     "PartitionPruningModule" for description.
58     The module has single entry point - prune_partitions() function.
59 
60 
61   Range/index_merge/groupby-minmax optimizer module
62     A module that accepts a table, condition, and returns
63      - a QUICK_*_SELECT object that can be used to retrieve rows that match
64        the specified condition, or a "no records will match the condition"
65        statement.
66 
67     The module entry points are
68       test_quick_select()
69       get_quick_select_for_ref()
70 
71 
72   Record retrieval code for range/index_merge/groupby-min-max.
73     Implementations of QUICK_*_SELECT classes.
74 
75   KeyTupleFormat
76   ~~~~~~~~~~~~~~
77   The code in this file (and elsewhere) makes operations on key value tuples.
78   Those tuples are stored in the following format:
79 
80   The tuple is a sequence of key part values. The length of key part value
81   depends only on its type (and not depends on the what value is stored)
82 
83     KeyTuple: keypart1-data, keypart2-data, ...
84 
85   The value of each keypart is stored in the following format:
86 
87     keypart_data: [isnull_byte] keypart-value-bytes
88 
89   If a keypart may have a NULL value (key_part->field->real_maybe_null() can
90   be used to check this), then the first byte is a NULL indicator with the
91   following valid values:
92     1  - keypart has NULL value.
93     0  - keypart has non-NULL value.
94 
95   <questionable-statement> If isnull_byte==1 (NULL value), then the following
96   keypart->length bytes must be 0.
97   </questionable-statement>
98 
99   keypart-value-bytes holds the value. Its format depends on the field type.
100   The length of keypart-value-bytes may or may not depend on the value being
101   stored. The default is that length is static and equal to
102   KEY_PART_INFO::length.
103 
104   Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the
105   value:
106 
107      keypart-value-bytes: value_length value_bytes
108 
109   The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
110 
111   See key_copy() and key_restore() for code to move data between index tuple
112   and table record
113 
114   CAUTION: the above description is only sergefp's understanding of the
115            subject and may omit some details.
116 */
117 
118 #include "opt_range.h"
119 
120 #include "item_sum.h"            // Item_sum
121 #include "key.h"                 // is_key_used
122 #include "log.h"                 // sql_print_error
123 #include "opt_statistics.h"      // guess_rec_per_key
124 #include "opt_trace.h"           // Opt_trace_array
125 #include "partition_info.h"      // partition_info
126 #include "sql_partition.h"       // HA_USE_AUTO_PARTITION
127 #include "sql_base.h"            // free_io_cache
128 #include "sql_class.h"           // THD
129 #include "sql_opt_exec_shared.h" // QEP_shared_owner
130 #include "sql_optimizer.h"       // JOIN
131 #include "sql_parse.h"           // check_stack_overrun
132 #include "uniques.h"             // Unique
133 #include "opt_hints.h"           // hint_key_state
134 #include "mysys_err.h"           // EE_CAPACITY_EXCEEDED
135 
136 using std::min;
137 using std::max;
138 
139 /*
140   Convert double value to #rows. Currently this does floor(), and we
141   might consider using round() instead.
142 */
143 #define double2rows(x) ((ha_rows)(x))
144 
145 static int sel_cmp(Field *f,uchar *a,uchar *b,uint8 a_flag,uint8 b_flag);
146 
147 static uchar is_null_string[2]= {1,0};
148 
149 class RANGE_OPT_PARAM;
150 
151 /**
152   Error handling class for range optimizer. We handle only out of memory
153   error here. This is to give a hint to the user to
154   raise range_optimizer_max_mem_size if required.
155   Warning for the memory error is pushed only once. The consequent errors
156   will be ignored.
157 */
158 class Range_optimizer_error_handler : public Internal_error_handler
159 {
160 public:
Range_optimizer_error_handler()161   Range_optimizer_error_handler()
162     : m_has_errors(false), m_is_mem_error(false)
163   {}
164 
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)165   virtual bool handle_condition(THD *thd,
166                                 uint sql_errno,
167                                 const char* sqlstate,
168                                 Sql_condition::enum_severity_level *level,
169                                 const char* msg)
170   {
171     if (*level == Sql_condition::SL_ERROR)
172     {
173       m_has_errors= true;
174       /* Out of memory error is reported only once. Return as handled */
175       if (m_is_mem_error && sql_errno == EE_CAPACITY_EXCEEDED)
176         return true;
177       if (sql_errno == EE_CAPACITY_EXCEEDED)
178       {
179         m_is_mem_error= true;
180         /* Convert the error into a warning. */
181         *level= Sql_condition::SL_WARNING;
182         push_warning_printf(
183                        thd, Sql_condition::SL_WARNING,
184                        ER_CAPACITY_EXCEEDED,
185                        ER_THD(thd, ER_CAPACITY_EXCEEDED),
186                        (ulonglong)thd->variables.range_optimizer_max_mem_size,
187                        "range_optimizer_max_mem_size",
188                        ER_THD(thd, ER_CAPACITY_EXCEEDED_IN_RANGE_OPTIMIZER));
189         return true;
190       }
191     }
192     return false;
193   }
194 
has_errors() const195   bool has_errors() const { return m_has_errors; }
196 private:
197   bool m_has_errors;
198   bool m_is_mem_error;
199 };
200 
201 /*
202   A construction block of the SEL_ARG-graph.
203 
204   The following description only covers graphs of SEL_ARG objects with
205   sel_arg->type==KEY_RANGE:
206 
207   One SEL_ARG object represents an "elementary interval" in form
208 
209       min_value <=?  table.keypartX  <=? max_value
210 
211   The interval is a non-empty interval of any kind: with[out] minimum/maximum
212   bound, [half]open/closed, single-point interval, etc.
213 
214   1. SEL_ARG GRAPH STRUCTURE
215 
216   SEL_ARG objects are linked together in a graph. The meaning of the graph
  is better demonstrated by an example:
218 
219      tree->keys[i]
220       |
221       |             $              $
222       |    part=1   $     part=2   $    part=3
223       |             $              $
224       |  +-------+  $   +-------+  $   +--------+
225       |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
226       |  +-------+  $   +-------+  $   +--------+
227       |      |      $              $       |
228       |      |      $              $   +--------+
229       |      |      $              $   | kp3=12 |
230       |      |      $              $   +--------+
231       |  +-------+  $              $
232       \->| kp1=2 |--$--------------$-+
233          +-------+  $              $ |   +--------+
234              |      $              $  ==>| kp3=11 |
235          +-------+  $              $ |   +--------+
236          | kp1=3 |--$--------------$-+       |
237          +-------+  $              $     +--------+
238              |      $              $     | kp3=14 |
239             ...     $              $     +--------+
240 
241   The entire graph is partitioned into "interval lists".
242 
243   An interval list is a sequence of ordered disjoint intervals over
244   the same key part. SEL_ARG are linked via "next" and "prev" pointers
245   with NULL as sentinel.
246 
247     In the example pic, there are 4 interval lists:
    "kp1<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=14".
249     The vertical lines represent SEL_ARG::next/prev pointers.
250 
251   Additionally, all intervals in the list form a red-black (RB) tree,
252   linked via left/right/parent pointers with null_element as sentinel. The
253   red-black tree root SEL_ARG object will be further called "root of the
254   interval list".
255 
256   A red-black tree with 7 SEL_ARGs will look similar to what is shown
257   below. Left/right/parent pointers are shown while next pointers go from a
258   node with number X to the node with number X+1 (and prev in the
259   opposite direction):
260 
261                          Root
262                         +---+
263                         | 4 |
264                         +---+
265                    left/     \ right
266                     __/       \__
267                    /             \
268               +---+               +---+
269               | 2 |               | 6 |
270               +---+               +---+
271         left /     \ right  left /     \ right
272             |       |           |       |
273         +---+       +---+   +---+       +---+
274         | 1 |       | 3 |   | 5 |       | 7 |
275         +---+       +---+   +---+       +---+
276 
277   In this tree,
278     * node1->prev == node7->next == NULL
279     * node1->left == node1->right ==
280       node3->left == ... node7->right == &null_element
281 
282   In an interval list, each member X may have SEL_ARG::next_key_part pointer
283   pointing to the root of another interval list Y. The pointed interval list
284   must cover a key part with greater number (i.e. Y->part > X->part).
285 
286     In the example pic, the next_key_part pointers are represented by
    horizontal lines.
288 
289   2. SEL_ARG GRAPH SEMANTICS
290 
291   It represents a condition in a special form (we don't have a name for it ATM)
292   The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
293 
294   For example, the picture represents the condition in form:
295    (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
296    (kp1=2 AND (kp3=11 OR kp3=14)) OR
297    (kp1=3 AND (kp3=11 OR kp3=14))
298 
299   In red-black tree form:
300 
301                      +-------+                 +--------+
302                      | kp1=2 |.................| kp3=14 |
303                      +-------+                 +--------+
304                       /     \                     /
305              +---------+    +-------+     +--------+
306              | kp1 < 1 |    | kp1=3 |     | kp3=11 |
307              +---------+    +-------+     +--------+
308                  .               .
309             ......               .......
310             .                          .
311         +-------+                  +--------+
312         | kp2=5 |                  | kp3=14 |
313         +-------+                  +--------+
314             .                        /
315             .                   +--------+
316        (root of R-B tree        | kp3=11 |
317         for "kp3={10|12}")      +--------+
318 
319 
320   Where / and \ denote left and right pointers and ... denotes
321   next_key_part pointers to the root of the R-B tree of intervals for
322   consecutive key parts.
323 
324   3. SEL_ARG GRAPH USE
325 
326   Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
  Then walk the SEL_ARG graph and get a list of disjoint ordered key
328   intervals (i.e. intervals in form
329 
330    (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
331 
332   Those intervals can be used to access the index. The uses are in:
333    - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
334                             how many table records are contained within all
335                             intervals.
336    - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
337                             and create QUICK_RANGE_SELECT object that will
338                             read records within these intervals.
339 
340   4. SPACE COMPLEXITY NOTES
341 
342     SEL_ARG graph is a representation of an ordered disjoint sequence of
343     intervals over the ordered set of index tuple values.
344 
345     For multi-part keys, one can construct a WHERE expression such that its
346     list of intervals will be of combinatorial size. Here is an example:
347 
348       (keypart1 IN (1,2, ..., n1)) AND
349       (keypart2 IN (1,2, ..., n2)) AND
350       (keypart3 IN (1,2, ..., n3))
351 
352     For this WHERE clause the list of intervals will have n1*n2*n3 intervals
353     of form
354 
355       (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
356 
357     SEL_ARG graph structure aims to reduce the amount of required space by
358     "sharing" the elementary intervals when possible (the pic at the
359     beginning of this comment has examples of such sharing). The sharing may
360     prevent combinatorial blowup:
361 
362       There are WHERE clauses that have combinatorial-size interval lists but
363       will be represented by a compact SEL_ARG graph.
364       Example:
365         (keypartN IN (1,2, ..., n1)) AND
366         ...
367         (keypart2 IN (1,2, ..., n2)) AND
368         (keypart1 IN (1,2, ..., n3))
369 
370     but not in all cases:
371 
372     - There are WHERE clauses that do have a compact SEL_ARG-graph
373       representation but get_mm_tree() and its callees will construct a
374       graph of combinatorial size.
375       Example:
376         (keypart1 IN (1,2, ..., n1)) AND
377         (keypart2 IN (1,2, ..., n2)) AND
378         ...
379         (keypartN IN (1,2, ..., n3))
380 
381     - There are WHERE clauses for which the minimal possible SEL_ARG graph
382       representation will have combinatorial size.
383       Example:
384         By induction: Let's take any interval on some keypart in the middle:
385 
386            kp15=c0
387 
388         Then let's AND it with this interval 'structure' from preceding and
389         following keyparts:
390 
391           (kp14=c1 AND kp16=c3) OR keypart14=c2) (*)
392 
393         We will obtain this SEL_ARG graph:
394 
395              kp14     $      kp15      $      kp16
396                       $                $
397          +---------+  $   +---------+  $   +---------+
398          | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
399          +---------+  $   +---------+  $   +---------+
400               |       $                $
401          +---------+  $   +---------+  $
402          | kp14=c2 |--$-->| kp15=c0 |  $
403          +---------+  $   +---------+  $
404                       $                $
405 
406        Note that we had to duplicate "kp15=c0" and there was no way to avoid
407        that.
408        The induction step: AND the obtained expression with another "wrapping"
409        expression like (*).
410        When the process ends because of the limit on max. number of keyparts
411        we'll have:
412 
413          WHERE clause length  is O(3*#max_keyparts)
414          SEL_ARG graph size   is O(2^(#max_keyparts/2))
415 
416        (it is also possible to construct a case where instead of 2 in 2^n we
417         have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
418         nodes)
419 
420     We avoid consuming too much memory by setting a limit on the number of
421     SEL_ARG object we can construct during one range analysis invocation.
422 */
423 
424 class SEL_ARG :public Sql_alloc
425 {
426 public:
  /*
    Flags (e.g. NO_MIN_RANGE, NO_MAX_RANGE, NEAR_MIN, NEAR_MAX, GEOM_FLAG)
    describing how the min/max bounds of this interval are interpreted,
    and whether the predicate is only a "maybe" match.
  */
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;					// Which key part
  // Nonzero if the keypart can be NULL; then the key tuple carries a
  // leading NULL-indicator byte (see store_min()/store_max()).
  uint8 maybe_null;
  /**
    The rtree index interval to scan, undefined unless
    SEL_ARG::min_flag == GEOM_FLAG.
   */
  enum ha_rkey_function rkey_func_flag;
  /*
    Number of children of this element in the RB-tree, plus 1 for this
    element itself.
  */
  uint16 elements;
  /**
    Valid only for elements which are RB-tree roots: Number of
    references to this SEL_ARG tree. References may be from
    SEL_ARG::next_key_part of SEL_ARGs from earlier keyparts or
    SEL_TREE::keys[i].

    The SEL_ARGs are re-used in a lazy-copy manner based on this
    reference counting.
  */
  ulong use_count;

  Field *field;                         // Field the interval constrains
  uchar *min_value,*max_value;			// Pointer to range

  /*
    eq_tree(), first(), last() etc require that left == right == NULL
    if the type is MAYBE_KEY. Todo: fix this so SEL_ARGs without R-B
    children are handled consistently. See related WL#5894.
   */
  SEL_ARG *left,*right;   /* R-B tree children */
  SEL_ARG *next,*prev;    /* Links for bi-directional interval list */
  SEL_ARG *parent;        /* R-B tree parent */
  /*
    R-B tree root of intervals covering keyparts consecutive to this
    SEL_ARG. See documentation of SEL_ARG GRAPH semantics for details.
  */
  SEL_ARG *next_key_part;
  enum leaf_color { BLACK,RED } color;  // Node color for R-B rebalancing

  /**
    Used to indicate if the range predicate for an index is always
    true/false, depends on values from other tables or can be
    evaluated as is.
  */
  enum Type {
    /** The range predicate for this index is always false. */
    IMPOSSIBLE,
    /** The range predicate for this index is always true.*/
    ALWAYS,
    /**
      There is a range predicate that refers to another table. The
      range access method cannot be used on this index unless that
      other table is earlier in the join sequence. The bit
      representing the index is set in SQL_SELECT::needed_reg to
      notify the join optimizer that there is a table dependency.
      After deciding on join order, the optimizer may chose to rerun
      the range optimizer for tables with such dependencies.
    */
    MAYBE_KEY,
    /**
      There is a range condition that can be used on this index. The
      range conditions for this index in stored in the SEL_ARG tree.
    */
    KEY_RANGE
  } type;
495 
  // Trivial constructor: members are left uninitialized; the caller is
  // expected to fill the object in afterwards.
  SEL_ARG() {}
  // Copy constructor and value constructors; defined out of line.
  SEL_ARG(SEL_ARG &);
  SEL_ARG(Field *,const uchar *, const uchar *);
  SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
  /*
    Used to construct MAYBE_KEY and IMPOSSIBLE SEL_ARGs. left and
    right is NULL, so this ctor must not be used to create other
    SEL_ARG types. See todo for left/right pointers.
  */
  SEL_ARG(enum Type type_arg)
    :min_flag(0), part(0), rkey_func_flag(HA_READ_INVALID), elements(1),
    use_count(1), left(NULL), right(NULL),
    next_key_part(0), color(BLACK), type(type_arg)
  {
    DBUG_ASSERT(type_arg == MAYBE_KEY || type_arg == IMPOSSIBLE);
  }
513   /**
514     returns true if a range predicate is equal. Use all_same()
515     to check for equality of all the predicates on this keypart.
516   */
is_same(const SEL_ARG * arg) const517   inline bool is_same(const SEL_ARG *arg) const
518   {
519     if (type != arg->type || part != arg->part)
520       return false;
521     if (type != KEY_RANGE)
522       return true;
523     return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
524   }
525   /**
526     returns true if all the predicates in the keypart tree are equal
527   */
all_same(const SEL_ARG * arg) const528   bool all_same(const SEL_ARG *arg) const
529   {
530     if (type != arg->type || part != arg->part)
531       return false;
532     if (type != KEY_RANGE)
533       return true;
534     if (arg == this)
535       return true;
536     const SEL_ARG *cmp_arg= arg->first();
537     const SEL_ARG *cur_arg= first();
538     for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
539          cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
540     if (cur_arg || cmp_arg)
541       return false;
542     return true;
543   }
  // Merge arg's "maybe" status into this interval.
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  // Mark this interval as a "maybe" match.
  inline void maybe_smaller() { maybe_flag=1; }
  /* Return true iff it's a single-point null interval */
  inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
  /*
    The four helpers below compare one bound of this interval with one
    bound of 'arg' via sel_cmp(), taking the min/max flags into account.
    Return value is negative/zero/positive, strcmp-style.
  */
  inline int cmp_min_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
  }
  inline int cmp_min_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
  }
  inline int cmp_max_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
  }
  inline int cmp_max_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
  }
clone_and(SEL_ARG * arg,MEM_ROOT * mem_root)564   SEL_ARG *clone_and(SEL_ARG* arg, MEM_ROOT *mem_root)
565   {						// Get overlapping range
566     uchar *new_min,*new_max;
567     uint8 flag_min,flag_max;
568     if (cmp_min_to_min(arg) >= 0)
569     {
570       new_min=min_value; flag_min=min_flag;
571     }
572     else
573     {
574       new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
575     }
576     if (cmp_max_to_max(arg) <= 0)
577     {
578       new_max=max_value; flag_max=max_flag;
579     }
580     else
581     {
582       new_max=arg->max_value; flag_max=arg->max_flag;
583     }
584     return new (mem_root) SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
585 		       MY_TEST(maybe_flag && arg->maybe_flag));
586   }
  // Build interval "this->min <= X < arg->min" on mem_root. An open lower
  // bound of arg (NEAR_MIN) becomes a closed upper bound, and vice versa.
  SEL_ARG *clone_first(SEL_ARG *arg, MEM_ROOT *mem_root)
  {						// min <= X < arg->min
    return new (mem_root) SEL_ARG(field,part, min_value, arg->min_value,
		       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
		       maybe_flag | arg->maybe_flag);
  }
  // Build interval "this->min <= X <= arg->max" on mem_root.
  SEL_ARG *clone_last(SEL_ARG *arg, MEM_ROOT *mem_root)
  {						// min <= X <= key_max
    return new (mem_root) SEL_ARG(field, part, min_value, arg->max_value,
		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
  }
  // Deep copy of this node; defined out of line.
  SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);
599 
copy_min(SEL_ARG * arg)600   bool copy_min(SEL_ARG* arg)
601   {						// Get overlapping range
602     if (cmp_min_to_min(arg) > 0)
603     {
604       min_value=arg->min_value; min_flag=arg->min_flag;
605       if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
606 	return 1;				// Full range
607     }
608     maybe_flag|=arg->maybe_flag;
609     return 0;
610   }
copy_max(SEL_ARG * arg)611   bool copy_max(SEL_ARG* arg)
612   {						// Get overlapping range
613     if (cmp_max_to_max(arg) <= 0)
614     {
615       max_value=arg->max_value; max_flag=arg->max_flag;
616       if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
617 	return 1;				// Full range
618     }
619     maybe_flag|=arg->maybe_flag;
620     return 0;
621   }
622 
  // Copy arg's lower bound into this interval's lower bound.
  void copy_min_to_min(SEL_ARG *arg)
  {
    min_value=arg->min_value; min_flag=arg->min_flag;
  }
  // Use arg's lower bound as this interval's upper bound; an open lower
  // bound (NEAR_MIN) maps to a closed upper bound, and vice versa.
  void copy_min_to_max(SEL_ARG *arg)
  {
    max_value=arg->min_value;
    max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
  }
  // Use arg's upper bound as this interval's lower bound (same flag flip).
  void copy_max_to_min(SEL_ARG *arg)
  {
    min_value=arg->max_value;
    min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
  }
637 
  /**
    Set spatial index range scan parameters. This object will be used to do
    spatial index range scan after this call.

    @param rkey_func The scan function to perform. It must be one of the
                     spatial index specific scan functions.
   */
  void set_gis_index_read_function(const enum ha_rkey_function rkey_func)
  {
    DBUG_ASSERT(rkey_func >= HA_READ_MBR_CONTAIN &&
                rkey_func <= HA_READ_MBR_EQUAL);
    // GEOM_FLAG marks this as an rtree scan; such scans have no upper
    // bound, hence NO_MAX_RANGE.
    min_flag= GEOM_FLAG;
    rkey_func_flag= rkey_func;
    max_flag= NO_MAX_RANGE;
  }
653 
654   /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_min(uint length,uchar ** min_key,uint min_key_flag)655   int store_min(uint length, uchar **min_key,uint min_key_flag)
656   {
657     /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
658     if ((min_flag & GEOM_FLAG) ||
659         (!(min_flag & NO_MIN_RANGE) &&
660 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
661     {
662       if (maybe_null && *min_value)
663       {
664 	**min_key=1;
665 	memset(*min_key+1, 0, length-1);
666       }
667       else
668 	memcpy(*min_key,min_value,length);
669       (*min_key)+= length;
670       return 1;
671     }
672     return 0;
673   }
674   /* returns a number of keypart values (0 or 1) appended to the key buffer */
store_max(uint length,uchar ** max_key,uint max_key_flag)675   int store_max(uint length, uchar **max_key, uint max_key_flag)
676   {
677     if (!(max_flag & NO_MAX_RANGE) &&
678 	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
679     {
680       if (maybe_null && *max_value)
681       {
682 	**max_key=1;
683 	memset(*max_key+1, 0, length-1);
684       }
685       else
686 	memcpy(*max_key,max_value,length);
687       (*max_key)+= length;
688       return 1;
689     }
690     return 0;
691   }
692 
  /*
    Returns a number of keypart values appended to the key buffer
    for min key and max key. This function is used by both Range
    Analysis and Partition pruning. For partition pruning we have
    to ensure that we don't store also subpartition fields. Thus
    we have to stop at the last partition part and not step into
    the subpartition fields. For Range Analysis we set last_part
    to MAX_KEY which we should never reach.
  */
  int store_min_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part)
  {
    // Append the min value of the leftmost interval on this keypart.
    SEL_ARG *key_tree= first();
    uint res= key_tree->store_min(key[key_tree->part].store_length,
                                  range_key, *range_key_flag);
    *range_key_flag|= key_tree->min_flag;

    // Recurse into the next keypart only if it is a usable KEY_RANGE
    // tree for the keypart immediately following this one, we have not
    // reached last_part, and the min bound built so far is still closed
    // (no NO_MIN_RANGE/NEAR_MIN flag accumulated).
    if (key_tree->next_key_part &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
      res+= key_tree->next_key_part->store_min_key(key,
                                                   range_key,
                                                   range_key_flag,
                                                   last_part);
    return res;
  }
723 
  /* returns a number of keypart values appended to the key buffer */
  int store_max_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part)
  {
    // Append the max value of the rightmost interval on this keypart.
    SEL_ARG *key_tree= last();
    uint res=key_tree->store_max(key[key_tree->part].store_length,
                                 range_key, *range_key_flag);
    (*range_key_flag)|= key_tree->max_flag;
    // Mirror of store_min_key(): recurse into the next keypart while the
    // max bound built so far is still closed (no NO_MAX_RANGE/NEAR_MAX).
    if (key_tree->next_key_part &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
      res+= key_tree->next_key_part->store_max_key(key,
                                                   range_key,
                                                   range_key_flag,
                                                   last_part);
    return res;
  }
745 
746   SEL_ARG *insert(SEL_ARG *key);
747   SEL_ARG *tree_delete(SEL_ARG *key);
748   SEL_ARG *find_range(SEL_ARG *key);
749   SEL_ARG *rb_insert(SEL_ARG *leaf);
750   friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
751 #ifndef DBUG_OFF
752   friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
753 #endif
754   bool test_use_count(SEL_ARG *root);
755   SEL_ARG *first();
756   const SEL_ARG *first() const;
757   SEL_ARG *last();
758   void make_root();
  // True if this tree is a single interval with no tree for later keyparts.
  inline bool simple_key()
  {
    return !next_key_part && elements == 1;
  }
763   /**
764     Update use_count of all SEL_ARG trees for later keyparts to
765     reflect that this SEL_ARG tree is now referred to 'count' more
766     times than it used to be (either through SEL_TREE::keys[] or
767     SEL_ARG::next_key_part pointers).
768 
769     This function does NOT update use_count of the current SEL_ARG
770     object.
771 
772     @param count The number of additional references to this SEL_ARG
773                  tree.
774 
775     @todo consider refactoring this function to also increase
776           use_count of 'this' instead of incrementing use_count only
777           on later keyparts.
778   */
increment_use_count(long count)779   void increment_use_count(long count)
780   {
781     /*
782       Increment use_count for all SEL_ARG trees referenced via
783       next_key_part from any SEL_ARG in this tree.
784     */
785     for (SEL_ARG *cur_selarg= first();
786          cur_selarg;
787          cur_selarg= cur_selarg->next)
788     {
789       if (cur_selarg->next_key_part)
790       {
791         cur_selarg->next_key_part->use_count+= count;
792         cur_selarg->next_key_part->increment_use_count(count);
793       }
794     }
795   }
796 
797   /**
798     Update use count for SEL_ARG's next_key_part.
799     This function does NOT update use_count of the current
800     SEL_ARG object.
801 
802     Primarily used for reducing reference count of next_key_part of a
803     node when removed from SEL_ARG tree during tree merge operations.
804 
805     @param count The number of additional references to this SEL_ARG
806                  tree.
807   */
increment_next_key_part_use_count(long count)808   void increment_next_key_part_use_count(long count)
809   {
810     if (next_key_part)
811     {
812       next_key_part->use_count+= count;
813       next_key_part->increment_use_count(count);
814     }
815   }
816 
free_tree()817   void free_tree()
818   {
819     for (SEL_ARG *pos=first(); pos ; pos=pos->next)
820       if (pos->next_key_part)
821       {
822 	pos->next_key_part->use_count--;
823 	pos->next_key_part->free_tree();
824       }
825   }
826 
parent_ptr()827   inline SEL_ARG **parent_ptr()
828   {
829     return parent->left == this ? &parent->left : &parent->right;
830   }
831 
832 
833   /*
834     Check if this SEL_ARG object represents a single-point interval
835 
836     SYNOPSIS
837       is_singlepoint()
838 
839     DESCRIPTION
840       Check if this SEL_ARG object (not tree) represents a single-point
841       interval, i.e. if it represents a "keypart = const" or
842       "keypart IS NULL".
843 
844     RETURN
845       TRUE   This SEL_ARG object represents a singlepoint interval
846       FALSE  Otherwise
847   */
848 
is_singlepoint() const849   bool is_singlepoint() const
850   {
851     /*
852       Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
853       flags, and the same for right edge.
854     */
855     if (min_flag || max_flag)
856       return FALSE;
857     uchar *min_val= min_value;
858     uchar *max_val= max_value;
859 
860     if (maybe_null)
861     {
862       /* First byte is a NULL value indicator */
863       if (*min_val != *max_val)
864         return FALSE;
865 
866       if (*min_val)
867         return TRUE; /* This "x IS NULL" */
868       min_val++;
869       max_val++;
870     }
871     return !field->key_cmp(min_val, max_val);
872   }
873   SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
874 };
875 
876 /**
877   Helper function to compare two SEL_ARG's.
878 */
all_same(const SEL_ARG * sa1,const SEL_ARG * sa2)879 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
880 {
881   if (sa1 == NULL && sa2 == NULL)
882     return true;
883   if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
884     return false;
885   return sa1->all_same(sa2);
886 }
887 
888 class SEL_IMERGE;
889 
890 
class SEL_TREE :public Sql_alloc
{
public:
  /**
    Starting an effort to document this field:

    IMPOSSIBLE: if keys[i]->type == SEL_ARG::IMPOSSIBLE for some i,
      then type == SEL_TREE::IMPOSSIBLE. Rationale: if the predicate for
      one of the indexes is always false, then the full predicate is also
      always false.

    ALWAYS: if either (keys[i]->type == SEL_ARG::ALWAYS) or
      (keys[i] == NULL) for all i, then type == SEL_TREE::ALWAYS.
      Rationale: the range access method will not be able to filter
      out any rows when there are no range predicates that can be used
      to filter on any index.

    KEY: There are range predicates that can be used on at least one
      index.

    KEY_SMALLER: There are range predicates that can be used on at
      least one index. In addition, there are predicates that cannot
      be directly utilized by range access on key parts in the same
      index. These unused predicates makes it probable that the row
      estimate for range access on this index is too pessimistic.
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;

  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
  { }
  /// Constructor without an explicit type: the tree defaults to type KEY.
  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
    type(KEY), keys(root, num_keys), n_ror_scans(0)
  { }
  /**
    Constructor that performs deep-copy of the SEL_ARG trees in
    'keys[]' and the index merge alternatives in 'merges'.

    @param arg     The SEL_TREE to copy
    @param param   Parameters for range analysis
  */
  SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
  /*
    Possible ways to read rows using a single index because the
    conditions of the query consists of single-index conjunctions:

       (ranges_for_idx_1) AND (ranges_for_idx_2) AND ...

    The SEL_ARG graph for each non-NULL element in keys[] may consist
    of many single-index ranges (disjunctions), so ranges_for_idx_1
    may e.g. be:

       "idx_field1 = 1 OR (idx_field1 > 5 AND idx_field2 = 10)"

    assuming that index1 is a composite index covering
    (idx_field1,...,idx_field2,..)

    Index merge intersection intersects ranges on SEL_ARGs from two or
    more indexes.

    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such SEL_TREE instead of NULL)
  */
  Mem_root_array<SEL_ARG *, true> keys;
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using Index merge (sort) union.

    Each element in 'merges' consists of multi-index disjunctions,
    which means that Index merge (sort) union must be applied to read
    rows. The nodes in the 'merges' list forms a conjunction of such
    multi-index disjunctions.

    The list is non-empty only if type==KEY.
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */
};
979 
class RANGE_OPT_PARAM
{
public:
  THD	*thd;   /* Current thread handle */
  TABLE *table; /* Table being analyzed */
  Item *cond;   /* Used inside get_mm_tree(). */
  /*
    NOTE(review): table maps consulted during condition analysis; their
    exact semantics are defined by the callers that fill them in --
    confirm at the call sites before relying on them.
  */
  table_map prev_tables;
  table_map read_tables;
  table_map current_table; /* Bit of the table being analyzed */

  /* Array of parts of all keys for which range analysis is performed */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;
  MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
  MEM_ROOT *old_root; /* Memory that will last until the query end */
  /*
    Number of indexes used in range analysis (In SEL_TREE::keys only first
    #keys elements are not empty)
  */
  uint keys;

  /*
    If true, the index descriptions describe real indexes (and it is ok to
    call field->optimize_range(real_keynr[...], ...).
    Otherwise index description describes fake indexes, like a partitioning
    expression.
  */
  bool using_real_indexes;

  /*
    Aggressively remove "scans" that do not have conditions on first
    keyparts. Such scans are usable when doing partition pruning but not
    regular range optimization.
  */
  bool remove_jump_scans;

  /*
    used_key_no -> table_key_no translation table. Only makes sense if
    using_real_indexes==TRUE
  */
  uint real_keynr[MAX_KEY];

  /*
    Used to store 'current key tuples', in both range analysis and
    partitioning (list) analysis
  */
  uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];

  /*
    NOTE(review): presumably forces use of the default MRR
    implementation -- confirm against the places that read this flag.
  */
  bool force_default_mrr;
  /**
    Whether index statistics or index dives should be used when
    estimating the number of rows in an equality range. If true, index
    statistics is used for these indexes.
  */
  bool use_index_statistics;

  /// Error handler for this param.

  Range_optimizer_error_handler error_handler;

  /// @return true if the error handler has recorded an error.
  bool has_errors() const  { return (error_handler.has_errors()); }

  virtual ~RANGE_OPT_PARAM() {}

};
1046 
class PARAM : public RANGE_OPT_PARAM
{
public:
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
  longlong baseflag;
  uint max_key_part;
  /* Number of ranges in the last checked tree->key */
  uint range_count;

  bool quick;				// Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg; /* ptr to needed_reg argument of test_quick_select() */

  // Buffer for index_merge cost estimates.
  Unique::Imerge_cost_buf_type imerge_cost_buff;

  /* TRUE if last checked tree->key can be used for ROR-scan */
  bool is_ror_scan;
  /*
    Number of ranges in the last checked tree->key
    NOTE(review): documented identically to range_count above -- verify
    which of the two members is authoritative before relying on either.
  */
  uint n_ranges;

  /*
     The sort order the range access method must be able
     to provide. Three-value logic: asc/desc/don't care
  */
  ORDER::enum_order order_direction;

  /// Control whether the various index merge strategies are allowed
  bool index_merge_allowed;
  bool index_merge_union_allowed;
  bool index_merge_sort_union_allowed;
  bool index_merge_intersect_allowed;
};
1084 
1085 class TABLE_READ_PLAN;
1086   class TRP_RANGE;
1087   class TRP_ROR_INTERSECT;
1088   class TRP_ROR_UNION;
1089   class TRP_INDEX_MERGE;
1090   class TRP_GROUP_MIN_MAX;
1091 
1092 struct st_ror_scan_info;
1093 
1094 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,
1095                                Item_func *cond_func,Field *field,
1096                                Item_func::Functype type,Item *value,
1097                                Item_result cmp_type);
1098 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,Item *cond_func,Field *field,
1099 			    KEY_PART *key_part,
1100 			    Item_func::Functype type,Item *value);
1101 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond);
1102 
1103 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts);
1104 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
1105                                   SEL_ARG *tree, bool update_tbl_stats,
1106                                   uint *mrr_flags, uint *bufsize,
1107                                   Cost_estimate *cost);
1108 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
1109                                      SEL_ARG *key_tree, uint mrr_flags,
1110                                      uint mrr_buf_size, MEM_ROOT *alloc);
1111 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
1112                                        bool index_read_must_be_used,
1113                                        bool update_tbl_stats,
1114                                        const Cost_estimate *cost_est);
1115 static
1116 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
1117                                           const Cost_estimate *cost_est);
1118 static
1119 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
1120                                          const Cost_estimate *cost_est);
1121 static
1122 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
1123                                           const Cost_estimate *cost_est);
1124 #ifndef DBUG_OFF
1125 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
1126                            const char *msg);
1127 static void print_ror_scans_arr(TABLE *table, const char *msg,
1128                                 struct st_ror_scan_info **start,
1129                                 struct st_ror_scan_info **end);
1130 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
1131 #endif
1132 
1133 static void append_range_all_keyparts(Opt_trace_array *range_trace,
1134                                       String *range_string,
1135                                       String *range_so_far,
1136                                       SEL_ARG *keypart_root,
1137                                       const KEY_PART_INFO *key_parts,
1138                                       const bool print_full);
1139 static inline void dbug_print_tree(const char *tree_name,
1140                                    SEL_TREE *tree,
1141                                    const RANGE_OPT_PARAM *param);
1142 
1143 static inline void print_tree(String *out,
1144                               const char *tree_name,
1145                               SEL_TREE *tree,
1146                               const RANGE_OPT_PARAM *param,
1147                               const bool print_full) MY_ATTRIBUTE((unused));
1148 
1149 void append_range(String *out,
1150                   const KEY_PART_INFO *key_parts,
1151                   const uchar *min_key, const uchar *max_key,
1152                   const uint flag);
1153 
1154 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
1155 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
/*
  A null_sel_tree is used in get_func_mm_tree_from_in_predicate to pass
  as an argument to tree_or. It is used only to influence the return
  value from tree_or function.
*/

/*
  Memory root backing null_sel_tree. With num_keys == 0 it presumably
  never serves a real allocation -- it only has to be a valid MEM_ROOT
  to hand to the SEL_TREE constructor (confirm Mem_root_array behavior
  for zero elements).
*/
static MEM_ROOT null_root;
static SEL_TREE null_sel_tree(SEL_TREE::IMPOSSIBLE, &null_root, 0);
1164 
1165 
1166 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
1167 static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
1168 static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
1169                         uint clone_flag);
1170 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
1171 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
1172                     SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
1173                     uchar *max_key,uint max_key_flag);
1174 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
1175 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count,
1176                                     uint limit);
1177 
1178 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
1179 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
1180                              uint length);
1181 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
1182 
1183 
/*
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
  a condition in the following form:
   (t_1||t_2||...||t_N) && (next)

  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
  (t_i,t_j) contains SEL_ARGS for the same index.

  SEL_TREE contained in SEL_IMERGE always has merges=NULL.

  This class relies on memory manager to do the cleanup.
*/

class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  /* Inline storage used until more than PREALLOCED_TREES trees are needed */
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  /* Deep-copy constructor; definition follows the class. */
  SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
};
1218 
1219 
1220 /*
1221   Add SEL_TREE to this index_merge without any checks,
1222 
1223   NOTES
1224     This function implements the following:
1225       (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
1226 
1227   RETURN
1228      0 - OK
1229     -1 - Out of memory.
1230 */
1231 
or_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree)1232 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
1233 {
1234   if (trees_next == trees_end)
1235   {
1236     const int realloc_ratio= 2;		/* Double size for next round */
1237     uint old_elements= static_cast<uint>(trees_end - trees);
1238     uint old_size= sizeof(SEL_TREE**) * old_elements;
1239     uint new_size= old_size * realloc_ratio;
1240     SEL_TREE **new_trees;
1241     if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
1242       return -1;
1243     memcpy(new_trees, trees, old_size);
1244     trees=      new_trees;
1245     trees_next= trees + old_elements;
1246     trees_end=  trees + old_elements * realloc_ratio;
1247   }
1248   *(trees_next++)= tree;
1249   return 0;
1250 }
1251 
1252 
1253 /*
1254   Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
1255   combining new_tree with one of the trees in this SEL_IMERGE if they both
1256   have SEL_ARGs for the same key.
1257 
1258   SYNOPSIS
1259     or_sel_tree_with_checks()
1260       param    PARAM from test_quick_select
1261       new_tree SEL_TREE with type KEY or KEY_SMALLER.
1262 
1263   NOTES
1264     This does the following:
1265     (t_1||...||t_k)||new_tree =
1266      either
1267        = (t_1||...||t_k||new_tree)
1268      or
1269        = (t_1||....||(t_j|| new_tree)||...||t_k),
1270 
1271      where t_i, y are SEL_TREEs.
1272     new_tree is combined with the first t_j it has a SEL_ARG on common
1273     key with. As a consequence of this, choice of keys to do index_merge
1274     read may depend on the order of conditions in WHERE part of the query.
1275 
1276   RETURN
1277     0  OK
1278     1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
1279        and (*this) should be discarded.
1280    -1  An error occurred.
1281 */
1282 
or_sel_tree_with_checks(RANGE_OPT_PARAM * param,SEL_TREE * new_tree)1283 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
1284 {
1285   DBUG_ENTER("SEL_IMERGE::or_sel_tree_with_checks");
1286   for (SEL_TREE** tree = trees;
1287        tree != trees_next;
1288        tree++)
1289   {
1290     if (sel_trees_can_be_ored(*tree, new_tree, param))
1291     {
1292       *tree = tree_or(param, *tree, new_tree);
1293       if (!*tree)
1294         DBUG_RETURN(1);
1295       if (((*tree)->type == SEL_TREE::MAYBE) ||
1296           ((*tree)->type == SEL_TREE::ALWAYS))
1297         DBUG_RETURN(1);
1298       /* SEL_TREE::IMPOSSIBLE is impossible here */
1299       DBUG_RETURN(0);
1300     }
1301   }
1302 
1303   /* New tree cannot be combined with any of existing trees. */
1304   const int ret= or_sel_tree(param, new_tree);
1305   DBUG_RETURN(ret);
1306 }
1307 
1308 
1309 /*
1310   Perform OR operation on this index_merge and supplied index_merge list.
1311 
1312   RETURN
1313     0 - OK
1314     1 - One of conditions in result is always TRUE and this SEL_IMERGE
1315         should be discarded.
1316    -1 - An error occurred
1317 */
1318 
or_sel_imerge_with_checks(RANGE_OPT_PARAM * param,SEL_IMERGE * imerge)1319 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
1320 {
1321   for (SEL_TREE** tree= imerge->trees;
1322        tree != imerge->trees_next;
1323        tree++)
1324   {
1325     if (or_sel_tree_with_checks(param, *tree))
1326       return 1;
1327   }
1328   return 0;
1329 }
1330 
1331 
/**
  Deep-copy constructor: clones the SEL_ARG tree of every key and every
  index merge alternative of 'arg'. On any failure while copying the
  merges, the 'merges' list is left empty.
*/
SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param):
  Sql_alloc(), keys(param->mem_root, param->keys), n_ror_scans(0)
{
  keys_map= arg->keys_map;
  type= arg->type;
  for (uint idx= 0; idx < param->keys; idx++)
  {
    if (arg->keys[idx])
    {
      keys[idx]= arg->keys[idx]->clone_tree(param);
      if (!keys[idx])
        break;  /* Clone failed: stop copying; remaining slots keep their
                   initial value from the keys(...) member initializer
                   (presumably zero -- confirm Mem_root_array<..,true>) */
      /* The copy holds one direct reference, plus references from the
         cloned tree to all trees for later keyparts. */
      keys[idx]->use_count++;
      keys[idx]->increment_use_count(1);
    }
    else
      keys[idx]= NULL;
  }

  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new (param->mem_root) SEL_IMERGE(el, param);
    if (!merge || merge->trees == merge->trees_next ||
        param->has_errors())
    {
      /* Copying an alternative failed: drop all index merge alternatives */
      merges.empty();
      return;
    }
    merges.push_back (merge);
  }

  /*
    SEL_TREEs are only created by get_mm_tree() (and functions called
    by get_mm_tree()). Index intersection is checked after
    get_mm_tree() has constructed all ranges. In other words, there
    should not be any ROR scans to copy when this ctor is called.
  */
  DBUG_ASSERT(n_ror_scans == 0);
}
1372 
1373 
/**
  Deep-copy constructor: clone every SEL_TREE of 'arg'. On any failure
  the object is left in a valid empty state (trees == trees_next ==
  trees_end == prealloced storage).
*/
SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
{
  /*
    NOTE(review): 'elements' is computed from trees_end (allocated
    capacity), not trees_next (number of used slots). If the two ever
    differ, the copy loop below reads unused slots of arg->trees --
    confirm that callers only copy "full" SEL_IMERGEs.
  */
  uint elements= static_cast<uint>(arg->trees_end - arg->trees);
  if (elements > PREALLOCED_TREES)
  {
    uint size= elements * sizeof (SEL_TREE **);
    if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
      goto mem_err;
  }
  else
    trees= &trees_prealloced[0];

  /*
    NOTE(review): trees_next is not advanced after the successful copy
    loop below, so a freshly copied SEL_IMERGE looks empty to any
    iteration over [trees, trees_next) -- verify this is intended (the
    caller in SEL_TREE's copy ctor treats trees == trees_next as a
    copy failure).
  */
  trees_next= trees;
  trees_end= trees + elements;

  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end;
       tree++, arg_tree++)
  {
    if (!(*tree= new (param->mem_root) SEL_TREE(*arg_tree, param)) ||
        param->has_errors())
      goto mem_err;
  }

  return;

  /* On failure, fall back to a valid empty state. */
mem_err:
  trees= &trees_prealloced[0];
  trees_next= trees;
  trees_end= trees;
}
1404 
1405 
/*
  Perform AND operation on two index_merge lists and store result in *im1.

  Since (a_1 && ... && a_N) && (b_1 && ... && b_K) is just the
  conjunction of all elements, the lists are simply concatenated.
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  im1->concat(im2);
}
1414 
1415 
/*
  Perform OR operation on 2 index_merge lists, storing result in first list.

  NOTES
    The following conversion is implemented:
     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
      => (a_1||b_1).

    i.e. all conjuncts except the first one are currently dropped.
    This is done to avoid producing N*K ways to do index_merge.

    If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
    and index_merge is discarded (while it is actually possible to try
    harder).

    As a consequence of this, choice of keys to do index_merge read may depend
    on the order of conditions in WHERE part of the query.

    Both lists are assumed non-empty: head() of each is used without a
    check.

  RETURN
    0     OK, result is stored in *im1
    other Error, both passed lists are unusable
*/

int imerge_list_or_list(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  /* Keep only the first conjunct of *im1 ... */
  SEL_IMERGE *imerge= im1->head();
  im1->empty();
  im1->push_back(imerge);

  /* ... and OR it with the first conjunct of *im2. */
  return imerge->or_sel_imerge_with_checks(param, im2->head());
}
1449 
1450 
/*
  Perform OR operation on index_merge list and key tree.

  RETURN
    false     OK, result is stored in *im1.
    true      Error
*/

static bool imerge_list_or_tree(RANGE_OPT_PARAM *param,
                                List<SEL_IMERGE> *im1,
                                SEL_TREE *tree)
{
  DBUG_ENTER("imerge_list_or_tree");
  SEL_IMERGE *imerge;
  List_iterator<SEL_IMERGE> it(*im1);

  uint remaining_trees= im1->elements;
  while ((imerge= it++))
  {
    SEL_TREE *or_tree;
    /*
      Need to make a copy of 'tree' for all but the last OR operation
      because or_sel_tree_with_checks() may change it.
    */
    if (--remaining_trees == 0)
      or_tree= tree;
    else
    {
      or_tree= new (param->mem_root) SEL_TREE (tree, param);
      if (!or_tree || param->has_errors())
        DBUG_RETURN(true);
      /* Nothing to OR in: the copy carries no keys and no merges */
      if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
        DBUG_RETURN(false);
    }

    int result_or= imerge->or_sel_tree_with_checks(param, or_tree);
    if (result_or == 1)
      it.remove();     /* Result became ALWAYS: drop this SEL_IMERGE */
    else if (result_or == -1)
      DBUG_RETURN(true);
  }
  DBUG_ASSERT(remaining_trees == 0);
  /*
    NOTE(review): an emptied list is reported via 'true' as well, i.e.
    the same way as an error -- confirm callers intend to treat the
    all-ALWAYS case like a failure of index merge construction.
  */
  DBUG_RETURN(im1->is_empty());
}
1495 
1496 
1497 #undef index					// Fix for Unixware 7
1498 
/// Base-class constructor: zero the key-usage statistics that derived
/// quick selects fill in later.
QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}
1503 
trace_quick_description(Opt_trace_context * trace)1504 void QUICK_SELECT_I::trace_quick_description(Opt_trace_context *trace)
1505 {
1506   Opt_trace_object range_trace(trace, "range_details");
1507 
1508   String range_info;
1509   range_info.set_charset(system_charset_info);
1510   add_info_string(&range_info);
1511   range_trace.add_utf8("used_index", range_info.ptr(), range_info.length());
1512 }
1513 
/**
  Constructor. On bitmap allocation failure, *create_error is set to 1
  and the object is left destructible but unusable.
*/
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :ranges(key_memory_Quick_ranges), free_file(0), cur_range(NULL), last_range(0),
   mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
   dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head=  table;
  key_part_info= head->key_info[index].key_part;

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.read_rnd_buff_size;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(key_memory_quick_range_select_root,
                   &alloc, thd->variables.range_alloc_block_size, 0);
    /*
      NOTE(review): side effect -- the THD's mem_root is redirected to
      this object's private root; presumably the caller restores it
      afterwards. Confirm at the call sites.
    */
    thd->mem_root= &alloc;
  }
  else
    memset(&alloc, 0, sizeof(alloc));
  file= head->file;
  record= head->record[0];

  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
  if (!(bitmap= (my_bitmap_map*) my_malloc(key_memory_my_bitmap_map,
                                           head->s->column_bitmap_size,
                                           MYF(MY_WME))))
  {
    /* Allocation failed: report through the out-parameter */
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}
1556 
1557 
/// Request that rows be returned in index order by setting the
/// HA_MRR_SORTED flag for the multi-range-read interface.
void QUICK_RANGE_SELECT::need_sorted_output()
{
  mrr_flags |= HA_MRR_SORTED;
}
1562 
1563 
/// Post-constructor initialization: ends any scan still active on the
/// handler. Always returns FALSE (success).
int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}
1572 
1573 
/// End any index or table scan still active on the handler.
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited)
    file->ha_index_or_rnd_end();
}
1579 
1580 
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  /* dont_free is set when another quick select took over the cleanup */
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      if (free_file)
      {
        /* This object owns its handler: unlock, close and delete it */
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_lock(current_thd, F_UNLCK);
        file->ha_close();
        delete file;
      }
    }
    free_root(&alloc,MYF(0));
    my_free(column_bitmap.bitmap);
  }
  /* mrr_buf_desc is freed unconditionally (my_free accepts NULL) */
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}
1605 
1606 
/**
  Constructor: sets up the private memory root. index is set to
  MAX_KEY since this is not a single-index scan.
*/
QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
                                                   TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
  index= MAX_KEY;
  head= table;

  init_sql_alloc(key_memory_quick_index_merge_root,
                 &alloc, thd->variables.range_alloc_block_size, 0);
  DBUG_VOID_RETURN;
}
1619 
/// Post-constructor initialization: nothing to do for index merge.
int QUICK_INDEX_MERGE_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
  DBUG_RETURN(0);
}
1625 
reset()1626 int QUICK_INDEX_MERGE_SELECT::reset()
1627 {
1628   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
1629   const int retval= read_keys_and_merge();
1630   DBUG_RETURN(retval);
1631 }
1632 
1633 bool
push_quick_back(QUICK_RANGE_SELECT * quick_sel_range)1634 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
1635 {
1636   /*
1637     Save quick_select that does scan on clustered primary key as it will be
1638     processed separately.
1639   */
1640   if (head->file->primary_key_is_clustered() &&
1641       quick_sel_range->index == head->s->primary_key)
1642     pk_quick_select= quick_sel_range;
1643   else
1644     return quick_selects.push_back(quick_sel_range);
1645   return 0;
1646 }
1647 
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
  delete unique;
  /*
    Clear each child's handler pointer so ~QUICK_RANGE_SELECT (which
    only cleans up when file != NULL) does not touch the handler.
  */
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_io_cache(head);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1665 
1666 
/*
  Construct a ROR-intersection quick select.

  If no parent MEM_ROOT is supplied, a private one is created; otherwise
  the local 'alloc' is zeroed out and all allocations go to the parent.
  Note: the last_rowid allocation may fail (NULL); init() checks for it.
*/
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;  // intersection uses several indexes, not one
  head= table;
  record= head->record[0];
  if (!parent_alloc)
    init_sql_alloc(key_memory_quick_ror_intersect_select_root,
                   &alloc, thd->variables.range_alloc_block_size, 0);
  else
    memset(&alloc, 0, sizeof(MEM_ROOT));
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}
1685 
1686 
1687 /*
1688   Do post-constructor initialization.
1689   SYNOPSIS
1690     QUICK_ROR_INTERSECT_SELECT::init()
1691 
1692   RETURN
1693     0      OK
1694     other  Error code
1695 */
1696 
init()1697 int QUICK_ROR_INTERSECT_SELECT::init()
1698 {
1699   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
1700  /* Check if last_rowid was successfully allocated in ctor */
1701   DBUG_RETURN(!last_rowid);
1702 }
1703 
1704 
1705 /*
1706   Initialize this quick select to be a ROR-merged scan.
1707 
1708   SYNOPSIS
1709     QUICK_RANGE_SELECT::init_ror_merged_scan()
1710       reuse_handler If TRUE, use head->file, otherwise create a separate
1711                     handler object
1712 
1713   NOTES
1714     This function creates and prepares for subsequent use a separate handler
1715     object if it can't reuse head->file. The reason for this is that during
1716     ROR-merge several key scans are performed simultaneously, and a single
1717     handler is only capable of preserving context of a single key scan.
1718 
1719     In ROR-merge the quick select doing merge does full records retrieval,
1720     merged quick selects read only keys.
1721 
1722   RETURN
1723     0  ROR child scan initialized, ok to use.
1724     1  error
1725 */
1726 
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
{
  handler *save_file= file, *org_file;
  THD *thd;
  /* Remember the table's bitmaps so they can be restored on exit */
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  mrr_flags|= HA_MRR_SORTED;  // child scans must return rowids in order
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init() || reset())
    {
      DBUG_RETURN(1);
    }
    head->column_bitmaps_set(&column_bitmap, &column_bitmap);
    file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  thd= head->in_use;
  if (!(file= head->file->clone(head->s->normalized_path.str, thd->mem_root)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  head->column_bitmaps_set(&column_bitmap, &column_bitmap);

  /* The cloned handler needs its own lock on the table */
  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init() || reset())
  {
    /* Undo the lock and close the clone before bailing out */
    file->ha_external_lock(thd, F_UNLCK);
    file->ha_close();
    goto failure;
  }
  free_file= TRUE;  // this object now owns 'file' and must delete it
  last_rowid= file->ref;
  file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);

end:
  /*
    We are only going to read key fields and call position() on 'file'
    The following sets head->tmp_set to only use this key and then updates
    head->read_set and head->write_set to use this bitmap.
    The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
  */
  org_file= head->file;
  head->file= file;
  /* We don't have to set 'head->keyread' here as the 'file' is unique */
  if (!head->no_keyread)
    head->mark_columns_used_by_index(index);
  head->prepare_for_position();
  head->file= org_file;  // put back the table's original handler
  bitmap_copy(&column_bitmap, head->read_set);

  /*
    We have prepared a column_bitmap which get_next() will use. To do this we
    used TABLE::read_set/write_set as playground; restore them to their
    original value to not pollute other scans.
  */
  head->column_bitmaps_set(save_read_set, save_write_set);
  bitmap_clear_all(&head->tmp_set);

  DBUG_RETURN(0);

failure:
  head->column_bitmaps_set(save_read_set, save_write_set);
  delete file;
  file= save_file;  // fall back to the handler we started with
  DBUG_RETURN(1);
}
1818 
1819 
1820 /*
1821   Initialize this quick select to be a part of a ROR-merged scan.
1822   SYNOPSIS
1823     QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1824       reuse_handler If TRUE, use head->file, otherwise create separate
1825                     handler object.
1826   RETURN
1827     0     OK
1828     other error code
1829 */
init_ror_merged_scan(bool reuse_handler)1830 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1831 {
1832   int error;
1833   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1834   QUICK_RANGE_SELECT* quick;
1835   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1836 
1837   /* Initialize all merged "children" quick selects */
1838   DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1839   if (!need_to_fetch_row && reuse_handler)
1840   {
1841     quick= quick_it++;
1842     /*
1843       There is no use of this->file. Use it for the first of merged range
1844       selects.
1845     */
1846     int error= quick->init_ror_merged_scan(TRUE);
1847     if (error)
1848       DBUG_RETURN(error);
1849     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1850   }
1851   while ((quick= quick_it++))
1852   {
1853 #ifndef DBUG_OFF
1854     const MY_BITMAP * const save_read_set= quick->head->read_set;
1855     const MY_BITMAP * const save_write_set= quick->head->write_set;
1856 #endif
1857     if ((error= quick->init_ror_merged_scan(FALSE)))
1858       DBUG_RETURN(error);
1859     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1860     // Sets are shared by all members of "quick_selects" so must not change
1861     DBUG_ASSERT(quick->head->read_set == save_read_set);
1862     DBUG_ASSERT(quick->head->write_set == save_write_set);
1863     /* All merged scans share the same record buffer in intersection. */
1864     quick->record= head->record[0];
1865   }
1866 
1867   /* Prepare for ha_rnd_pos calls if needed. */
1868   if (need_to_fetch_row && (error= head->file->ha_rnd_init(false)))
1869   {
1870     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1871     DBUG_RETURN(error);
1872   }
1873   DBUG_RETURN(0);
1874 }
1875 
1876 
1877 /*
1878   Initialize quick select for row retrieval.
1879   SYNOPSIS
1880     reset()
1881   RETURN
1882     0      OK
1883     other  Error code
1884 */
1885 
reset()1886 int QUICK_ROR_INTERSECT_SELECT::reset()
1887 {
1888   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1889   if (!scans_inited && init_ror_merged_scan(TRUE))
1890     DBUG_RETURN(1);
1891   scans_inited= TRUE;
1892   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
1893   QUICK_RANGE_SELECT *quick;
1894   while ((quick= it++))
1895     quick->reset();
1896   DBUG_RETURN(0);
1897 }
1898 
1899 
1900 /*
1901   Add a merged quick select to this ROR-intersection quick select.
1902 
1903   SYNOPSIS
1904     QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1905       quick Quick select to be added. The quick select must return
1906             rows in rowid order.
1907   NOTES
1908     This call can only be made before init() is called.
1909 
1910   RETURN
1911     FALSE OK
1912     TRUE  Out of memory.
1913 */
1914 
1915 bool
push_quick_back(QUICK_RANGE_SELECT * quick)1916 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
1917 {
1918   return quick_selects.push_back(quick);
1919 }
1920 
/*
  Destroy the ROR-intersection: delete merged scans and the optional
  clustered-PK scan, free the private MEM_ROOT, and end the rnd scan on
  the table handler if one was started for row retrieval.
*/
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  if (need_to_fetch_row && head->file->inited)
    head->file->ha_rnd_end();
  DBUG_VOID_RETURN;
}
1931 
1932 
/*
  Construct a ROR-union quick select.  The priority queue 'queue' orders
  the merged scans by their current rowid (Quick_ror_union_less).
*/
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : queue(Quick_ror_union_less(this),
          Malloc_allocator<PSI_memory_key>(PSI_INSTRUMENT_ME)),
    thd(thd_param), scans_inited(FALSE)
{
  index= MAX_KEY;  // the union reads several indexes, not one
  head= table;
  rowid_length= table->file->ref_length;
  record= head->record[0];
  init_sql_alloc(key_memory_quick_ror_union_select_root,
                 &alloc, thd->variables.range_alloc_block_size, 0);
  /*
    NOTE(review): this redirects the session's mem_root to this object's
    private root with no visible restore here — presumably the caller
    resets thd->mem_root afterwards; confirm against the call sites.
  */
  thd_param->mem_root= &alloc;
}
1947 
1948 
1949 /*
1950   Do post-constructor initialization.
1951   SYNOPSIS
1952     QUICK_ROR_UNION_SELECT::init()
1953 
1954   RETURN
1955     0      OK
1956     other  Error code
1957 */
1958 
init()1959 int QUICK_ROR_UNION_SELECT::init()
1960 {
1961   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1962   if (queue.reserve(quick_selects.elements))
1963   {
1964     DBUG_RETURN(1);
1965   }
1966 
1967   if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1968     DBUG_RETURN(1);
1969   prev_rowid= cur_rowid + head->file->ref_length;
1970   DBUG_RETURN(0);
1971 }
1972 
1973 
1974 /*
1975   Initialize quick select for row retrieval.
1976   SYNOPSIS
1977     reset()
1978 
1979   RETURN
1980     0      OK
1981     other  Error code
1982 */
1983 
int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I *quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
  have_prev_rowid= FALSE;
  if (!scans_inited)
  {
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      /*
        Use mem_root of this "QUICK" as using the statement mem_root
        might result in too many allocations when combined with
        dynamic range access where range optimizer is invoked many times
        for a single statement.
      */
      THD *thd= quick->head->in_use;
      MEM_ROOT *saved_root= thd->mem_root;
      thd->mem_root= &alloc;
      error= quick->init_ror_merged_scan(false);
      thd->mem_root= saved_root;  // restore before any early return
      if (error)
        DBUG_RETURN(1);
    }
    scans_inited= TRUE;
  }
  queue.clear();
  /*
    Initialize scans for merged quick selects and put all merged quick
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
    if ((error= quick->reset()))
      DBUG_RETURN(error);
    if ((error= quick->get_next()))
    {
      /* A child scan with no rows simply does not enter the queue */
      if (error == HA_ERR_END_OF_FILE)
        continue;
      DBUG_RETURN(error);
    }
    quick->save_last_pos();
    queue.push(quick);
  }

  /* Prepare for ha_rnd_pos calls. */
  if (head->file->inited && (error= head->file->ha_rnd_end()))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
    DBUG_RETURN(error);
  }
  if ((error= head->file->ha_rnd_init(false)))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(error);
  }

  DBUG_RETURN(0);
}
2045 
2046 
2047 bool
push_quick_back(QUICK_SELECT_I * quick_sel_range)2048 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
2049 {
2050   return quick_selects.push_back(quick_sel_range);
2051 }
2052 
/*
  Destroy the ROR-union: delete all merged scans, end the table
  handler's rnd scan if still open, and free the private MEM_ROOT.
*/
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  quick_selects.delete_elements();
  if (head->file->inited)
    head->file->ha_rnd_end();
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
2062 
2063 
/* Create an unbounded range: neither a min nor a max key is set. */
QUICK_RANGE::QUICK_RANGE()
  :min_key(0),max_key(0),min_length(0),max_length(0),
  flag(NO_MIN_RANGE | NO_MAX_RANGE), rkey_func_flag(HA_READ_INVALID),
  min_keypart_map(0), max_keypart_map(0)
{}
2069 
/*
  Create a range with the given bounds.  Both key images are duplicated
  onto the current mem_root; the lengths are narrowed to uint16.
*/
QUICK_RANGE::QUICK_RANGE(const uchar *min_key_arg, uint min_length_arg,
                         key_part_map min_keypart_map_arg,
                         const uchar *max_key_arg, uint max_length_arg,
                         key_part_map max_keypart_map_arg, uint flag_arg,
                         enum ha_rkey_function rkey_func_flag_arg)
  :min_key(NULL),
  max_key(NULL),
  min_length((uint16) min_length_arg),
  max_length((uint16) max_length_arg),
  flag((uint16) flag_arg),
  rkey_func_flag(rkey_func_flag_arg),
  min_keypart_map(min_keypart_map_arg),
  max_keypart_map(max_keypart_map_arg)
{
  /*
    Copy length + 1 bytes — presumably to include a trailing
    NULL-indicator byte of the key image; confirm against key format.
  */
  min_key= static_cast<uchar*>(sql_memdup(min_key_arg, min_length_arg + 1));
  max_key= static_cast<uchar*>(sql_memdup(max_key_arg, max_length_arg + 1));
  // If we get is_null_string as argument, the memdup is undefined behavior.
  DBUG_ASSERT(min_key_arg != is_null_string);
  DBUG_ASSERT(max_key_arg != is_null_string);
}
2090 
/*
  Shallow copy constructor: min/max value pointers and next_key_part are
  shared with 'arg', while the tree links (left/right/next/prev) are
  reset so the copy starts detached from any red-black tree.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg)
  :Sql_alloc(),
  min_flag(arg.min_flag),
  max_flag(arg.max_flag),
  maybe_flag(arg.maybe_flag),
  part(arg.part),
  maybe_null(arg.maybe_null),
  rkey_func_flag(arg.rkey_func_flag),
  elements(1),
  use_count(1),
  field(arg.field),
  min_value(arg.min_value),
  max_value(arg.max_value),
  left(&null_element),
  right(&null_element),
  next(NULL),
  prev(NULL),
  next_key_part(arg.next_key_part),
  type(arg.type)
{
  DBUG_ASSERT(arg.type != MAYBE_KEY);  // Would need left=right=NULL
}
2113 
2114 
make_root()2115 inline void SEL_ARG::make_root()
2116 {
2117   left=right= &null_element;
2118   color=BLACK;
2119   next=prev= NULL;
2120   use_count=0; elements=1;
2121 }
2122 
/*
  Create a single KEY_RANGE node for field 'f' with the given bounds;
  the value buffers are referenced, not copied.
*/
SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
                 const uchar *max_value_arg)
  :min_flag(0), max_flag(0), maybe_flag(0), part(0),
  maybe_null(f->real_maybe_null()), rkey_func_flag(HA_READ_INVALID),
  elements(1), use_count(1), field(f),
  min_value(const_cast<uchar *>(min_value_arg)),
  max_value(const_cast<uchar *>(max_value_arg)),
  left(&null_element), right(&null_element),
  next(NULL), prev(NULL),
  next_key_part(0), color(BLACK), type(KEY_RANGE)
{}
2134 
/*
  Create a KEY_RANGE node for key part 'part_' of 'field_' with explicit
  min/max flags (NEAR_MIN, NO_MAX_RANGE, ...).
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
                 uchar *min_value_, uchar *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), part(part_),
  maybe_null(field_->real_maybe_null()),
  rkey_func_flag(HA_READ_INVALID), elements(1),use_count(1),
  field(field_), min_value(min_value_), max_value(max_value_),
  left(&null_element), right(&null_element),
  next(NULL), prev(NULL), next_key_part(0), color(BLACK), type(KEY_RANGE)
{}
2145 
/*
  Recursively clone this node and its subtree onto param->mem_root.
  *next_arg threads the clones into a next/prev interval list; cloning
  left subtree, then self, then right subtree keeps that list in order.
  Returns NULL on out-of-memory or if 'param' has recorded errors.
*/
SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
                        SEL_ARG **next_arg)
{
  SEL_ARG *tmp;

  if (param->has_errors())
    return 0;

  if (type != KEY_RANGE)
  {
    /* Special nodes (e.g. IMPOSSIBLE/ALWAYS) carry no bounds to copy */
    if (!(tmp= new (param->mem_root) SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
    tmp->part= this->part;
  }
  else
  {
    if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
                                             min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    if (left != &null_element)
      if (!(tmp->left=left->clone(param, tmp, next_arg)))
	return 0;				// OOM

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(param, tmp, next_arg)))
	return 0;				// OOM
  }
  /* The original subtree gains a user; the clone keeps its own counts */
  increment_use_count(1);
  tmp->color= color;
  tmp->elements= this->elements;
  return tmp;
}
2187 
2188 /**
2189   This gives the first SEL_ARG in the interval list, and the minimal element
2190   in the red-black tree
2191 
2192   @return
2193   SEL_ARG   first SEL_ARG in the interval list
2194 */
first()2195 SEL_ARG *SEL_ARG::first()
2196 {
2197   SEL_ARG *next_arg=this;
2198   if (!next_arg->left)
2199     return 0;					// MAYBE_KEY
2200   while (next_arg->left != &null_element)
2201     next_arg=next_arg->left;
2202   return next_arg;
2203 }
2204 
first() const2205 const SEL_ARG *SEL_ARG::first() const
2206 {
2207   return const_cast<SEL_ARG*>(this)->first();
2208 }
2209 
last()2210 SEL_ARG *SEL_ARG::last()
2211 {
2212   SEL_ARG *next_arg=this;
2213   if (!next_arg->right)
2214     return 0;					// MAYBE_KEY
2215   while (next_arg->right != &null_element)
2216     next_arg=next_arg->right;
2217   return next_arg;
2218 }
2219 
2220 
2221 /*
2222   Check if a compare is ok, when one takes ranges in account
2223   Returns -2 or 2 if the ranges where 'joined' like  < 2 and >= 2
2224 */
2225 
static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                   uint8 b_flag)
{
  int cmp;
  /* First check if there was a compare to a min or max element */
  if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
  {
    if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
	(b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
      return 0;
    /* An open minimum sorts before everything, an open maximum after */
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  if (field->real_maybe_null())			// If null is part of key
  {
    /* The first byte of each key image is the NULL indicator */
    if (*a != *b)
    {
      return *a ? -1 : 1;
    }
    if (*a)
      goto end;					// NULL where equal
    a++; b++;					// Skip NULL marker
  }
  cmp=field->key_cmp(a , b);
  if (cmp) return cmp < 0 ? -1 : 1;		// The values differed

  // Check if the compared equal arguments was defined with open/closed range
 end:
  if (a_flag & (NEAR_MIN | NEAR_MAX))
  {
    if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
      return 0;
    if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
      /* +/-2 signals that the ranges can be joined, e.g. < 2 and >= 2 */
      return (a_flag & NEAR_MIN) ? 2 : -2;
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_flag & (NEAR_MIN | NEAR_MAX))
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements where equal
}
2268 
2269 
clone_tree(RANGE_OPT_PARAM * param)2270 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2271 {
2272   SEL_ARG tmp_link,*next_arg,*root;
2273   next_arg= &tmp_link;
2274   if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)) ||
2275       (param && param->has_errors()))
2276     return 0;
2277   next_arg->next=0;				// Fix last link
2278   tmp_link.next->prev=0;			// Fix first link
2279   if (root)					// If not OOM
2280     root->use_count= 0;
2281   return root;
2282 }
2283 
2284 
2285 /*
2286   Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2287   objects from table read plans.
2288 */
class TABLE_READ_PLAN
{
public:
  /*
    Plan read cost, with or without cost of full row retrieval, depending
    on plan creation parameters.
  */
  Cost_estimate cost_est;
  ha_rows records; /* estimate of #rows to be examined */

  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
    scans that can be both ROR and non-ROR.
  */
  bool is_ror;

  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
       retrieve_full_rows  If TRUE, created quick select will do full record
                           retrieval.
       parent_alloc        Memory pool to use, if any.

    NOTES
      retrieve_full_rows is ignored by some implementations.

    RETURN
      created quick select
      NULL on any error.
  */
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

  /* Table read plans are allocated on MEM_ROOT and are never deleted */
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return alloc_root(mem_root, size); }
  /* Poison the memory on (never-expected) sized delete to catch misuse */
  static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
  /* Placement-delete counterpart of operator new(MEM_ROOT*) */
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
  virtual ~TABLE_READ_PLAN() {}               /* Remove gcc warning */

  /**
     Add basic info for this TABLE_READ_PLAN to the optimizer trace.

     @param param        Parameters for range analysis of this table
     @param trace_object The optimizer trace object the info is appended to
   */
  virtual void trace_basic_info(const PARAM *param,
                                Opt_trace_object *trace_object) const = 0;
};
2341 
2342 /*
2343   Plan for a QUICK_RANGE_SELECT scan.
2344   TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2345   QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2346   record retrieval scans.
2347 */
2348 
2349 class TRP_RANGE : public TABLE_READ_PLAN
2350 {
2351 public:
2352   /**
2353     Root of red-black tree for intervals over key fields to be used in
2354     "range" method retrieval. See SEL_ARG graph description.
2355   */
2356   SEL_ARG *key;
2357   uint     key_idx; /* key number in PARAM::key and PARAM::real_keynr*/
2358   uint     mrr_flags;
2359   uint     mrr_buf_size;
2360 
TRP_RANGE(SEL_ARG * key_arg,uint idx_arg,uint mrr_flags_arg)2361   TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2362    : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2363   {}
~TRP_RANGE()2364   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
2365 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)2366   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2367                              MEM_ROOT *parent_alloc)
2368   {
2369     DBUG_ENTER("TRP_RANGE::make_quick");
2370     QUICK_RANGE_SELECT *quick;
2371     if ((quick= get_quick_select(param, key_idx, key, mrr_flags, mrr_buf_size,
2372                                  parent_alloc)))
2373     {
2374       quick->records= records;
2375       quick->cost_est= cost_est;
2376     }
2377     DBUG_RETURN(quick);
2378   }
2379 
2380   void trace_basic_info(const PARAM *param,
2381                         Opt_trace_object *trace_object) const;
2382 };
2383 
void TRP_RANGE::trace_basic_info(const PARAM *param,
                                 Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  DBUG_ASSERT(param->using_real_indexes);
  /* key_idx is an index into the optimizer's key list; map it back */
  const uint keynr_in_table= param->real_keynr[key_idx];

  const KEY &cur_key= param->table->key_info[keynr_in_table];
  const KEY_PART_INFO *key_part= cur_key.key_part;

  trace_object->add_alnum("type", "range_scan").
    add_utf8("index", cur_key.name).add("rows", records);

  Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");

  // TRP_RANGE should not be created if there are no range intervals
  DBUG_ASSERT(key);

  String range_info;
  range_info.set_charset(system_charset_info);
  append_range_all_keyparts(&trace_range, NULL, &range_info,
                            key, key_part, false);
#endif
}
2408 
2409 
/**
  Descriptor of one ROR (Rowid ORdered Retrieval) key scan, used as a
  building block when constructing ROR-intersection plans.
*/
typedef struct st_ror_scan_info
{
  uint      idx;      ///< # of used key in param->keys
  uint      keynr;    ///< # of used key in table
  ha_rows   records;  ///< estimate of # records this scan will return

  /** Set of intervals over key fields that will be used for row retrieval. */
  SEL_ARG   *sel_arg;

  /** Fields used in the query and covered by this ROR scan. */
  MY_BITMAP covered_fields;
  /**
    Fields used in the query that are a) covered by this ROR scan and
    b) not already covered by ROR scans ordered earlier in the merge
    sequence.
  */
  MY_BITMAP covered_fields_remaining;
  /** #fields in covered_fields_remaining (caching of bitmap_bits_set()) */
  uint      num_covered_fields_remaining;

  /**
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
  */
  Cost_estimate index_read_cost;
} ROR_SCAN_INFO;
2436 
2437 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2438 
class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
  TRP_ROR_INTERSECT() {}                      /* Remove gcc warning */
  virtual ~TRP_ROR_INTERSECT() {}             /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);

  /* Array of pointers to ROR range scans used in this intersection */
  struct st_ror_scan_info **first_scan;
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
  Cost_estimate index_scan_cost; /* SUM(cost(index_scan)) */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2457 
void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
                                         Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  trace_object->add_alnum("type", "index_roworder_intersect").
    add("rows", records).
    add("cost", cost_est).
    add("covering", is_covering).
    add("clustered_pk_scan", cpk_scan != NULL);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_array ota(trace, "intersect_of");
  /* Emit one trace object per merged scan in [first_scan, last_scan) */
  for (st_ror_scan_info **cur_scan= first_scan;
       cur_scan != last_scan;
       cur_scan++)
  {
    const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
    const KEY_PART_INFO *key_part= cur_key.key_part;

    Opt_trace_object trace_isect_idx(trace);
    trace_isect_idx.add_alnum("type", "range_scan").
      add_utf8("index", cur_key.name).add("rows", (*cur_scan)->records);

    Opt_trace_array trace_range(trace, "ranges");
    /* Walk the interval list; 'next' chains intervals of one key part */
    for (const SEL_ARG *current= (*cur_scan)->sel_arg;
         current;
         current= current->next)
    {
      String range_info;
      range_info.set_charset(system_charset_info);
      /* next_key_part descends into conditions on later key parts */
      for (const SEL_ARG *part= current;
           part;
           part= part->next_key_part)
      {
        const KEY_PART_INFO *cur_key_part= key_part + part->part;
        append_range(&range_info, cur_key_part,
                     part->min_value, part->max_value,
                     part->min_flag | part->max_flag);
      }
      trace_range.add_utf8(range_info.ptr(), range_info.length());
    }
  }
#endif
}
2502 
2503 /*
2504   Plan for QUICK_ROR_UNION_SELECT scan.
2505   QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2506   is ignored by make_quick.
2507 */
2508 
class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
  TRP_ROR_UNION() {}                          /* Remove gcc warning */
  virtual ~TRP_ROR_UNION() {}                 /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2522 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2523 void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
2524                                      Opt_trace_object *trace_object) const
2525 {
2526 #ifdef OPTIMIZER_TRACE
2527   Opt_trace_context * const trace= &param->thd->opt_trace;
2528   trace_object->add_alnum("type", "index_roworder_union");
2529   Opt_trace_array ota(trace, "union_of");
2530   for (TABLE_READ_PLAN **current= first_ror;
2531        current != last_ror;
2532        current++)
2533   {
2534     Opt_trace_object trp_info(trace);
2535     (*current)->trace_basic_info(param, &trp_info);
2536   }
2537 #endif
2538 }
2539 
2540 /*
2541   Plan for QUICK_INDEX_MERGE_SELECT scan.
2542   QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
2543   is ignored by make_quick.
2544 */
2545 
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
  TRP_INDEX_MERGE() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_MERGE() {}               /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2559 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2560 void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
2561                                        Opt_trace_object *trace_object) const
2562 {
2563 #ifdef OPTIMIZER_TRACE
2564   Opt_trace_context * const trace= &param->thd->opt_trace;
2565   trace_object->add_alnum("type", "index_merge");
2566   Opt_trace_array ota(trace, "index_merge_of");
2567   for (TRP_RANGE **current= range_scans;
2568        current != range_scans_end;
2569        current++)
2570   {
2571     Opt_trace_object trp_info(trace);
2572     (*current)->trace_basic_info(param, &trp_info);
2573   }
2574 #endif
2575 }
2576 
2577 /*
2578   Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2579 */
2580 
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min;             ///< TRUE if there is a MIN function
  bool have_max;             ///< TRUE if there is a MAX function
  /**
    TRUE if there is an aggregate distinct function, e.g.
    "COUNT(DISTINCT x)"
   */
  bool have_agg_distinct;
  /**
    The key_part of the only field used by all MIN/MAX functions.
    Note that TRP_GROUP_MIN_MAX is not used if there are MIN/MAX
    functions on more than one field.
  */
  KEY_PART_INFO *min_max_arg_part;
  uint group_prefix_len;    ///< Length of all key parts in the group prefix
  uint used_key_parts;      ///< Number of index key parts used for access
  uint group_key_parts;     ///< Number of index key parts in the group prefix
  KEY *index_info;          ///< The index chosen for data access
  uint index;               ///< The id of the chosen index
  uchar key_infix[MAX_KEY_LENGTH];  ///< Constants from equality predicates
  uint key_infix_len;       ///< Length of key_infix
  SEL_TREE *range_tree;     ///< Represents all range predicates in the query
  SEL_ARG  *index_tree;     ///< The sub-tree corresponding to index_info
  uint param_idx;           ///< Index of used key in param->key
  bool is_index_scan;       ///< Use index_next() instead of random read
public:
  /** Number of records selected by the ranges in index_tree. */
  ha_rows quick_prefix_records;
public:

  /// Add a description of this plan to the optimizer trace object.
  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;

  /**
    Store the results of the group-min-max analysis.  When
    key_infix_len_arg is non-zero, the infix constants are copied into
    this object's key_infix buffer.
  */
  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    bool have_agg_distinct_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
                    uint index_arg, uint key_infix_len_arg,
                    uchar *key_infix_arg,
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    have_agg_distinct(have_agg_distinct_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
    quick_prefix_records(quick_prefix_records_arg)
    {
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
  virtual ~TRP_GROUP_MIN_MAX() {}             /* Remove gcc warning */

  /// Create the QUICK_GROUP_MIN_MAX_SELECT for this plan.
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  /// Request index scan (index_next()) instead of random reads.
  void use_index_scan() { is_index_scan= TRUE; }
};
2643 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2644 void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
2645                                          Opt_trace_object *trace_object) const
2646 {
2647 #ifdef OPTIMIZER_TRACE
2648   trace_object->add_alnum("type", "index_group").
2649     add_utf8("index", index_info->name);
2650   if (min_max_arg_part)
2651     trace_object->add_utf8("group_attribute",
2652                            min_max_arg_part->field->field_name);
2653   else
2654     trace_object->add_null("group_attribute");
2655   trace_object->add("min_aggregate", have_min).
2656     add("max_aggregate", have_max).
2657     add("distinct_aggregate", have_agg_distinct).
2658     add("rows", records).
2659     add("cost", cost_est);
2660 
2661   const KEY_PART_INFO *key_part= index_info->key_part;
2662   Opt_trace_context * const trace= &param->thd->opt_trace;
2663   {
2664     Opt_trace_array trace_keyparts(trace, "key_parts_used_for_access");
2665     for (uint partno= 0; partno < used_key_parts; partno++)
2666     {
2667       const KEY_PART_INFO *cur_key_part= key_part + partno;
2668       trace_keyparts.add_utf8(cur_key_part->field->field_name);
2669     }
2670   }
2671   Opt_trace_array trace_range(trace, "ranges");
2672 
2673   // can have group quick without ranges
2674   if (index_tree)
2675   {
2676     String range_info;
2677     range_info.set_charset(system_charset_info);
2678     append_range_all_keyparts(&trace_range, NULL,
2679                               &range_info, index_tree, key_part, false);
2680   }
2681 #endif
2682 }
2683 
2684 /*
2685   Fill param->needed_fields with bitmap of fields used in the query.
2686   SYNOPSIS
2687     fill_used_fields_bitmap()
2688       param Parameter from test_quick_select function.
2689 
2690   NOTES
2691     Clustered PK members are not put into the bitmap as they are implicitly
2692     present in all keys (and it is impossible to avoid reading them).
2693   RETURN
2694     0  Ok
2695     1  Out of memory.
2696 */
2697 
fill_used_fields_bitmap(PARAM * param)2698 static int fill_used_fields_bitmap(PARAM *param)
2699 {
2700   TABLE *table= param->table;
2701   my_bitmap_map *tmp;
2702   uint pk;
2703   param->tmp_covered_fields.bitmap= 0;
2704   param->fields_bitmap_size= table->s->column_bitmap_size;
2705   if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2706                                   param->fields_bitmap_size)) ||
2707       bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2708     return 1;
2709 
2710   bitmap_copy(&param->needed_fields, table->read_set);
2711   bitmap_union(&param->needed_fields, table->write_set);
2712 
2713   pk= param->table->s->primary_key;
2714   if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2715   {
2716     /* The table uses clustered PK and it is not internally generated */
2717     KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2718     KEY_PART_INFO *key_part_end=
2719       key_part + param->table->key_info[pk].user_defined_key_parts;
2720     for (;key_part != key_part_end; ++key_part)
2721       bitmap_clear_bit(&param->needed_fields, key_part->fieldnr-1);
2722   }
2723   return 0;
2724 }
2725 
2726 
2727 /*
2728   Test if a key can be used in different ranges, and create the QUICK
2729   access method (range, index merge etc) that is estimated to be
2730   cheapest unless table/index scan is even cheaper (exception: @see
2731   parameter force_quick_range).
2732 
2733   SYNOPSIS
2734     test_quick_select()
2735       thd               Current thread
2736       keys_to_use       Keys to use for range retrieval
2737       prev_tables       Tables assumed to be already read when the scan is
2738                         performed (but not read at the moment of this call)
2739       limit             Query limit
2740       force_quick_range Prefer to use range (instead of full table scan) even
2741                         if it is more expensive.
2742       interesting_order The sort order the range access method must be able
2743                         to provide. Three-value logic: asc/desc/don't care
2744       needed_reg        this info is used in make_join_select() even if there is no quick!
2745       quick[out]        Calculated QUICK, or NULL
2746       ignore_table_scan Disregard table scan while looking for range.
2747 
2748   NOTES
2749     Updates the following:
2750       needed_reg - Bits for keys with may be used if all prev regs are read
2751 
2752     In the table struct the following information is updated:
2753       quick_keys           - Which keys can be used
2754       quick_rows           - How many rows the key matches
2755       quick_condition_rows - E(# rows that will satisfy the table condition)
2756 
2757   IMPLEMENTATION
2758     quick_condition_rows value is obtained as follows:
2759 
2760       It is a minimum of E(#output rows) for all considered table access
2761       methods (range and index_merge accesses over various indexes).
2762 
2763     The obtained value is not a true E(#rows that satisfy table condition)
2764     but rather a pessimistic estimate. To obtain a true E(#...) one would
2765     need to combine estimates of various access methods, taking into account
2766     correlations between sets of rows they will return.
2767 
2768     For example, if values of tbl.key1 and tbl.key2 are independent (a right
2769     assumption if we have no information about their correlation) then the
2770     correct estimate will be:
2771 
2772       E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2773       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2)
2774 
2775     which is smaller than
2776 
2777        MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2)))
2778 
2779     which is currently produced.
2780 
2781   TODO
2782    * Change the value returned in quick_condition_rows from a pessimistic
2783      estimate to true E(#rows that satisfy table condition).
     (we can re-use some of E(#rows) calculation code from index_merge/intersection
2785       for this)
2786 
2787    * Check if this function really needs to modify keys_to_use, and change the
2788      code to pass it by reference if it doesn't.
2789 
   * In addition to force_quick_range other means can be (and usually are) used
2791      to make this function prefer range over full table scan. Figure out if
2792      force_quick_range is really needed.
2793 
2794   RETURN
2795    -1 if impossible select (i.e. certainly no rows will be selected)
2796     0 if can't use quick_select
2797     1 if found usable ranges and quick select has been successfully created.
2798 
2799   @note After this call, caller may decide to really use the returned QUICK,
2800   by calling QEP_TAB::set_quick() and updating tab->type() if appropriate.
2801 
2802 */
int test_quick_select(THD *thd, key_map keys_to_use,
                      table_map prev_tables,
                      ha_rows limit, bool force_quick_range,
                      const ORDER::enum_order interesting_order,
                      const QEP_shared_owner *tab,
                      Item *cond, key_map *needed_reg, QUICK_SELECT_I **quick,
                      bool ignore_table_scan)
{
  DBUG_ENTER("test_quick_select");

  *quick= NULL;
  needed_reg->clear_all();

  if (keys_to_use.is_clear_all())
    DBUG_RETURN(0);

  /*
    Determine which tables can be considered already read when building
    range conditions that refer to other tables.
  */
  table_map const_tables, read_tables;
  if (tab->join())
  {
    const_tables= tab->join()->found_const_table_map;
    read_tables= tab->join()->is_executed() ?
      // in execution, range estimation is done for each row, so can access previous tables
      (tab->prefix_tables() & ~tab->added_tables()) :
      const_tables;
  }
  else
    const_tables= read_tables= 0;

  DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
		      (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
		      (ulong) const_tables));

  /*
    Compute the cost of a full table scan: the baseline any range access
    plan must beat (unless force_quick_range or ignore_table_scan).
  */
  const Cost_model_server *const cost_model= thd->cost_model();
  TABLE *const head= tab->table();
  ha_rows records= head->file->stats.records;
  if (!records)
    records++;					/* purecov: inspected */
  double scan_time=
    cost_model->row_evaluate_cost(static_cast<double>(records)) + 1;
  Cost_estimate cost_est= head->file->table_scan_cost();
  cost_est.add_io(1.1);
  cost_est.add_cpu(scan_time);
  if (ignore_table_scan)
  {
    scan_time= DBL_MAX;
    cost_est.set_max_cost();
  }
  if (limit < records)
  {
    cost_est.reset();
    // Force to use index
    cost_est.add_io(head->cost_model()->page_read_cost(
      static_cast<double>(records)) + 1);
    cost_est.add_cpu(scan_time);
  }
  else if (cost_est.total_cost() <= 2.0 && !force_quick_range)
    DBUG_RETURN(0);				/* No need for quick select */

  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_range(trace, "range_analysis");
  Opt_trace_object(trace, "table_scan").
    add("rows", head->file->stats.records).
    add("cost", cost_est);

  /* Only keys enabled for this query are candidates for range access. */
  keys_to_use.intersect(head->keys_in_use_for_query);
  if (!keys_to_use.is_clear_all())
  {
    MEM_ROOT alloc;
    SEL_TREE *tree= NULL;
    KEY_PART *key_parts;
    KEY *key_info;
    PARAM param;

    /*
      Use the 3 multiplier as range optimizer allocates big PARAM structure
      and may evaluate a subquery expression
      TODO During the optimization phase we should evaluate only inexpensive
           single-lookup subqueries.
    */
    if (check_stack_overrun(thd, 3*STACK_MIN_SIZE + sizeof(PARAM), NULL))
      DBUG_RETURN(0);                           // Fatal error flag is set

    /* set up parameter that is passed to all functions */
    param.thd= thd;
    param.baseflag= head->file->ha_table_flags();
    param.prev_tables=prev_tables | const_tables;
    param.read_tables=read_tables;
    param.current_table= head->pos_in_table_list->map();
    param.table=head;
    param.keys=0;
    param.mem_root= &alloc;
    param.old_root= thd->mem_root;
    param.needed_reg= needed_reg;
    param.imerge_cost_buff.reset();
    param.using_real_indexes= TRUE;
    param.remove_jump_scans= TRUE;
    param.force_default_mrr= (interesting_order == ORDER::ORDER_DESC);
    param.order_direction= interesting_order;
    param.use_index_statistics= false;
    /*
      Set index_merge_allowed from OPTIMIZER_SWITCH_INDEX_MERGE.
      Notice also that OPTIMIZER_SWITCH_INDEX_MERGE disables all
      index merge sub strategies.
    */
    param.index_merge_allowed=
      thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE);
    param.index_merge_union_allowed=
      param.index_merge_allowed &&
      thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION);
    param.index_merge_sort_union_allowed=
      param.index_merge_allowed &&
      thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION);
    param.index_merge_intersect_allowed=
      param.index_merge_allowed &&
      thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT);

    thd->no_errors=1;				// Don't warn about NULL
    /* All range analysis allocations go into this size-capped MEM_ROOT. */
    init_sql_alloc(key_memory_test_quick_select_exec,
                   &alloc, thd->variables.range_alloc_block_size, 0);
    set_memroot_max_capacity(&alloc,
                             thd->variables.range_optimizer_max_mem_size);
    set_memroot_error_reporting(&alloc, true);
    thd->push_internal_handler(&param.error_handler);
    if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
                                                  sizeof(KEY_PART)*
                                                  head->s->key_parts)) ||
        fill_used_fields_bitmap(&param))
    {
      thd->no_errors=0;
      thd->pop_internal_handler();
      free_root(&alloc,MYF(0));			// Return memory & allocator
      DBUG_RETURN(0);				// Can't use range
    }
    key_parts= param.key_parts;
    /* Redirect thd allocations into the range optimizer's MEM_ROOT. */
    thd->mem_root= &alloc;

    {
      Opt_trace_array trace_idx(trace,
                                "potential_range_indexes",
                                Opt_trace_context::RANGE_OPTIMIZER);
      /*
        Make an array with description of all key parts of all table keys.
        This is used in get_mm_parts function.
      */
      key_info= head->key_info;
      for (uint idx= 0 ; idx < head->s->keys ; idx++, key_info++)
      {
        Opt_trace_object trace_idx_details(trace);
        trace_idx_details.add_utf8("index", key_info->name);
        KEY_PART_INFO *key_part_info;

        if (!keys_to_use.is_set(idx))
        {
          trace_idx_details.add("usable", false).
            add_alnum("cause", "not_applicable");
          continue;
        }

        if (hint_key_state(thd, head, idx, NO_RANGE_HINT_ENUM, 0))
        {
          trace_idx_details.add("usable", false).
            add_alnum("cause", "no_range_optimization hint");
          continue;
        }

        if (key_info->flags & HA_FULLTEXT)
        {
          trace_idx_details.add("usable", false).
            add_alnum("cause", "fulltext");
          continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG
        }

        trace_idx_details.add("usable", true);

        param.key[param.keys]=key_parts;
        key_part_info= key_info->key_part;
        Opt_trace_array trace_keypart(trace, "key_parts");
        for (uint part=0 ; part < actual_key_parts(key_info) ;
             part++, key_parts++, key_part_info++)
        {
          key_parts->key=          param.keys;
          key_parts->part=         part;
          key_parts->length=       key_part_info->length;
          key_parts->store_length= key_part_info->store_length;
          key_parts->field=        key_part_info->field;
          key_parts->null_bit=     key_part_info->null_bit;
          key_parts->image_type =
            (part < key_info->user_defined_key_parts &&
             key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
          /* Only HA_PART_KEY_SEG is used */
          key_parts->flag=         (uint8) key_part_info->key_part_flag;
          trace_keypart.add_utf8(key_parts->field->field_name);
        }
        param.real_keynr[param.keys++]=idx;
      }
    }
    param.key_parts_end=key_parts;

    /* Calculate cost of full index read for the shortest covering index */
    if (!head->covering_keys.is_clear_all())
    {
      int key_for_use= find_shortest_key(head, &head->covering_keys);
      Cost_estimate key_read_time=
        param.table->file->index_scan_cost(key_for_use, 1,
                                           static_cast<double>(records));
      key_read_time.add_cpu(cost_model->row_evaluate_cost(
        static_cast<double>(records)));

      bool chosen= false;
      if (key_read_time < cost_est)
      {
        cost_est= key_read_time;
        chosen= true;
      }

      Opt_trace_object trace_cov(trace,
                                 "best_covering_index_scan",
                                 Opt_trace_context::RANGE_OPTIMIZER);
      trace_cov.add_utf8("index", head->key_info[key_for_use].name).
        add("cost", key_read_time).add("chosen", chosen);
      if (!chosen)
        trace_cov.add_alnum("cause", "cost");
    }

    TABLE_READ_PLAN *best_trp= NULL;
    TRP_GROUP_MIN_MAX *group_trp;
    Cost_estimate best_cost= cost_est;

    /* Build the SEL_TREE of range predicates from the condition. */
    if (cond)
    {
      {
        Opt_trace_array trace_setup_cond(trace, "setup_range_conditions");
        tree= get_mm_tree(&param, cond);
      }
      if (tree)
      {
        if (tree->type == SEL_TREE::IMPOSSIBLE)
        {
          trace_range.add("impossible_range", true);
          records=0L;                      /* Return -1 from this function. */
          cost_est.reset();
          cost_est.add_io(static_cast<double>(HA_POS_ERROR));
          goto free_mem;
        }
        /*
          If the tree can't be used for range scans, proceed anyway, as we
          can construct a group-min-max quick select
        */
        if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
        {
          trace_range.add("range_scan_possible", false);
          if (tree->type == SEL_TREE::ALWAYS)
            trace_range.add_alnum("cause", "condition_always_true");

          tree= NULL;
        }
      }
    }

    /*
      Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
      Notice that it can be constructed no matter if there is a range tree.
    */
    group_trp= get_best_group_min_max(&param, tree, &best_cost);
    if (group_trp)
    {
      param.table->quick_condition_rows= min(group_trp->records,
                                             head->file->stats.records);
      Opt_trace_object grp_summary(trace,
                                   "best_group_range_summary",
                                   Opt_trace_context::RANGE_OPTIMIZER);
      if (unlikely(trace->is_started()))
        group_trp->trace_basic_info(&param, &grp_summary);
      if (group_trp->cost_est < best_cost)
      {
        grp_summary.add("chosen", true);
        best_trp= group_trp;
        best_cost= best_trp->cost_est;
      }
      else
        grp_summary.add("chosen", false).add_alnum("cause", "cost");
    }

    if (tree)
    {
      /*
        It is possible to use a range-based quick select (but it might be
        slower than 'all' table scan).
      */
      dbug_print_tree("final_tree", tree, &param);

      {
        /*
          Calculate cost of single index range scan and possible
          intersections of these
        */
        Opt_trace_object trace_range(trace,
                                     "analyzing_range_alternatives",
                                     Opt_trace_context::RANGE_OPTIMIZER);
        TRP_RANGE         *range_trp;
        TRP_ROR_INTERSECT *rori_trp;

        /* Get best 'range' plan and prepare data for making other plans */
        if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
                                             &best_cost)))
        {
          best_trp= range_trp;
          best_cost= best_trp->cost_est;
        }

        /*
          Simultaneous key scans and row deletes on several handler
          objects are not allowed so don't use ROR-intersection for
          table deletes. Also, ROR-intersection cannot return rows in
          descending order
        */
        if ((thd->lex->sql_command != SQLCOM_DELETE) &&
            param.index_merge_allowed &&
            interesting_order != ORDER::ORDER_DESC)
        {
          /*
            Get best non-covering ROR-intersection plan and prepare data for
            building covering ROR-intersection.
          */
          if ((rori_trp= get_best_ror_intersect(&param, tree, &best_cost)))
          {
            best_trp= rori_trp;
            best_cost= best_trp->cost_est;
          }
        }
      }

      // Here we calculate cost of union index merge
      if (!tree->merges.is_empty())
      {
        // Cannot return rows in descending order.
        if (param.index_merge_allowed &&
            interesting_order != ORDER::ORDER_DESC &&
            param.table->file->stats.records)
        {
          /* Try creating index_merge/ROR-union scan. */
          SEL_IMERGE *imerge;
          TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp= NULL;
          List_iterator_fast<SEL_IMERGE> it(tree->merges);
          Opt_trace_array trace_idx_merge(trace,
                                          "analyzing_index_merge_union",
                                          Opt_trace_context::RANGE_OPTIMIZER);
          while ((imerge= it++))
          {
            new_conj_trp= get_best_disjunct_quick(&param, imerge,
                                                  &best_cost);
            if (new_conj_trp)
              set_if_smaller(param.table->quick_condition_rows,
                             new_conj_trp->records);
            if (!best_conj_trp ||
                (new_conj_trp &&
                 new_conj_trp->cost_est < best_conj_trp->cost_est))
            {
              best_conj_trp= new_conj_trp;
            }
          }
          if (best_conj_trp)
            best_trp= best_conj_trp;
        }
      }
    }

    thd->mem_root= param.old_root;

    /* If we got a read plan, create a quick select from it. */
    if (best_trp)
    {
      QUICK_SELECT_I *qck;
      records= best_trp->records;
      if (!(qck= best_trp->make_quick(&param, TRUE)) || qck->init())
        qck= NULL;
      *quick= qck;
    }

free_mem:
    /* Common exit: trace the chosen plan, then release all range memory. */
    thd->pop_internal_handler();
    if (unlikely(*quick && trace->is_started() && best_trp))
    {
      // best_trp cannot be NULL if quick is set, done to keep fortify happy
      Opt_trace_object trace_range_summary(trace,
                                           "chosen_range_access_summary");
      {
        Opt_trace_object trace_range_plan(trace,
                                          "range_access_plan");
        best_trp->trace_basic_info(&param, &trace_range_plan);
      }
      trace_range_summary.add("rows_for_plan", (*quick)->records).
        add("cost_for_plan", (*quick)->cost_est).
        add("chosen", true);
    }

    free_root(&alloc,MYF(0));			// Return memory & allocator
    thd->mem_root= param.old_root;
    thd->no_errors=0;

    DBUG_EXECUTE("info", print_quick(*quick, needed_reg););
  }


  /*
    Assume that if the user is using 'limit' we will only need to scan
    limit rows if we are using a key
  */
  DBUG_RETURN(records ? MY_TEST(*quick) : -1);
}
3213 
3214 /****************************************************************************
3215  * Partition pruning module
3216  ****************************************************************************/
3217 
3218 /*
3219   PartitionPruningModule
3220 
3221   This part of the code does partition pruning. Partition pruning solves the
3222   following problem: given a query over partitioned tables, find partitions
3223   that we will not need to access (i.e. partitions that we can assume to be
3224   empty) when executing the query.
3225   The set of partitions to prune doesn't depend on which query execution
3226   plan will be used to execute the query.
3227 
3228   HOW IT WORKS
3229 
3230   Partition pruning module makes use of RangeAnalysisModule. The following
3231   examples show how the problem of partition pruning can be reduced to the
3232   range analysis problem:
3233 
3234   EXAMPLE 1
3235     Consider a query:
3236 
3237       SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3238 
3239     where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
3240     way to find the used (i.e. not pruned away) partitions is as follows:
3241 
3242     1. analyze the WHERE clause and extract the list of intervals over t1.a
3243        for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3244 
3245     2. for each interval I
3246        {
3247          find partitions that have non-empty intersection with I;
3248          mark them as used;
3249        }
3250 
3251   EXAMPLE 2
3252     Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3253     we need to:
3254 
3255     1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3256        The list of intervals we'll obtain will look like this:
3257        ((t1.a, t1.b) = (1,'foo')),
3258        ((t1.a, t1.b) = (2,'bar')),
       ((t1.a, t1.b) > (10,'zz'))
3260 
3261     2. for each interval I
3262        {
3263          if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3264          {
3265            calculate HASH(part_func(t1.a, t1.b));
3266            find which partition has records with this hash value and mark
3267              it as used;
3268          }
3269          else
3270          {
3271            mark all partitions as used;
3272            break;
3273          }
3274        }
3275 
3276    For both examples the step #1 is exactly what RangeAnalysisModule could
3277    be used to do, if it was provided with appropriate index description
3278    (array of KEY_PART structures).
3279    In example #1, we need to provide it with description of index(t1.a),
3280    in example #2, we need to provide it with description of index(t1.a, t1.b).
3281 
3282    These index descriptions are further called "partitioning index
3283    descriptions". Note that it doesn't matter if such indexes really exist,
3284    as range analysis module only uses the description.
3285 
3286    Putting it all together, partitioning module works as follows:
3287 
3288    prune_partitions() {
3289      call create_partition_index_description();
3290 
3291      call get_mm_tree(); // invoke the RangeAnalysisModule
3292 
3293      // analyze the obtained interval list and get used partitions
3294      call find_used_partitions();
3295   }
3296 
3297 */
3298 
3299 struct st_part_prune_param;
3300 struct st_part_opt_info;
3301 
3302 typedef void (*mark_full_part_func)(partition_info*, uint32);
3303 
/*
  Partition pruning operation context.

  Wraps the range analyzer parameters together with the partitioning
  "index" description and the recursion state of find_used_partitions().
*/
typedef struct st_part_prune_param
{
  RANGE_OPT_PARAM range_param; /* Range analyzer parameters */

  /***************************************************************
   Following fields are filled in based solely on partitioning
   definition and not modified after that:
   **************************************************************/
  partition_info *part_info; /* Copy of table->part_info */
  /* Function to get partition id from partitioning fields only */
  get_part_id_func get_top_partition_id_func;
  /* Function to mark a partition as used (w/all subpartitions if they exist)*/
  mark_full_part_func mark_full_partition_used;

  /* Partitioning 'index' description, array of key parts */
  KEY_PART *key;

  /*
    Number of fields in partitioning 'index' definition created for
    partitioning (0 if partitioning 'index' doesn't include partitioning
    fields)
  */
  uint part_fields;
  uint subpart_fields; /* Same as above for subpartitioning */

  /*
    Number of the last partitioning field keypart in the index, or -1 if
    partitioning index definition doesn't include partitioning fields.
  */
  int last_part_partno;
  int last_subpart_partno; /* Same as above for subpartitioning */

  /*
    is_part_keypart[i] == test(keypart #i in partitioning index is a member
                               used in partitioning)
    Used to maintain current values of cur_part_fields and cur_subpart_fields
  */
  my_bool *is_part_keypart;
  /* Same as above for subpartitioning */
  my_bool *is_subpart_keypart;

  my_bool ignore_part_fields; /* Ignore rest of partitioning fields */

  /***************************************************************
   Following fields form find_used_partitions() recursion context:
   **************************************************************/
  SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
  SEL_ARG **arg_stack_end; /* Top of the stack    */
  /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
  uint cur_part_fields;
  /* Same as cur_part_fields, but for subpartitioning */
  uint cur_subpart_fields;

  /* Iterator to be used to obtain the "current" set of used partitions */
  PARTITION_ITERATOR part_iter;

  /* Initialized bitmap of num_subparts size */
  MY_BITMAP subparts_bitmap;

  /*
    Current write positions into range_param.min_key/max_key while key
    values for consecutive key parts are being accumulated.
  */
  uchar *cur_min_key;
  uchar *cur_max_key;

  /* Range flags accumulated for the key parts collected so far */
  uint cur_min_flag, cur_max_flag;
} PART_PRUNE_PARAM;
3371 
3372 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3373 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3374 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3375                                        SEL_IMERGE *imerge);
3376 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3377                                             List<SEL_IMERGE> &merges);
3378 static void mark_all_partitions_as_used(partition_info *part_info);
3379 
3380 #ifndef DBUG_OFF
3381 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3382 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3383 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3384 #endif
3385 
3386 
3387 /**
3388   Perform partition pruning for a given table and condition.
3389 
3390   @param      thd            Thread handle
3391   @param      table          Table to perform partition pruning for
3392   @param      pprune_cond    Condition to use for partition pruning
3393 
3394   @note This function assumes that lock_partitions are setup when it
3395   is invoked. The function analyzes the condition, finds partitions that
3396   need to be used to retrieve the records that match the condition, and
3397   marks them as used by setting appropriate bit in part_info->read_partitions
  In the worst case all partitions are marked as used. If the table is not
  yet locked, it will also unset bits in part_info->lock_partitions that are
  not set in read_partitions.
3401 
3402   This function returns promptly if called for non-partitioned table.
3403 
3404   @return Operation status
3405     @retval true  Failure
3406     @retval false Success
3407 */
3408 
bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
{
  partition_info *part_info = table->part_info;
  DBUG_ENTER("prune_partitions");

  /*
    If the prepare stage already have completed pruning successfully,
    it is no use of running prune_partitions() again on the same condition.
    Since it will not be able to prune anything more than the previous call
    from the prepare step.
  */
  if (part_info && part_info->is_pruning_completed)
    DBUG_RETURN(false);

  table->all_partitions_pruned_away= false;

  if (!part_info)
    DBUG_RETURN(FALSE); /* not a partitioned table */

  if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION &&
      part_info->is_auto_partitioned)
    DBUG_RETURN(false); /* Should not prune auto partitioned table */

  if (!pprune_cond)
  {
    /* No condition => every partition may contain matching rows. */
    mark_all_partitions_as_used(part_info);
    DBUG_RETURN(FALSE);
  }

  /* No need to continue pruning if there is no more partitions to prune! */
  if (bitmap_is_clear_all(&part_info->lock_partitions))
    bitmap_clear_all(&part_info->read_partitions);
  if (bitmap_is_clear_all(&part_info->read_partitions))
  {
    table->all_partitions_pruned_away= true;
    DBUG_RETURN(false);
  }

  /* Set up the pruning context and a private mem_root for range analysis. */
  PART_PRUNE_PARAM prune_param;
  MEM_ROOT alloc;
  RANGE_OPT_PARAM  *range_par= &prune_param.range_param;
  my_bitmap_map *old_sets[2];

  prune_param.part_info= part_info;
  init_sql_alloc(key_memory_prune_partitions_exec,
                 &alloc, thd->variables.range_alloc_block_size, 0);
  set_memroot_max_capacity(&alloc, thd->variables.range_optimizer_max_mem_size);
  set_memroot_error_reporting(&alloc, true);
  thd->push_internal_handler(&range_par->error_handler);
  range_par->mem_root= &alloc;
  range_par->old_root= thd->mem_root;

  if (create_partition_index_description(&prune_param))
  {
    /* Couldn't build the partitioning 'index' => nothing can be pruned. */
    mark_all_partitions_as_used(part_info);
    thd->pop_internal_handler();
    free_root(&alloc,MYF(0));		// Return memory & allocator
    DBUG_RETURN(FALSE);
  }

  dbug_tmp_use_all_columns(table, old_sets,
                           table->read_set, table->write_set);
  range_par->thd= thd;
  range_par->table= table;
  /* range_par->cond doesn't need initialization */
  range_par->prev_tables= range_par->read_tables= 0;
  range_par->current_table= table->pos_in_table_list->map();

  /* The partitioning 'index' description is the only "index" analyzed. */
  range_par->keys= 1; // one index
  range_par->using_real_indexes= FALSE;
  range_par->remove_jump_scans= FALSE;
  range_par->real_keynr[0]= 0;

  thd->no_errors=1;				// Don't warn about NULL
  thd->mem_root=&alloc;

  /* Start from "no partitions used"; pruning marks the needed ones below. */
  bitmap_clear_all(&part_info->read_partitions);

  prune_param.key= prune_param.range_param.key_parts;
  SEL_TREE *tree;
  int res;

  tree= get_mm_tree(range_par, pprune_cond);
  if (!tree)
    goto all_used;

  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    /* Cannot improve the pruning any further. */
    part_info->is_pruning_completed= true;
    goto end;
  }

  if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
    goto all_used;

  if (tree->merges.is_empty())
  {
    /* Range analysis has produced a single list of intervals. */
    prune_param.arg_stack_end= prune_param.arg_stack;
    prune_param.cur_part_fields= 0;
    prune_param.cur_subpart_fields= 0;

    prune_param.cur_min_key= prune_param.range_param.min_key;
    prune_param.cur_max_key= prune_param.range_param.max_key;
    prune_param.cur_min_flag= prune_param.cur_max_flag= 0;

    init_all_partitions_iterator(part_info, &prune_param.part_iter);
    if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
                                                            tree->keys[0]))))
      goto all_used;
  }
  else
  {
    if (tree->merges.elements == 1)
    {
      /*
        Range analysis has produced a "merge" of several intervals lists, a
        SEL_TREE that represents an expression in form
          sel_imerge = (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when
        partitioning index has several keyparts and the condition is OR of
        conditions that refer to different key parts. For example, we'll get
        here for "partitioning_field=const1 OR subpartitioning_field=const2"
      */
      if (-1 == (res= find_used_partitions_imerge(&prune_param,
                                                  tree->merges.head())))
        goto all_used;
    }
    else
    {
      /*
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in form
        imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that range analyzer
        can't really analyze properly.
      */
      if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
                                                       tree->merges)))
        goto all_used;
    }
  }

  /*
    If the condition can be evaluated now, we are done with pruning.

    During the prepare phase, before locking, subqueries and stored programs
    are not evaluated. So we need to run prune_partitions() a second time in
    the optimize phase to prune partitions for reading, when subqueries and
    stored programs may be evaluated.
  */
  if (pprune_cond->can_be_evaluated_now())
    part_info->is_pruning_completed= true;
  goto end;

all_used:
  /* Could not infer anything from the condition: assume all partitions. */
  mark_all_partitions_as_used(prune_param.part_info);
end:
  /* Common cleanup: restore THD state and release the analysis mem_root. */
  thd->pop_internal_handler();
  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
  thd->no_errors=0;
  thd->mem_root= range_par->old_root;
  free_root(&alloc,MYF(0));			// Return memory & allocator
  /* If an error occurred we can return failure after freeing the memroot. */
  if (thd->is_error())
  {
    DBUG_RETURN(true);
  }
  /*
    Must be a subset of the locked partitions.
    lock_partitions contains the partitions marked by explicit partition
    selection (... t PARTITION (pX) ...) and we must only use partitions
    within that set.
  */
  bitmap_intersect(&prune_param.part_info->read_partitions,
                   &prune_param.part_info->lock_partitions);
  /*
    If not yet locked, also prune partitions to lock if not UPDATEing
    partition key fields. This will also prune lock_partitions if we are under
    LOCK TABLES, so prune away calls to start_stmt().
    TODO: enhance this prune locking to also allow pruning of
    'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
    a lock for part_key partition.
  */
  if (!thd->lex->is_query_tables_locked() &&
      !partition_key_modified(table, table->write_set))
  {
    bitmap_copy(&prune_param.part_info->lock_partitions,
                &prune_param.part_info->read_partitions);
  }
  if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
    table->all_partitions_pruned_away= true;
  DBUG_RETURN(false);
}
3604 
3605 
3606 /*
3607   Store field key image to table record
3608 
3609   SYNOPSIS
3610     store_key_image_to_rec()
3611       field  Field which key image should be stored
3612       ptr    Field value in key format
3613       len    Length of the value, in bytes
3614 
3615   DESCRIPTION
3616     Copy the field value from its key image to the table record. The source
3617     is the value in key image format, occupying len bytes in buffer pointed
3618     by ptr. The destination is table record, in "field value in table record"
3619     format.
3620 */
3621 
store_key_image_to_rec(Field * field,uchar * ptr,uint len)3622 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3623 {
3624   /* Do the same as print_key_value() does */
3625   my_bitmap_map *old_map;
3626 
3627   if (field->real_maybe_null())
3628   {
3629     if (*ptr)
3630     {
3631       field->set_null();
3632       return;
3633     }
3634     field->set_notnull();
3635     ptr++;
3636   }
3637   old_map= dbug_tmp_use_all_columns(field->table,
3638                                     field->table->write_set);
3639   field->set_key_image(ptr, len);
3640   dbug_tmp_restore_column_map(field->table->write_set, old_map);
3641 }
3642 
3643 
3644 /*
3645   For SEL_ARG* array, store sel_arg->min values into table record buffer
3646 
3647   SYNOPSIS
3648     store_selargs_to_rec()
3649       ppar   Partition pruning context
3650       start  Array of SEL_ARG* for which the minimum values should be stored
3651       num    Number of elements in the array
3652 
3653   DESCRIPTION
3654     For each SEL_ARG* interval in the specified array, store the left edge
3655     field value (sel_arg->min, key image format) into the table record.
3656 */
3657 
store_selargs_to_rec(PART_PRUNE_PARAM * ppar,SEL_ARG ** start,int num)3658 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3659                                  int num)
3660 {
3661   KEY_PART *parts= ppar->range_param.key_parts;
3662   for (SEL_ARG **end= start + num; start != end; start++)
3663   {
3664     SEL_ARG *sel_arg= (*start);
3665     store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3666                            parts[sel_arg->part].length);
3667   }
3668 }
3669 
3670 
/* Mark a partition as used in the case when there are no subpartitions */
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  DBUG_ENTER("mark_full_partition_used_no_parts");
  DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
  /* Without subpartitions, read_partitions has one bit per partition. */
  bitmap_set_bit(&part_info->read_partitions, part_id);
  DBUG_VOID_RETURN;
}
3680 
3681 
3682 /* Mark a partition as used in the case when there are subpartitions */
mark_full_partition_used_with_parts(partition_info * part_info,uint32 part_id)3683 static void mark_full_partition_used_with_parts(partition_info *part_info,
3684                                                 uint32 part_id)
3685 {
3686   uint32 start= part_id * part_info->num_subparts;
3687   uint32 end=   start + part_info->num_subparts;
3688   DBUG_ENTER("mark_full_partition_used_with_parts");
3689 
3690   for (; start != end; start++)
3691   {
3692     DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3693     bitmap_set_bit(&part_info->read_partitions, start);
3694   }
3695   DBUG_VOID_RETURN;
3696 }
3697 
/*
  Find the set of used partitions for List<SEL_IMERGE>
  SYNOPSIS
    find_used_partitions_imerge_list
      ppar    Partition pruning context.
      merges  List of SEL_IMERGE structures to perform pruning for.

  DESCRIPTION
    List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
    The set of used partitions is an intersection of used partitions sets
    for imerge_{i}.
    We accumulate this intersection in a separate bitmap.

  RETURN
    See find_used_partitions()
*/

static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges)
{
  MY_BITMAP all_merges;
  uint bitmap_bytes;
  my_bitmap_map *bitmap_buf;
  uint n_bits= ppar->part_info->read_partitions.n_bits;
  bitmap_bytes= bitmap_buffer_size(n_bits);
  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
                                                bitmap_bytes)))
  {
    /*
      Fallback, process just the first SEL_IMERGE. This can leave us with more
      partitions marked as used than actually needed.
    */
    return find_used_partitions_imerge(ppar, merges.head());
  }
  /* Start with "all partitions" and narrow it down by intersection. */
  bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
  bitmap_set_prefix(&all_merges, n_bits);

  List_iterator<SEL_IMERGE> it(merges);
  SEL_IMERGE *imerge;
  while ((imerge=it++))
  {
    int res= find_used_partitions_imerge(ppar, imerge);
    if (!res)
    {
      /* no used partitions on one ANDed imerge => no used partitions at all */
      return 0;
    }

    /*
      res == -1 means "all partitions may be used" for this imerge, which
      leaves the accumulated intersection unchanged.
    */
    if (res != -1)
      bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);

    if (bitmap_is_clear_all(&all_merges))
      return 0;

    /* Reset so the next imerge starts marking from an empty set. */
    bitmap_clear_all(&ppar->part_info->read_partitions);
  }
  /* Publish the accumulated intersection as the final used-partition set. */
  memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
         bitmap_bytes);
  return 1;
}
3758 
3759 
3760 /*
3761   Find the set of used partitions for SEL_IMERGE structure
3762   SYNOPSIS
3763     find_used_partitions_imerge()
3764       ppar      Partition pruning context.
3765       key_tree  Intervals tree to perform pruning for.
3766 
3767   DESCRIPTION
3768     SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
3769     trivial - just use mark used partitions for each tree and bail out early
3770     if for some tree_{i} all partitions are used.
3771 
3772   RETURN
3773     See find_used_partitions().
3774 */
3775 
3776 static
find_used_partitions_imerge(PART_PRUNE_PARAM * ppar,SEL_IMERGE * imerge)3777 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3778 {
3779   int res= 0;
3780   for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3781   {
3782     ppar->arg_stack_end= ppar->arg_stack;
3783     ppar->cur_part_fields= 0;
3784     ppar->cur_subpart_fields= 0;
3785 
3786     ppar->cur_min_key= ppar->range_param.min_key;
3787     ppar->cur_max_key= ppar->range_param.max_key;
3788     ppar->cur_min_flag= ppar->cur_max_flag= 0;
3789 
3790     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3791     SEL_ARG *key_tree= (*ptree)->keys[0];
3792     if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3793       return -1;
3794   }
3795   return res;
3796 }
3797 
3798 
3799 /*
3800   Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3801 
3802   SYNOPSIS
3803     find_used_partitions()
3804       ppar      Partition pruning context.
3805       key_tree  SEL_ARG range tree to perform pruning for
3806 
3807   DESCRIPTION
3808     This function
3809       * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3810       * finds the partitions one needs to use to get rows in these intervals
3811       * marks these partitions as used.
    The next section describes the process in greater detail.
3813 
3814   IMPLEMENTATION
3815     TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3816     We can find out which [sub]partitions to use if we obtain restrictions on
3817     [sub]partitioning fields in the following form:
3818     1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
3819     1.1  Same as (1) but for subpartition fields
3820 
3821     If partitioning supports interval analysis (i.e. partitioning is a
3822     function of a single table field, and partition_info::
3823     get_part_iter_for_interval != NULL), then we can also use condition in
3824     this form:
3825     2.  "const1 <=? partition_field <=? const2"
3826     2.1  Same as (2) but for subpartition_field
3827 
3828     INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3829 
3830     The below is an example of what SEL_ARG tree may represent:
3831 
3832     (start)
3833      |                           $
3834      |   Partitioning keyparts   $  subpartitioning keyparts
3835      |                           $
3836      |     ...          ...      $
3837      |      |            |       $
3838      | +---------+  +---------+  $  +-----------+  +-----------+
3839      \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3840        +---------+  +---------+  $  +-----------+  +-----------+
3841             |                    $        |             |
3842             |                    $        |        +-----------+
3843             |                    $        |        | subpar2=c6|
3844             |                    $        |        +-----------+
3845             |                    $        |
3846             |                    $  +-----------+  +-----------+
3847             |                    $  | subpar1=c4|--| subpar2=c8|
3848             |                    $  +-----------+  +-----------+
3849             |                    $
3850             |                    $
3851        +---------+               $  +------------+  +------------+
3852        | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3853        +---------+               $  +------------+  +------------+
3854             |                    $
3855            ...                   $
3856 
3857     The up-down connections are connections via SEL_ARG::left and
3858     SEL_ARG::right. A horizontal connection to the right is the
3859     SEL_ARG::next_key_part connection.
3860 
3861     find_used_partitions() traverses the entire tree via recursion on
3862      * SEL_ARG::next_key_part (from left to right on the picture)
3863      * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3864        performed for each depth level.
3865 
3866     Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3867     ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3868     For the example in the above picture, one of stack states is:
3869       in find_used_partitions(key_tree = "subpar2=c5") (***)
3870       in find_used_partitions(key_tree = "subpar1=c3")
3871       in find_used_partitions(key_tree = "par2=c2")   (**)
3872       in find_used_partitions(key_tree = "par1=c1")
3873       in prune_partitions(...)
3874     We apply partitioning limits as soon as possible, e.g. when we reach the
3875     depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3876     and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond to
    "subpar1=c3 AND subpar2=c5", and then mark the appropriate subpartitions
    in the appropriate partitions as used.
3880 
3881     It is possible that constraints on some partitioning fields are missing.
3882     For the above example, consider this stack state:
3883       in find_used_partitions(key_tree = "subpar2=c12") (***)
3884       in find_used_partitions(key_tree = "subpar1=c10")
3885       in find_used_partitions(key_tree = "par1=c2")
3886       in prune_partitions(...)
3887     Here we don't have constraints for all partitioning fields. Since we've
3888     never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value.  We get  subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
    partition.
3892 
3893     The inverse is also possible: we may get constraints on partitioning
3894     fields, but not constraints on subpartitioning fields. In that case,
3895     calls to find_used_partitions() with depth below (**) will return -1,
3896     and we will mark entire partition as used.
3897 
3898   TODO
3899     Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3900 
3901   RETURN
3902     1   OK, one or more [sub]partitions are marked as used.
3903     0   The passed condition doesn't match any partitions
3904    -1   Couldn't infer any partition pruning "intervals" from the passed
3905         SEL_ARG* tree (which means that all partitions should be marked as
3906         used) Marking partitions as used is the responsibility of the caller.
3907 */
3908 
3909 static
find_used_partitions(PART_PRUNE_PARAM * ppar,SEL_ARG * key_tree)3910 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
3911 {
3912   int res, left_res=0, right_res=0;
3913   int key_tree_part= (int)key_tree->part;
3914   bool set_full_part_if_bad_ret= FALSE;
3915   bool ignore_part_fields= ppar->ignore_part_fields;
3916   bool did_set_ignore_part_fields= FALSE;
3917   RANGE_OPT_PARAM *range_par= &(ppar->range_param);
3918 
3919   if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
3920     return -1;
3921 
3922   if (key_tree->left != &null_element)
3923   {
3924     if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
3925       return -1;
3926   }
3927 
3928   /* Push SEL_ARG's to stack to enable looking backwards as well */
3929   ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
3930   ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
3931   *(ppar->arg_stack_end++)= key_tree;
3932 
3933   if (ignore_part_fields)
3934   {
3935     /*
3936       We come here when a condition on the first partitioning
3937       fields led to evaluating the partitioning condition
3938       (due to finding a condition of the type a < const or
3939       b > const). Thus we must ignore the rest of the
3940       partitioning fields but we still want to analyse the
3941       subpartitioning fields.
3942     */
3943     if (key_tree->next_key_part)
3944       res= find_used_partitions(ppar, key_tree->next_key_part);
3945     else
3946       res= -1;
3947     goto pop_and_go_right;
3948   }
3949 
3950   if (key_tree->type == SEL_ARG::KEY_RANGE)
3951   {
3952     if (ppar->part_info->get_part_iter_for_interval &&
3953         key_tree->part <= ppar->last_part_partno)
3954     {
3955       /* Collect left and right bound, their lengths and flags */
3956       uchar *min_key= ppar->cur_min_key;
3957       uchar *max_key= ppar->cur_max_key;
3958       uchar *tmp_min_key= min_key;
3959       uchar *tmp_max_key= max_key;
3960       key_tree->store_min(ppar->key[key_tree->part].store_length,
3961                           &tmp_min_key, ppar->cur_min_flag);
3962       key_tree->store_max(ppar->key[key_tree->part].store_length,
3963                           &tmp_max_key, ppar->cur_max_flag);
3964       uint flag;
3965       if (key_tree->next_key_part &&
3966           key_tree->next_key_part->part == key_tree->part+1 &&
3967           key_tree->next_key_part->part <= ppar->last_part_partno &&
3968           key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
3969       {
3970         /*
3971           There are more key parts for partition pruning to handle
3972           This mainly happens when the condition is an equality
3973           condition.
3974         */
3975         if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
3976             (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
3977             !key_tree->min_flag && !key_tree->max_flag)
3978         {
3979           /* Set 'parameters' */
3980           ppar->cur_min_key= tmp_min_key;
3981           ppar->cur_max_key= tmp_max_key;
3982           uint save_min_flag= ppar->cur_min_flag;
3983           uint save_max_flag= ppar->cur_max_flag;
3984 
3985           ppar->cur_min_flag|= key_tree->min_flag;
3986           ppar->cur_max_flag|= key_tree->max_flag;
3987 
3988           res= find_used_partitions(ppar, key_tree->next_key_part);
3989 
3990           /* Restore 'parameters' back */
3991           ppar->cur_min_key= min_key;
3992           ppar->cur_max_key= max_key;
3993 
3994           ppar->cur_min_flag= save_min_flag;
3995           ppar->cur_max_flag= save_max_flag;
3996           goto pop_and_go_right;
3997         }
3998         /* We have arrived at the last field in the partition pruning */
3999         uint tmp_min_flag= key_tree->min_flag,
4000              tmp_max_flag= key_tree->max_flag;
4001         if (!tmp_min_flag)
4002           key_tree->next_key_part->store_min_key(ppar->key,
4003                                                  &tmp_min_key,
4004                                                  &tmp_min_flag,
4005                                                  ppar->last_part_partno);
4006         if (!tmp_max_flag)
4007           key_tree->next_key_part->store_max_key(ppar->key,
4008                                                  &tmp_max_key,
4009                                                  &tmp_max_flag,
4010                                                  ppar->last_part_partno);
4011         flag= tmp_min_flag | tmp_max_flag;
4012       }
4013       else
4014         flag= key_tree->min_flag | key_tree->max_flag;
4015 
4016       if (tmp_min_key != range_par->min_key)
4017         flag&= ~NO_MIN_RANGE;
4018       else
4019         flag|= NO_MIN_RANGE;
4020       if (tmp_max_key != range_par->max_key)
4021         flag&= ~NO_MAX_RANGE;
4022       else
4023         flag|= NO_MAX_RANGE;
4024 
4025       /*
4026         We need to call the interval mapper if we have a condition which
4027         makes sense to prune on. In the example of COLUMNS on a and
4028         b it makes sense if we have a condition on a, or conditions on
4029         both a and b. If we only have conditions on b it might make sense
4030         but this is a harder case we will solve later. For the harder case
4031         this clause then turns into use of all partitions and thus we
4032         simply set res= -1 as if the mapper had returned that.
4033         TODO: What to do here is defined in WL#4065.
4034       */
4035       if (ppar->arg_stack[0]->part == 0)
4036       {
4037         uint32 i;
4038         uint32 store_length_array[MAX_KEY];
4039         uint32 num_keys= ppar->part_fields;
4040 
4041         for (i= 0; i < num_keys; i++)
4042           store_length_array[i]= ppar->key[i].store_length;
4043         res= ppar->part_info->
4044              get_part_iter_for_interval(ppar->part_info,
4045                                         FALSE,
4046                                         store_length_array,
4047                                         range_par->min_key,
4048                                         range_par->max_key,
4049                                         tmp_min_key - range_par->min_key,
4050                                         tmp_max_key - range_par->max_key,
4051                                         flag,
4052                                         &ppar->part_iter);
4053         if (!res)
4054           goto pop_and_go_right; /* res==0 --> no satisfying partitions */
4055       }
4056       else
4057         res= -1;
4058 
4059       if (res == -1)
4060       {
4061         /* get a full range iterator */
4062         init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4063       }
4064       /*
4065         Save our intent to mark full partition as used if we will not be able
4066         to obtain further limits on subpartitions
4067       */
4068       if (key_tree_part < ppar->last_part_partno)
4069       {
4070         /*
4071           We need to ignore the rest of the partitioning fields in all
4072           evaluations after this
4073         */
4074         did_set_ignore_part_fields= TRUE;
4075         ppar->ignore_part_fields= TRUE;
4076       }
4077       set_full_part_if_bad_ret= TRUE;
4078       goto process_next_key_part;
4079     }
4080 
4081     if (key_tree_part == ppar->last_subpart_partno &&
4082         (NULL != ppar->part_info->get_subpart_iter_for_interval))
4083     {
4084       PARTITION_ITERATOR subpart_iter;
4085       DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
4086                                                     range_par->key_parts););
4087       res= ppar->part_info->
4088            get_subpart_iter_for_interval(ppar->part_info,
4089                                          TRUE,
4090                                          NULL, /* Currently not used here */
4091                                          key_tree->min_value,
4092                                          key_tree->max_value,
4093                                          0, 0, /* Those are ignored here */
4094                                          key_tree->min_flag |
4095                                            key_tree->max_flag,
4096                                          &subpart_iter);
4097       if (res == 0)
4098       {
4099         /*
4100            The only case where we can get "no satisfying subpartitions"
4101            returned from the above call is when an error has occurred.
4102         */
4103         DBUG_ASSERT(range_par->thd->is_error());
4104         return 0;
4105       }
4106 
4107       if (res == -1)
4108         goto pop_and_go_right; /* all subpartitions satisfy */
4109 
4110       uint32 subpart_id;
4111       bitmap_clear_all(&ppar->subparts_bitmap);
4112       while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
4113              NOT_A_PARTITION_ID)
4114         bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
4115 
4116       /* Mark each partition as used in each subpartition.  */
4117       uint32 part_id;
4118       while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4119               NOT_A_PARTITION_ID)
4120       {
4121         for (uint i= 0; i < ppar->part_info->num_subparts; i++)
4122           if (bitmap_is_set(&ppar->subparts_bitmap, i))
4123             bitmap_set_bit(&ppar->part_info->read_partitions,
4124                            part_id * ppar->part_info->num_subparts + i);
4125       }
4126       goto pop_and_go_right;
4127     }
4128 
4129     if (key_tree->is_singlepoint())
4130     {
4131       if (key_tree_part == ppar->last_part_partno &&
4132           ppar->cur_part_fields == ppar->part_fields &&
4133           ppar->part_info->get_part_iter_for_interval == NULL)
4134       {
4135         /*
4136           Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
4137           fields. Save all constN constants into table record buffer.
4138         */
4139         store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
4140         DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
4141                                                        ppar->part_fields););
4142         uint32 part_id;
4143         longlong func_value;
4144         /* Find in which partition the {const1, ...,constN} tuple goes */
4145         if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
4146                                             &func_value))
4147         {
4148           res= 0; /* No satisfying partitions */
4149           goto pop_and_go_right;
4150         }
        /* Remember the limit we got - single partition #part_id */
4152         init_single_partition_iterator(part_id, &ppar->part_iter);
4153 
4154         /*
4155           If there are no subpartitions/we fail to get any limit for them,
4156           then we'll mark full partition as used.
4157         */
4158         set_full_part_if_bad_ret= TRUE;
4159         goto process_next_key_part;
4160       }
4161 
4162       if (key_tree_part == ppar->last_subpart_partno &&
4163           ppar->cur_subpart_fields == ppar->subpart_fields)
4164       {
4165         /*
4166           Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
4167           fields. Save all constN constants into table record buffer.
4168         */
4169         store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
4170                              ppar->subpart_fields);
4171         DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
4172                                                        ppar->subpart_fields,
4173                                                        ppar->subpart_fields););
4174         /* Find the subpartition (it's HASH/KEY so we always have one) */
4175         partition_info *part_info= ppar->part_info;
4176         uint32 part_id, subpart_id;
4177 
4178         if (part_info->get_subpartition_id(part_info, &subpart_id))
4179           return 0;
4180 
4181         /* Mark this partition as used in each subpartition. */
4182         while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4183                 NOT_A_PARTITION_ID)
4184         {
4185           bitmap_set_bit(&part_info->read_partitions,
4186                          part_id * part_info->num_subparts + subpart_id);
4187         }
4188         res= 1; /* Some partitions were marked as used */
4189         goto pop_and_go_right;
4190       }
4191     }
4192     else
4193     {
4194       /*
4195         Can't handle condition on current key part. If we're that deep that
        we're processing subpartitioning's key parts, this means we'll not be
4197         able to infer any suitable condition, so bail out.
4198       */
4199       if (key_tree_part >= ppar->last_part_partno)
4200       {
4201         res= -1;
4202         goto pop_and_go_right;
4203       }
4204       /*
4205         No meaning in continuing with rest of partitioning key parts.
4206         Will try to continue with subpartitioning key parts.
4207       */
4208       ppar->ignore_part_fields= true;
4209       did_set_ignore_part_fields= true;
4210       goto process_next_key_part;
4211     }
4212   }
4213 
4214 process_next_key_part:
4215   if (key_tree->next_key_part)
4216     res= find_used_partitions(ppar, key_tree->next_key_part);
4217   else
4218     res= -1;
4219 
4220   if (did_set_ignore_part_fields)
4221   {
4222     /*
4223       We have returned from processing all key trees linked to our next
4224       key part. We are ready to be moving down (using right pointers) and
4225       this tree is a new evaluation requiring its own decision on whether
4226       to ignore partitioning fields.
4227     */
4228     ppar->ignore_part_fields= FALSE;
4229   }
4230   if (set_full_part_if_bad_ret)
4231   {
4232     if (res == -1)
4233     {
4234       /* Got "full range" for subpartitioning fields */
4235       uint32 part_id;
4236       bool found= FALSE;
4237       while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4238              NOT_A_PARTITION_ID)
4239       {
4240         ppar->mark_full_partition_used(ppar->part_info, part_id);
4241         found= TRUE;
4242       }
4243       res= MY_TEST(found);
4244     }
4245     /*
4246       Restore the "used partitions iterator" to the default setting that
4247       specifies iteration over all partitions.
4248     */
4249     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4250   }
4251 
4252 pop_and_go_right:
4253   /* Pop this key part info off the "stack" */
4254   ppar->arg_stack_end--;
4255   ppar->cur_part_fields-=    ppar->is_part_keypart[key_tree_part];
4256   ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4257 
4258   if (res == -1)
4259     return -1;
4260   if (key_tree->right != &null_element)
4261   {
4262     if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4263       return -1;
4264   }
4265   return (left_res || right_res || res);
4266 }
4267 
4268 
mark_all_partitions_as_used(partition_info * part_info)4269 static void mark_all_partitions_as_used(partition_info *part_info)
4270 {
4271   bitmap_copy(&(part_info->read_partitions),
4272               &(part_info->lock_partitions));
4273 }
4274 
4275 
4276 /*
4277   Check if field types allow to construct partitioning index description
4278 
4279   SYNOPSIS
4280     fields_ok_for_partition_index()
4281       pfield  NULL-terminated array of pointers to fields.
4282 
4283   DESCRIPTION
4284     For an array of fields, check if we can use all of the fields to create
4285     partitioning index description.
4286 
4287     We can't process GEOMETRY fields - for these fields singlepoint intervals
    can't be generated, and non-singlepoint are "special" kinds of intervals
4289     to which our processing logic can't be applied.
4290 
4291     It is not known if we could process ENUM fields, so they are disabled to be
4292     on the safe side.
4293 
4294   RETURN
4295     TRUE   Yes, fields can be used in partitioning index
4296     FALSE  Otherwise
4297 */
4298 
fields_ok_for_partition_index(Field ** pfield)4299 static bool fields_ok_for_partition_index(Field **pfield)
4300 {
4301   if (!pfield)
4302     return FALSE;
4303   for (; (*pfield); pfield++)
4304   {
4305     enum_field_types ftype= (*pfield)->real_type();
4306     if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4307       return FALSE;
4308   }
4309   return TRUE;
4310 }
4311 
4312 
4313 /*
4314   Create partition index description and fill related info in the context
4315   struct
4316 
4317   SYNOPSIS
4318     create_partition_index_description()
4319       prune_par  INOUT Partition pruning context
4320 
4321   DESCRIPTION
4322     Create partition index description. Partition index description is:
4323 
4324       part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4325 
4326     If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4327     corresponding fields_list(...) is not included into index description
4328     and we don't perform partition pruning for partitions/subpartitions.
4329 
4330   RETURN
4331     TRUE   Out of memory or can't do partition pruning at all
4332     FALSE  OK
4333 */
4334 
static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
{
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);
  partition_info *part_info= ppar->part_info;
  uint used_part_fields, used_subpart_fields;

  /*
    Only include the (sub)partitioning fields in the index description if
    every field in the respective array is usable for interval analysis;
    otherwise that level contributes zero fields and is not pruned.
  */
  used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
                      part_info->num_part_fields : 0;
  used_subpart_fields=
    fields_ok_for_partition_index(part_info->subpart_field_array)?
      part_info->num_subpart_fields : 0;

  uint total_parts= used_part_fields + used_subpart_fields;

  /* Fill in the pruning context: field counts and last-keypart indexes. */
  ppar->ignore_part_fields= FALSE;
  ppar->part_fields=      used_part_fields;
  ppar->last_part_partno= (int)used_part_fields - 1;

  ppar->subpart_fields= used_subpart_fields;
  /* -1 when there are no usable subpartitioning fields */
  ppar->last_subpart_partno=
    used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;

  /* Pick callbacks that match whether the table is subpartitioned. */
  if (part_info->is_sub_partitioned())
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
    ppar->get_top_partition_id_func= part_info->get_part_partition_id;
  }
  else
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
    ppar->get_top_partition_id_func= part_info->get_partition_id;
  }

  KEY_PART *key_part;
  MEM_ROOT *alloc= range_par->mem_root;
  /*
    total_parts == 0 means no usable fields at all: no pruning is possible.
    Any failed allocation below also makes us bail out (return TRUE).
  */
  if (!total_parts ||
      !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
                                               total_parts)) ||
      !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
                                                      total_parts)) ||
      !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)) ||
      !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)))
    return TRUE;

  /* Bitmap used later to collect the set of satisfying subpartitions. */
  if (ppar->subpart_fields)
  {
    my_bitmap_map *buf;
    uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
    if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
      return TRUE;
    bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
                FALSE);
  }
  range_par->key_parts= key_part;
  /*
    Start with partitioning fields; fall through to subpartitioning fields
    if the former are unusable (used_part_fields == 0).
  */
  Field **field= (ppar->part_fields)? part_info->part_field_array :
                                           part_info->subpart_field_array;
  bool in_subpart_fields= FALSE;
  /* Build one KEY_PART per usable (sub)partitioning field. */
  for (uint part= 0; part < total_parts; part++, key_part++)
  {
    key_part->key=          0;
    key_part->part=	    part;
    key_part->length= (uint16)(*field)->key_length();
    key_part->store_length= (uint16)get_partition_field_store_length(*field);

    DBUG_PRINT("info", ("part %u length %u store_length %u", part,
                         key_part->length, key_part->store_length));

    key_part->field=        (*field);
    key_part->image_type =  Field::itRAW;
    /*
      We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
      in the RangeAnalysisModule.
    */
    key_part->flag=         0;
    /* We don't set key_parts->null_bit as it will not be used */

    /* Record to which level (partitioning/subpartitioning) keypart belongs. */
    ppar->is_part_keypart[part]= !in_subpart_fields;
    ppar->is_subpart_keypart[part]= in_subpart_fields;

    /*
      Check if this was last field in this array, in this case we
      switch to subpartitioning fields. (This will only happen if
      there are subpartitioning fields to cater for).
    */
    if (!*(++field))
    {
      field= part_info->subpart_field_array;
      in_subpart_fields= TRUE;
    }
  }
  range_par->key_parts_end= key_part;

  DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
                                                range_par->key_parts_end););
  return FALSE;
}
4433 
4434 
4435 #ifndef DBUG_OFF
4436 
print_partitioning_index(KEY_PART * parts,KEY_PART * parts_end)4437 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4438 {
4439   DBUG_ENTER("print_partitioning_index");
4440   DBUG_LOCK_FILE;
4441   fprintf(DBUG_FILE, "partitioning INDEX(");
4442   for (KEY_PART *p=parts; p != parts_end; p++)
4443   {
4444     fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
4445   }
4446   fputs(");\n", DBUG_FILE);
4447   DBUG_UNLOCK_FILE;
4448   DBUG_VOID_RETURN;
4449 }
4450 
4451 
4452 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
dbug_print_segment_range(SEL_ARG * arg,KEY_PART * part)4453 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4454 {
4455   DBUG_ENTER("dbug_print_segment_range");
4456   DBUG_LOCK_FILE;
4457   if (!(arg->min_flag & NO_MIN_RANGE))
4458   {
4459     store_key_image_to_rec(part->field, arg->min_value, part->length);
4460     part->field->dbug_print();
4461     if (arg->min_flag & NEAR_MIN)
4462       fputs(" < ", DBUG_FILE);
4463     else
4464       fputs(" <= ", DBUG_FILE);
4465   }
4466 
4467   fprintf(DBUG_FILE, "%s", part->field->field_name);
4468 
4469   if (!(arg->max_flag & NO_MAX_RANGE))
4470   {
4471     if (arg->max_flag & NEAR_MAX)
4472       fputs(" < ", DBUG_FILE);
4473     else
4474       fputs(" <= ", DBUG_FILE);
4475     store_key_image_to_rec(part->field, arg->max_value, part->length);
4476     part->field->dbug_print();
4477   }
4478   fputs("\n", DBUG_FILE);
4479   DBUG_UNLOCK_FILE;
4480   DBUG_VOID_RETURN;
4481 }
4482 
4483 
4484 /*
4485   Print a singlepoint multi-keypart range interval to debug trace
4486 
4487   SYNOPSIS
4488     dbug_print_singlepoint_range()
4489       start  Array of SEL_ARG* ptrs representing conditions on key parts
4490       num    Number of elements in the array.
4491 
4492   DESCRIPTION
4493     This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4494     interval to debug trace.
4495 */
4496 
dbug_print_singlepoint_range(SEL_ARG ** start,uint num)4497 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4498 {
4499   DBUG_ENTER("dbug_print_singlepoint_range");
4500   DBUG_LOCK_FILE;
4501   SEL_ARG **end= start + num;
4502 
4503   for (SEL_ARG **arg= start; arg != end; arg++)
4504   {
4505     Field *field= (*arg)->field;
4506     fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
4507     field->dbug_print();
4508   }
4509   fputs("\n", DBUG_FILE);
4510   DBUG_UNLOCK_FILE;
4511   DBUG_VOID_RETURN;
4512 }
4513 #endif
4514 
4515 /****************************************************************************
4516  * Partition pruning code ends
4517  ****************************************************************************/
4518 
4519 
4520 /*
4521   Get best plan for a SEL_IMERGE disjunctive expression.
4522   SYNOPSIS
4523     get_best_disjunct_quick()
4524       param     Parameter from check_quick_select function
4525       imerge    Expression to use
4526       cost_est  Don't create scans with cost > cost_est
4527 
4528   NOTES
4529     index_merge cost is calculated as follows:
4530     index_merge_cost =
4531       cost(index_reads) +         (see #1)
4532       cost(rowid_to_row_scan) +   (see #2)
4533       cost(unique_use)            (see #3)
4534 
4535     1. cost(index_reads) =SUM_i(cost(index_read_i))
4536        For non-CPK scans,
4537          cost(index_read_i) = {cost of ordinary 'index only' scan}
4538        For CPK scan,
4539          cost(index_read_i) = {cost of non-'index only' scan}
4540 
4541     2. cost(rowid_to_row_scan)
4542       If table PK is clustered then
4543         cost(rowid_to_row_scan) =
4544           {cost of ordinary clustered PK scan with n_ranges=n_rows}
4545 
4546       Otherwise, we use the following model to calculate costs:
4547       We need to retrieve n_rows rows from file that occupies n_blocks blocks.
4548       We assume that offsets of rows we need are independent variates with
4549       uniform distribution in [0..max_file_offset] range.
4550 
4551       We'll denote block as "busy" if it contains row(s) we need to retrieve
4552       and "empty" if doesn't contain rows we need.
4553 
4554       Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
4555       applies to any block in file). Let x_i be a variate taking value 1 if
4556       block #i is empty and 0 otherwise.
4557 
4558       Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4559 
4560       E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4561         = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4562        ~= n_blocks * exp(-n_rows/n_blocks).
4563 
4564       E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4565        ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
4566 
4567       Average size of "hole" between neighbor non-empty blocks is
4568            E(hole_size) = n_blocks/E(n_busy_blocks).
4569 
4570       The total cost of reading all needed blocks in one "sweep" is:
4571 
4572         E(n_busy_blocks) * disk_seek_cost(n_blocks/E(n_busy_blocks))
4573 
4574       This cost estimate is calculated in get_sweep_read_cost().
4575 
4576     3. Cost of Unique use is calculated in Unique::get_use_cost function.
4577 
4578   ROR-union cost is calculated in the same way index_merge, but instead of
4579   Unique a priority queue is used.
4580 
4581   RETURN
4582     Created read plan
4583     NULL - Out of memory or no read scan could be built.
4584 */
4585 
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         const Cost_estimate *cost_est)
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  /* Number of disjuncts (trees) in the SEL_IMERGE expression */
  uint n_child_scans= imerge->trees_next - imerge->trees;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  /* Points into range_scans: the scan over the clustered PK, if any */
  TRP_RANGE **cpk_scan= NULL;
  bool imerge_too_expensive= FALSE;
  Cost_estimate imerge_cost;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
  bool all_scans_ror_able= TRUE;
  bool all_scans_rors= TRUE;
  size_t unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  ha_rows roru_total_records;
  double roru_intersect_part= 1.0;
  const Cost_model_table *const cost_model= param->table->cost_model();
  /* Running cost threshold; tightened as cheaper plans are found */
  Cost_estimate read_cost= *cost_est;

  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost: %g", cost_est->total_cost()));

  DBUG_ASSERT(param->table->file->stats.records);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_object trace_best_disjunct(trace);
  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);
  // Note: to_merge.end() is called to close this object after this for-loop.
  Opt_trace_array to_merge(trace, "indexes_to_merge");
  /*
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
  */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++)
  {
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
    Opt_trace_object trace_idx(trace);
    if (!(*cur_child=
          get_key_scans_params(param, *ptree, true, false, &read_cost)))
    {
      /*
        One of index scans in this index_merge is more expensive than entire
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
      */
      imerge_too_expensive= true;
    }
    if (imerge_too_expensive)
    {
      trace_idx.add("chosen", false).add_alnum("cause", "cost");
      continue;
    }

    const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
    imerge_cost+= (*cur_child)->cost_est;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
    /* The clustered-PK scan is tracked separately: its rows need no lookup */
    if (pk_is_clustered &&
        keynr_in_table == param->table->s->primary_key)
    {
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
    }
    else
      non_cpk_scan_records += (*cur_child)->records;

    trace_idx.
      add_utf8("index_to_merge", param->table->key_info[keynr_in_table].name).
      add("cumulated_cost", imerge_cost);
  }

  // Note: to_merge trace object is closed here
  to_merge.end();


  trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
  if (imerge_too_expensive || (imerge_cost > read_cost) ||
      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
      !read_cost.is_max_cost()))
  {
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
      more expensive
    */
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
    trace_best_disjunct.add("chosen", false).add_alnum("cause", "cost");
    DBUG_RETURN(NULL);
  }

  /*
    If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
    guaranteed to be cheaper than non-ROR union), unless ROR-unions are
    disabled in @@optimizer_switch
  */
  if (all_scans_rors &&
      param->index_merge_union_allowed)
  {
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    trace_best_disjunct.add("use_roworder_union", true).
      add_alnum("cause", "always_cheaper_than_not_roworder_retrieval");
    goto skip_to_ror_scan;
  }

  if (cpk_scan)
  {
    /*
      Add one rowid/key comparison for each row retrieved on non-CPK
      scan. (it is done in QUICK_RANGE_SELECT::row_in_ranges)
    */
    const double rid_comp_cost=
      cost_model->key_compare_cost(static_cast<double>(non_cpk_scan_records));
    imerge_cost.add_cpu(rid_comp_cost);
    trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
                            rid_comp_cost);
  }

  /* Calculate cost(rowid_to_row_scan) */
  {
    Cost_estimate sweep_cost;
    JOIN *join= param->thd->lex->select_lex->join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, non_cpk_scan_records, is_interrupted,
                        &sweep_cost);
    imerge_cost+= sweep_cost;
    trace_best_disjunct.add("cost_sort_rowid_and_read_disk",
                            sweep_cost);
  }
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
                     imerge_cost.total_cost()));
  /* Too expensive (or sort-union disabled): try ROR-union instead */
  if (imerge_cost > read_cost ||
      !param->index_merge_sort_union_allowed)
  {
    trace_best_disjunct.add("use_roworder_index_merge", true).
      add_alnum("cause", "cost");
    goto build_ror_index_merge;
  }

  /* Add Unique operations cost */
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
                                    param->table->file->ref_length,
                                    param->thd->variables.sortbuff_size);
  /* Grow the shared cost-calculation buffer if it is too small */
  if (param->imerge_cost_buff.size() < unique_calc_buff_size)
  {
    typedef Unique::Imerge_cost_buf_type::value_type element_type;
    void *rawmem=
      alloc_root(param->mem_root, unique_calc_buff_size * sizeof(element_type));
    if (!rawmem)
      DBUG_RETURN(NULL);
    param->imerge_cost_buff=
      Unique::Imerge_cost_buf_type(static_cast<element_type*>(rawmem),
                                   unique_calc_buff_size);
  }

  {
    const double dup_removal_cost=
      Unique::get_use_cost(param->imerge_cost_buff,
                           (uint)non_cpk_scan_records,
                           param->table->file->ref_length,
                           param->thd->variables.sortbuff_size,
                           cost_model);

    trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost);
    imerge_cost.add_cpu(dup_removal_cost);

    trace_best_disjunct.add("total_cost", imerge_cost);
    DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
              imerge_cost.total_cost(), read_cost.total_cost()));
  }
  /* Build an index_merge read plan if it beats the current threshold */
  if (imerge_cost < read_cost)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->cost_est= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
      imerge_trp->records= min(imerge_trp->records,
                               param->table->file->stats.records);
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      read_cost= imerge_cost;
    }
  }

build_ror_index_merge:
  /* A ROR-union is impossible or disallowed: return whatever we have */
  if (!all_scans_ror_able ||
      param->thd->lex->sql_command == SQLCOM_DELETE ||
      !param->index_merge_union_allowed)
    DBUG_RETURN(imerge_trp);

  /* Ok, it is possible to build a ROR-union, try it. */
  if (!(roru_read_plans=
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);
skip_to_ror_scan:
  Cost_estimate roru_index_cost;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  /*
    Note: trace_analyze_ror.end() is called to close this object after
    this for-loop.
  */
  Opt_trace_array trace_analyze_ror(trace, "analyzing_roworder_scans");
  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
  {
    Opt_trace_object trp_info(trace);
    if (unlikely(trace->is_started()))
      (*cur_child)->trace_basic_info(param, &trp_info);

    /*
      Assume the best ROR scan is the one that has cheapest
      full-row-retrieval scan cost.
      Also accumulate index_only scan costs as we'll need them to
      calculate overall index_intersection cost.
    */
    Cost_estimate scan_cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      scan_cost=
        param->table->file->read_cost(param->real_keynr[(*cur_child)->key_idx],
          1, static_cast<double>((*cur_child)->records));
      scan_cost.add_cpu(
            cost_model->row_evaluate_cost(rows2double((*cur_child)->records)));
    }
    else
      scan_cost= read_cost;

    TABLE_READ_PLAN *prev_plan= *cur_child;
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, &scan_cost)))
    {
      /* No intersection: fall back to the child's own ROR scan, if any */
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_cost += (*cur_roru_plan)->cost_est;
    }
    else
    {
      roru_index_cost +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_cost;
    }
    roru_total_records += (*cur_roru_plan)->records;
    /*
      NOTE(review): both operands below are ha_rows (integer), so this is
      integer division and truncates to 0 whenever the scan selects fewer
      rows than the table holds - presumably a floating-point fraction was
      intended (cf. rows2double() used elsewhere). Confirm against upstream
      before changing, as the truncation only inflates roru_total_records.
    */
    roru_intersect_part *= (*cur_roru_plan)->records /
      param->table->file->stats.records;
  }
  // Note: trace_analyze_ror trace object is closed here
  trace_analyze_ror.end();

  /*
    rows to retrieve=
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
    This is valid because index_merge construction guarantees that conditions
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
                                  param->table->file->stats.records);
  /* ok, got a ROR read plan for each of the disjuncts
    Calculate cost:
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */
  Cost_estimate roru_total_cost;
  {
    JOIN *join= param->thd->lex->select_lex->join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
                        &roru_total_cost);
    roru_total_cost += roru_index_cost;
    /* Priority-queue merge cost: one key compare per row per log2(n) level */
    roru_total_cost.add_cpu(
      cost_model->key_compare_cost(rows2double(roru_total_records) *
                                   log((double)n_child_scans) / M_LN2));
  }

  trace_best_disjunct.add("index_roworder_union_cost",
                          roru_total_cost).
    add("members", n_child_scans);
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_cost)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      trace_best_disjunct.add("chosen", true);
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->cost_est= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  trace_best_disjunct.add("chosen", false);

  DBUG_RETURN(imerge_trp);
}
4903 
4904 
4905 /*
4906   Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
4907   sel_arg set of intervals.
4908 
4909   SYNOPSIS
4910     make_ror_scan()
4911       param    Parameter from test_quick_select function
4912       idx      Index of key in param->keys
4913       sel_arg  Set of intervals for a given key
4914 
4915   RETURN
4916     NULL - out of memory
4917     ROR scan structure containing a scan for {idx, sel_arg}
4918 */
4919 
4920 static
make_ror_scan(const PARAM * param,int idx,SEL_ARG * sel_arg)4921 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
4922 {
4923   ROR_SCAN_INFO *ror_scan;
4924   my_bitmap_map *bitmap_buf1;
4925   my_bitmap_map *bitmap_buf2;
4926   uint keynr;
4927   DBUG_ENTER("make_ror_scan");
4928 
4929   if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
4930                                              sizeof(ROR_SCAN_INFO))))
4931     DBUG_RETURN(NULL);
4932 
4933   ror_scan->idx= idx;
4934   ror_scan->keynr= keynr= param->real_keynr[idx];
4935   ror_scan->sel_arg= sel_arg;
4936   ror_scan->records= param->table->quick_rows[keynr];
4937 
4938   if (!(bitmap_buf1= (my_bitmap_map*) alloc_root(param->mem_root,
4939                                                  param->fields_bitmap_size)))
4940     DBUG_RETURN(NULL);
4941   if (!(bitmap_buf2= (my_bitmap_map*) alloc_root(param->mem_root,
4942                                                  param->fields_bitmap_size)))
4943     DBUG_RETURN(NULL);
4944 
4945   if (bitmap_init(&ror_scan->covered_fields, bitmap_buf1,
4946                   param->table->s->fields, FALSE))
4947     DBUG_RETURN(NULL);
4948   if (bitmap_init(&ror_scan->covered_fields_remaining, bitmap_buf2,
4949                   param->table->s->fields, FALSE))
4950     DBUG_RETURN(NULL);
4951 
4952   bitmap_clear_all(&ror_scan->covered_fields);
4953 
4954   KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
4955   KEY_PART_INFO *key_part_end=
4956     key_part + param->table->key_info[keynr].user_defined_key_parts;
4957   for (;key_part != key_part_end; ++key_part)
4958   {
4959     if (bitmap_is_set(&param->needed_fields, key_part->fieldnr-1))
4960       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
4961   }
4962   bitmap_copy(&ror_scan->covered_fields_remaining, &ror_scan->covered_fields);
4963 
4964   double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
4965   ror_scan->index_read_cost=
4966     param->table->file->index_scan_cost(ror_scan->keynr, 1, rows);
4967   DBUG_RETURN(ror_scan);
4968 }
4969 
4970 
4971 /**
4972   Compare two ROR_SCAN_INFO* by
4973     1. #fields in this index that are not already covered
4974        by other indexes earlier in the intersect ordering: descending
4975     2. E(#records): ascending
4976 
4977   @param scan1   first ror scan to compare
4978   @param scan2   second ror scan to compare
4979 
4980   @return true if scan1 > scan2, false otherwise
4981 */
is_better_intersect_match(const ROR_SCAN_INFO * scan1,const ROR_SCAN_INFO * scan2)4982 static bool is_better_intersect_match(const ROR_SCAN_INFO *scan1,
4983                                       const ROR_SCAN_INFO *scan2)
4984 {
4985   if (scan1 == scan2)
4986     return false;
4987 
4988   if (scan1->num_covered_fields_remaining >
4989       scan2->num_covered_fields_remaining)
4990     return false;
4991 
4992   if (scan1->num_covered_fields_remaining <
4993       scan2->num_covered_fields_remaining)
4994     return true;
4995 
4996   return (scan1->records > scan2->records);
4997 }
4998 
4999 /**
5000   Sort indexes in an order that is likely to be a good index merge
5001   intersection order. After running this function, [start, ..., end-1]
5002   is ordered according to this strategy:
5003 
5004     1) Minimize the number of indexes that must be used in the
5005        intersection. I.e., the index covering most fields not already
5006        covered by other indexes earlier in the sort order is picked first.
5007     2) When multiple indexes cover equally many uncovered fields, the
5008        index with lowest E(#rows) is chosen.
5009 
5010   Note that all permutations of index ordering are not tested, so this
5011   function may not find the optimal order.
5012 
5013   @param[in,out] start     Pointer to the start of indexes that may
5014                            be used in index merge intersection
5015   @param         end       Pointer past the last index that may be used.
5016   @param         param     Parameter from test_quick_select function.
5017 */
find_intersect_order(ROR_SCAN_INFO ** start,ROR_SCAN_INFO ** end,const PARAM * param)5018 static void find_intersect_order(ROR_SCAN_INFO **start,
5019                                  ROR_SCAN_INFO **end,
5020                                  const PARAM *param)
5021 {
5022   // nothing to sort if there are only zero or one ROR scans
5023   if ((start == end) || (start + 1 == end))
5024     return;
5025 
5026   /*
5027     Bitmap of fields we would like the ROR scans to cover. Will be
5028     modified by the loop below so that when we're looking for a ROR
5029     scan in position 'x' in the ordering, all fields covered by ROR
5030     scans 0,...,x-1 have been removed.
5031   */
5032   MY_BITMAP fields_to_cover;
5033   my_bitmap_map *map;
5034   if (!(map= (my_bitmap_map*) alloc_root(param->mem_root,
5035                                          param->fields_bitmap_size)))
5036     return;
5037   bitmap_init(&fields_to_cover, map, param->needed_fields.n_bits, FALSE);
5038   bitmap_copy(&fields_to_cover, &param->needed_fields);
5039 
5040   // Sort ROR scans in [start,...,end-1]
5041   for (ROR_SCAN_INFO **place= start; place < (end - 1); place++)
5042   {
5043     /* Placeholder for the best ROR scan found for position 'place' so far */
5044     ROR_SCAN_INFO **best= place;
5045     ROR_SCAN_INFO **current= place + 1;
5046 
5047     {
5048       /*
5049         Calculate how many fields in 'fields_to_cover' not already
5050         covered by [start,...,place-1] the 'best' index covers. The
5051         result is used in is_better_intersect_match() and is valid
5052         when finding the best ROR scan for position 'place' only.
5053       */
5054       bitmap_intersect(&(*best)->covered_fields_remaining, &fields_to_cover);
5055       (*best)->num_covered_fields_remaining=
5056         bitmap_bits_set(&(*best)->covered_fields_remaining);
5057     }
5058     for (; current < end; current++)
5059     {
5060       {
5061         /*
5062           Calculate how many fields in 'fields_to_cover' not already
5063           covered by [start,...,place-1] the 'current' index covers.
5064           The result is used in is_better_intersect_match() and is
5065           valid when finding the best ROR scan for position 'place' only.
5066         */
5067         bitmap_intersect(&(*current)->covered_fields_remaining,
5068                          &fields_to_cover);
5069         (*current)->num_covered_fields_remaining=
5070           bitmap_bits_set(&(*current)->covered_fields_remaining);
5071 
5072         /*
5073           No need to compare with 'best' if 'current' does not
5074           contribute with uncovered fields.
5075         */
5076         if ((*current)->num_covered_fields_remaining == 0)
5077           continue;
5078       }
5079 
5080       if (is_better_intersect_match(*best, *current))
5081         best= current;
5082     }
5083 
5084     /*
5085       'best' is now the ROR scan that will be sorted in position
5086       'place'. When searching for the best ROR scans later in the sort
5087       sequence we do not need coverage of the fields covered by 'best'
5088      */
5089     bitmap_subtract(&fields_to_cover, &(*best)->covered_fields);
5090     if (best != place)
5091       swap_variables(ROR_SCAN_INFO*, *best, *place);
5092 
5093     if (bitmap_is_clear_all(&fields_to_cover))
5094       return;                                   // No more fields to cover
5095   }
5096 }
5097 
5098 /* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Fraction of table records that satisfies conditions of all scans.
    This is the number of full records that will be retrieved if a
    non-index_only index intersection will be employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records; /* sum(#records to look in indexes) */
  Cost_estimate index_scan_cost; /* SUM(cost of 'index-only' scans) */
  /* index_scan_cost plus full-row retrieval cost when not covering */
  Cost_estimate total_cost;
} ROR_INTERSECT_INFO;
5116 
5117 
5118 /*
5119   Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
5120 
5121   SYNOPSIS
5122     ror_intersect_init()
5123       param         Parameter from test_quick_select
5124 
5125   RETURN
5126     allocated structure
5127     NULL on error
5128 */
5129 
5130 static
ror_intersect_init(const PARAM * param)5131 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
5132 {
5133   ROR_INTERSECT_INFO *info;
5134   my_bitmap_map* buf;
5135   if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
5136                                               sizeof(ROR_INTERSECT_INFO))))
5137     return NULL;
5138   info->param= param;
5139   if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
5140                                          param->fields_bitmap_size)))
5141     return NULL;
5142   if (bitmap_init(&info->covered_fields, buf, param->table->s->fields,
5143                   FALSE))
5144     return NULL;
5145   info->is_covering= FALSE;
5146   info->index_scan_cost.reset();
5147   info->total_cost.reset();
5148   info->index_records= 0;
5149   info->out_rows= (double) param->table->file->stats.records;
5150   bitmap_clear_all(&info->covered_fields);
5151   return info;
5152 }
5153 
ror_intersect_cpy(ROR_INTERSECT_INFO * dst,const ROR_INTERSECT_INFO * src)5154 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
5155 {
5156   dst->param= src->param;
5157   memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
5158          no_bytes_in_map(&src->covered_fields));
5159   dst->out_rows= src->out_rows;
5160   dst->is_covering= src->is_covering;
5161   dst->index_records= src->index_records;
5162   dst->index_scan_cost= src->index_scan_cost;
5163   dst->total_cost= src->total_cost;
5164 }
5165 
5166 
5167 /*
5168   Get selectivity of adding a ROR scan to the ROR-intersection.
5169 
5170   SYNOPSIS
5171     ror_scan_selectivity()
      info  ROR-intersection, an intersection of ROR index scans
5173       scan  ROR scan that may or may not improve the selectivity
5174             of 'info'
5175 
5176   NOTES
5177     Suppose we have conditions on several keys
5178     cond=k_11=c_11 AND k_12=c_12 AND ...  // key_parts of first key in 'info'
5179          k_21=c_21 AND k_22=c_22 AND ...  // key_parts of second key in 'info'
5180           ...
5181          k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //key_parts of 'scan'
5182 
5183     where k_ij may be the same as any k_pq (i.e. keys may have common parts).
5184 
5185     Note that for ROR retrieval, only equality conditions are usable so there
5186     are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'
5187 
5188     A full row is retrieved if entire condition holds.
5189 
5190     The recursive procedure for finding P(cond) is as follows:
5191 
5192     First step:
5193     Pick 1st part of 1st key and break conjunction (1) into two parts:
5194       cond= (k_11=c_11 AND R)
5195 
5196     Here R may still contain condition(s) equivalent to k_11=c_11.
5197     Nevertheless, the following holds:
5198 
5199       P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
5200 
5201     Mark k_11 as fixed field (and satisfied condition) F, save P(F),
5202     save R to be cond and proceed to recursion step.
5203 
5204     Recursion step:
    We have a set of fixed fields (and satisfied conditions) F, probability P(F),
5206     and remaining conjunction R
5207     Pick next key part on current key and its condition "k_ij=c_ij".
5208     We will add "k_ij=c_ij" into F and update P(F).
5209     Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
5210 
5211      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
5212 
5213     (where '|' mean conditional probability, not "or")
5214 
5215     Consider the first multiplier in (2). One of the following holds:
5216     a) F contains condition on field used in t (i.e. t AND F = F).
5217       Then P(t|F) = 1
5218 
5219     b) F doesn't contain condition on field used in t. Then F and t are
5220      considered independent.
5221 
5222      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
5223           = P(t|fields_before_t_in_key).
5224 
5225      P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
5226                                    #records(fields_before_t_in_key, t)
5227 
5228     The second multiplier is calculated by applying this step recursively.
5229 
5230   IMPLEMENTATION
5231     This function calculates the result of application of the "recursion step"
5232     described above for all fixed key members of a single key, accumulating set
5233     of covered fields, selectivity, etc.
5234 
5235     The calculation is conducted as follows:
5236     Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
5237 
5238      n_{k1}      n_{k2}
5239     --------- * ---------  * .... (3)
5240      n_{k1-1}    n_{k2-1}
5241 
5242     where k1,k2,... are key parts which fields were not yet marked as fixed
5243     ( this is result of application of option b) of the recursion step for
5244       parts of a single key).
5245     Since it is reasonable to expect that most of the fields are not marked
5246     as fixed, we calculate (3) as
5247 
5248                                   n_{i1}      n_{i2}
5249     (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
5250                                   n_{i1-1}    n_{i2-1}
5251 
5252     where i1,i2, .. are key parts that were already marked as fixed.
5253 
5254     In order to minimize number of expensive records_in_range calls we
5255     group and reduce adjacent fractions. Note that on the optimizer's
5256     request, index statistics may be used instead of records_in_range
5257     @see RANGE_OPT_PARAM::use_index_statistics.
5258 
5259   RETURN
5260     Selectivity of given ROR scan, a number between 0 and 1. 1 means that
5261     adding 'scan' to the intersection does not improve the selectivity.
5262 */
5263 
static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  const TABLE * const table= info->param->table;
  const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;
  /**
    key values tuple, used to store both min_range.key and
    max_range.key. This function is only called for equality ranges;
    open ranges (e.g. "min_value < X < max_value") cannot be used for
    rowid ordered retrieval, so in this function we know that
    min_range.key == max_range.key
  */
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  /* Whether the first key part's field is already covered by 'info'. */
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr-1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  ha_rows prev_records= table->file->stats.records;
  DBUG_ENTER("ror_scan_selectivity");

  /*
    Walk the key parts of 'scan' in order. A row estimate is needed
    only at positions where the covered/uncovered status flips; see
    the function header for why adjacent fractions can be reduced.
  */
  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr-1));
    if (cur_covered != prev_covered)
    {
      /* create (part1val, ..., part{n-1}val) tuple. */
      bool is_null_range= false;
      ha_rows records;
      if (!tuple_arg)
      {
        /* First flip: start the tuple from the first key part. */
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= 1;
      }
      /* Append key-part values up to (but not including) sel_arg. */
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= (keypart_map << 1) | 1;
      }
      /* min and max share key_val; only length/map need updating. */
      min_range.length= max_range.length= (size_t) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;

      /*
        Get the number of rows in this range. This is done by calling
        records_in_range() unless all these are true:
          1) The user has requested that index statistics should be used
             for equality ranges to avoid the incurred overhead of
             index dives in records_in_range()
          2) The range is not on the form "x IS NULL". The reason is
             that the number of rows with this value are likely to be
             very different than the values in the index statistics
          3) Index statistics is available.
        @see key_val
      */
      if (!info->param->use_index_statistics ||        // (1)
          is_null_range ||                             // (2)
          !table->key_info[scan->keynr].
           has_records_per_key(tuple_arg->part))       // (3)
      {
        DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
        DBUG_ASSERT(min_range.length > 0);
        records=
          table->file->records_in_range(scan->keynr, &min_range, &max_range);
      }
      else
      {
        // Use index statistics
        records= static_cast<ha_rows>(
          table->key_info[scan->keynr].records_per_key(tuple_arg->part));
      }

      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  /* Key ended with uncovered parts: apply the final fraction. */
  if (!prev_covered)
  {
    double tmp= rows2double(table->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  // Todo: This assert fires in PB sysqa RQG tests.
  // DBUG_ASSERT(selectivity_mult <= 1.0);
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}
5380 
5381 
5382 /*
5383   Check if adding a ROR scan to a ROR-intersection reduces its cost of
5384   ROR-intersection and if yes, update parameters of ROR-intersection,
5385   including its cost.
5386 
5387   SYNOPSIS
5388     ror_intersect_add()
5389       param        Parameter from test_quick_select
5390       info         ROR-intersection structure to add the scan to.
5391       ror_scan     ROR scan info to add.
5392       is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
5393                    from other parameters and is passed separately only to
5394                    avoid duplicating the inference code)
5395       trace_costs  Optimizer trace object cost details are added to
5396 
5397   NOTES
5398     Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
5399     intersection decreases. The cost of ROR-intersection is calculated as
5400     follows:
5401 
5402     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
5403 
5404     When we add a scan the first increases and the second decreases.
5405 
5406     cost_of_full_rows_retrieval=
5407       (union of indexes used covers all needed fields) ?
5408         cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
5409         0
5410 
5411     E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
5412                            ror_scan_selectivity({scan1}, scan2) * ... *
5413                            ror_scan_selectivity({scan1,...}, scanN).
5414   RETURN
5415     TRUE   ROR scan added to ROR-intersection, cost updated.
5416     FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
5417 */
5418 
ror_intersect_add(ROR_INTERSECT_INFO * info,ROR_SCAN_INFO * ror_scan,bool is_cpk_scan,Opt_trace_object * trace_costs)5419 static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
5420                               ROR_SCAN_INFO* ror_scan, bool is_cpk_scan,
5421                               Opt_trace_object *trace_costs)
5422 {
5423   double selectivity_mult= 1.0;
5424 
5425   DBUG_ENTER("ror_intersect_add");
5426   DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
5427   DBUG_PRINT("info", ("Adding scan on %s",
5428                       info->param->table->key_info[ror_scan->keynr].name));
5429   DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));
5430 
5431   selectivity_mult = ror_scan_selectivity(info, ror_scan);
5432   if (selectivity_mult == 1.0)
5433   {
5434     /* Don't add this scan if it doesn't improve selectivity. */
5435     DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
5436     DBUG_RETURN(FALSE);
5437   }
5438 
5439   info->out_rows *= selectivity_mult;
5440 
5441   if (is_cpk_scan)
5442   {
5443     /*
5444       CPK scan is used to filter out rows. We apply filtering for each
5445       record of every scan. For each record we assume that one key
5446       compare is done:
5447     */
5448     const Cost_model_table *const cost_model= info->param->table->cost_model();
5449     const double idx_cost=
5450       cost_model->key_compare_cost(rows2double(info->index_records));
5451     info->index_scan_cost.add_cpu(idx_cost);
5452     trace_costs->add("index_scan_cost", idx_cost);
5453   }
5454   else
5455   {
5456     info->index_records += info->param->table->quick_rows[ror_scan->keynr];
5457     info->index_scan_cost+= ror_scan->index_read_cost;
5458     trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
5459     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
5460     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
5461                                                &info->covered_fields))
5462     {
5463       DBUG_PRINT("info", ("ROR-intersect is covering now"));
5464       info->is_covering= TRUE;
5465     }
5466   }
5467 
5468   info->total_cost= info->index_scan_cost;
5469   trace_costs->add("cumulated_index_scan_cost",
5470                    info->index_scan_cost);
5471 
5472   if (!info->is_covering)
5473   {
5474     Cost_estimate sweep_cost;
5475     JOIN *join= info->param->thd->lex->select_lex->join;
5476     const bool is_interrupted= join && join->tables != 1;
5477 
5478     get_sweep_read_cost(info->param->table, double2rows(info->out_rows),
5479                         is_interrupted, &sweep_cost);
5480     info->total_cost+= sweep_cost;
5481     trace_costs->add("disk_sweep_cost", sweep_cost);
5482   }
5483   else
5484     trace_costs->add("disk_sweep_cost", 0);
5485 
5486   DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
5487   DBUG_PRINT("info", ("New cost: %g, %scovering",
5488                       info->total_cost.total_cost(),
5489                       info->is_covering?"" : "non-"));
5490   DBUG_RETURN(TRUE);
5491 }
5492 
5493 
5494 /*
5495   Get best ROR-intersection plan using non-covering ROR-intersection search
5496   algorithm. The returned plan may be covering.
5497 
5498   SYNOPSIS
5499     get_best_ror_intersect()
5500       param            Parameter from test_quick_select function.
5501       tree             Transformed restriction condition to be used to look
5502                        for ROR scans.
5503       cost_est         Do not return read plans with cost > cost_est.
5504       are_all_covering [out] set to TRUE if union of all scans covers all
5505                        fields needed by the query (and it is possible to build
5506                        a covering ROR-intersection)
5507 
5508   NOTES
5509     get_key_scans_params must be called before this function can be called.
5510 
5511     When this function is called by ROR-union construction algorithm it
5512     assumes it is building an uncovered ROR-intersection (and thus # of full
5513     records to be retrieved is wrong here). This is a hack.
5514 
5515   IMPLEMENTATION
5516     The approximate best non-covering plan search algorithm is as follows:
5517 
5518     find_min_ror_intersection_scan()
5519     {
5520       R= select all ROR scans;
5521       order R by (E(#records_matched) * key_record_length).
5522 
5523       S= first(R); -- set of scans that will be used for ROR-intersection
5524       R= R-first(S);
5525       min_cost= cost(S);
5526       min_scan= make_scan(S);
5527       while (R is not empty)
5528       {
5529         firstR= R - first(R);
        if (!(selectivity(S + firstR) < selectivity(S)))
5531           continue;
5532 
5533         S= S + first(R);
5534         if (cost(S) < min_cost)
5535         {
5536           min_cost= cost(S);
5537           min_scan= make_scan(S);
5538         }
5539       }
5540       return min_scan;
5541     }
5542 
5543     See ror_intersect_add function for ROR intersection costs.
5544 
5545     Special handling for Clustered PK scans
5546     Clustered PK contains all table fields, so using it as a regular scan in
5547     index intersection doesn't make sense: a range scan on CPK will be less
5548     expensive in this case.
5549     Clustered PK scan has special handling in ROR-intersection: it is not used
5550     to retrieve rows, instead its condition is used to filter row references
5551     we get from scans on other keys.
5552 
5553   RETURN
5554     ROR-intersection table read plan
5555     NULL if out of memory or no suitable plan found.
5556 */
5557 
5558 static
get_best_ror_intersect(const PARAM * param,SEL_TREE * tree,const Cost_estimate * cost_est)5559 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
5560                                           const Cost_estimate *cost_est)
5561 {
5562   uint idx;
5563   Cost_estimate min_cost;
5564   Opt_trace_context * const trace= &param->thd->opt_trace;
5565   DBUG_ENTER("get_best_ror_intersect");
5566 
5567   Opt_trace_object trace_ror(trace, "analyzing_roworder_intersect");
5568 
5569   min_cost.set_max_cost();
5570 
5571   if ((tree->n_ror_scans < 2) || !param->table->file->stats.records ||
5572       !param->index_merge_intersect_allowed)
5573   {
5574     trace_ror.add("usable", false);
5575     if (tree->n_ror_scans < 2)
5576       trace_ror.add_alnum("cause", "too_few_roworder_scans");
5577     else
5578       trace_ror.add("need_tracing", true);
5579     DBUG_RETURN(NULL);
5580   }
5581 
5582   if (param->order_direction == ORDER::ORDER_DESC)
5583     DBUG_RETURN(NULL);
5584 
5585   /*
5586     Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
5587     them. Also find and save clustered PK scan if there is one.
5588   */
5589   ROR_SCAN_INFO **cur_ror_scan;
5590   ROR_SCAN_INFO *cpk_scan= NULL;
5591   uint cpk_no;
5592   bool cpk_scan_used= FALSE;
5593 
5594   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5595                                                      sizeof(ROR_SCAN_INFO*)*
5596                                                      param->keys)))
5597     DBUG_RETURN(NULL);
5598   cpk_no= ((param->table->file->primary_key_is_clustered()) ?
5599            param->table->s->primary_key : MAX_KEY);
5600 
5601   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
5602   {
5603     ROR_SCAN_INFO *scan;
5604     if (!tree->ror_scans_map.is_set(idx))
5605       continue;
5606     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
5607       DBUG_RETURN(NULL);
5608     if (param->real_keynr[idx] == cpk_no)
5609     {
5610       cpk_scan= scan;
5611       tree->n_ror_scans--;
5612     }
5613     else
5614       *(cur_ror_scan++)= scan;
5615   }
5616 
5617   tree->ror_scans_end= cur_ror_scan;
5618   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
5619                                           tree->ror_scans,
5620                                           tree->ror_scans_end););
5621   /*
5622     Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
5623     ROR_SCAN_INFO's.
5624     Step 2: Get best ROR-intersection using an approximate algorithm.
5625   */
5626   find_intersect_order(tree->ror_scans, tree->ror_scans_end, param);
5627 
5628   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
5629                                           tree->ror_scans,
5630                                           tree->ror_scans_end););
5631 
5632   ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
5633   ROR_SCAN_INFO **intersect_scans_end;
5634   if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5635                                                      sizeof(ROR_SCAN_INFO*)*
5636                                                      tree->n_ror_scans)))
5637     DBUG_RETURN(NULL);
5638   intersect_scans_end= intersect_scans;
5639 
5640   /* Create and incrementally update ROR intersection. */
5641   ROR_INTERSECT_INFO *intersect, *intersect_best;
5642   if (!(intersect= ror_intersect_init(param)) ||
5643       !(intersect_best= ror_intersect_init(param)))
5644     DBUG_RETURN(NULL);
5645 
5646   /* [intersect_scans,intersect_scans_best) will hold the best intersection */
5647   ROR_SCAN_INFO **intersect_scans_best;
5648   cur_ror_scan= tree->ror_scans;
5649   intersect_scans_best= intersect_scans;
5650   /*
5651     Note: trace_isect_idx.end() is called to close this object after
5652     this while-loop.
5653   */
5654   Opt_trace_array trace_isect_idx(trace, "intersecting_indexes");
5655   while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
5656   {
5657     Opt_trace_object trace_idx(trace);
5658     trace_idx.add_utf8("index",
5659                        param->table->key_info[(*cur_ror_scan)->keynr].name);
5660     /* S= S + first(R);  R= R - first(R); */
5661     if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE, &trace_idx))
5662     {
5663       trace_idx.add("cumulated_total_cost",
5664                     intersect->total_cost).
5665         add("usable", false).
5666         add_alnum("cause", "does_not_reduce_cost_of_intersect");
5667       cur_ror_scan++;
5668       continue;
5669     }
5670 
5671     trace_idx.add("cumulated_total_cost",
5672                   intersect->total_cost).
5673       add("usable", true).
5674       add("matching_rows_now", intersect->out_rows).
5675       add("isect_covering_with_this_index", intersect->is_covering);
5676 
5677     *(intersect_scans_end++)= *(cur_ror_scan++);
5678 
5679     if (intersect->total_cost < min_cost)
5680     {
5681       /* Local minimum found, save it */
5682       ror_intersect_cpy(intersect_best, intersect);
5683       intersect_scans_best= intersect_scans_end;
5684       min_cost= intersect->total_cost;
5685       trace_idx.add("chosen", true);
5686     }
5687     else
5688     {
5689       trace_idx.add("chosen", false).
5690         add_alnum("cause", "does_not_reduce_cost");
5691     }
5692   }
5693   // Note: trace_isect_idx trace object is closed here
5694   trace_isect_idx.end();
5695 
5696   if (intersect_scans_best == intersect_scans)
5697   {
5698     trace_ror.add("chosen", false).
5699       add_alnum("cause", "does_not_increase_selectivity");
5700     DBUG_PRINT("info", ("None of scans increase selectivity"));
5701     DBUG_RETURN(NULL);
5702   }
5703 
5704   DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
5705                                           "best ROR-intersection",
5706                                           intersect_scans,
5707                                           intersect_scans_best););
5708 
5709   uint best_num= intersect_scans_best - intersect_scans;
5710   ror_intersect_cpy(intersect, intersect_best);
5711 
5712   /*
5713     Ok, found the best ROR-intersection of non-CPK key scans.
5714     Check if we should add a CPK scan. If the obtained ROR-intersection is
5715     covering, it doesn't make sense to add CPK scan.
5716   */
5717   { // Scope for trace object
5718     Opt_trace_object trace_cpk(trace, "clustered_pk");
5719     if (cpk_scan && !intersect->is_covering)
5720     {
5721       if (ror_intersect_add(intersect, cpk_scan, TRUE, &trace_cpk) &&
5722           (intersect->total_cost < min_cost))
5723       {
5724         trace_cpk.add("clustered_pk_scan_added_to_intersect", true).
5725           add("cumulated_cost", intersect->total_cost);
5726         cpk_scan_used= TRUE;
5727         intersect_best= intersect; //just set pointer here
5728       }
5729       else
5730         trace_cpk.add("clustered_pk_added_to_intersect", false).
5731           add_alnum("cause", "cost");
5732     }
5733     else
5734     {
5735       trace_cpk.add("clustered_pk_added_to_intersect", false).
5736         add_alnum("cause", cpk_scan ?
5737                   "roworder_is_covering" : "no_clustered_pk_index");
5738     }
5739   }
5740   /* Ok, return ROR-intersect plan if we have found one */
5741   TRP_ROR_INTERSECT *trp= NULL;
5742   if (min_cost < *cost_est && (cpk_scan_used || best_num > 1))
5743   {
5744     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
5745       DBUG_RETURN(trp);
5746     if (!(trp->first_scan=
5747            (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5748                                        sizeof(ROR_SCAN_INFO*)*best_num)))
5749       DBUG_RETURN(NULL);
5750     memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
5751     trp->last_scan=  trp->first_scan + best_num;
5752     trp->is_covering= intersect_best->is_covering;
5753     trp->cost_est= intersect_best->total_cost;
5754     /* Prevent divisons by zero */
5755     ha_rows best_rows = double2rows(intersect_best->out_rows);
5756     if (!best_rows)
5757       best_rows= 1;
5758     set_if_smaller(param->table->quick_condition_rows, best_rows);
5759     trp->records= best_rows;
5760     trp->index_scan_cost= intersect_best->index_scan_cost;
5761     trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
5762 
5763     trace_ror.add("rows", trp->records).
5764       add("cost", trp->cost_est).
5765       add("covering", trp->is_covering).
5766       add("chosen", true);
5767 
5768     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
5769                         "cost %g, records %lu",
5770                         trp->cost_est.total_cost(), (ulong) trp->records));
5771   }
5772   else
5773   {
5774     trace_ror.add("chosen", false).
5775       add_alnum("cause", (*cost_est > min_cost) ? "too_few_indexes_to_merge" :
5776                 "cost");
5777   }
5778   DBUG_RETURN(trp);
5779 }
5780 
5781 /*
5782   Get best "range" table read plan for given SEL_TREE, also update some info
5783 
5784   SYNOPSIS
5785     get_key_scans_params()
5786       param                    Parameters from test_quick_select
5787       tree                     Make range select for this SEL_TREE
5788       index_read_must_be_used  TRUE <=> assume 'index only' option will be set
5789                                (except for clustered PK indexes)
5790       update_tbl_stats         TRUE <=> update table->quick_* with information
5791                                about range scans we've evaluated.
5792       cost_est                 Maximum cost. i.e. don't create read plans with
5793                                cost > cost_est.
5794 
5795   DESCRIPTION
5796     Find the best "range" table read plan for given SEL_TREE.
5797     The side effects are
5798      - tree->ror_scans is updated to indicate which scans are ROR scans.
5799      - if update_tbl_stats=TRUE then table->quick_* is updated with info
5800        about every possible range scan.
5801 
5802   RETURN
5803     Best range read plan
5804     NULL if no plan found or error occurred
5805 */
5806 
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool update_tbl_stats,
                                       const Cost_estimate *cost_est)
{
  uint idx, best_idx= 0;
  SEL_ARG *key, *key_to_read= NULL;
  ha_rows best_records= 0;              /* protected by key_to_read */
  uint    best_mrr_flags= 0, best_buf_size= 0;
  TRP_RANGE* read_plan= NULL;
  /* Running "cheapest so far"; starts at the caller-supplied cost limit */
  Cost_estimate read_cost= *cost_est;
  DBUG_ENTER("get_key_scans_params");
  Opt_trace_context * const trace= &param->thd->opt_trace;
  /*
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
    is defined as "not null".
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
                                      "tree scans"););
  Opt_trace_array ota(trace, "range_scan_alternatives");

  /* Recompute the set of ROR scans for this tree from scratch */
  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  for (idx= 0; idx < param->keys; idx++)
  {
    key= tree->keys[idx];
    if (key)
    {
      ha_rows found_records;
      Cost_estimate cost;
      uint mrr_flags, buf_size;
      uint keynr= param->real_keynr[idx];
      /*
        MAYBE_KEY trees depend on values not known at optimization time
        (cf. the "depends_on_unread_values" trace cause below); record the
        key in needed_reg so it can be re-checked later.
      */
      if (key->type == SEL_ARG::MAYBE_KEY ||
          key->maybe_flag)
        param->needed_reg->set_bit(keynr);

      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->covering_keys.is_set(keynr);

      Opt_trace_object trace_idx(trace);
      trace_idx.add_utf8("index", param->table->key_info[keynr].name);

      /* Estimate row count and cost of a range scan over this index */
      found_records= check_quick_select(param, idx, read_index_only, key,
                                        update_tbl_stats, &mrr_flags,
                                        &buf_size, &cost);

#ifdef OPTIMIZER_TRACE
      // check_quick_select() says don't use range if it returns HA_POS_ERROR
      if (found_records != HA_POS_ERROR &&
          param->thd->opt_trace.is_started())
      {
        Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");

        const KEY &cur_key= param->table->key_info[keynr];
        const KEY_PART_INFO *key_part= cur_key.key_part;

        String range_info;
        range_info.set_charset(system_charset_info);
        append_range_all_keyparts(&trace_range, NULL, &range_info,
                                  key, key_part, false);
        trace_range.end(); // NOTE: ends the tracing scope

        trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics).
          add("rowid_ordered", param->is_ror_scan).
          add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)).
          add("index_only", read_index_only).
          add("rows", found_records).
          add("cost", cost);
      }
#endif

      /* param->is_ror_scan was set by check_quick_select() above */
      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }

      /* Keep this scan if it is usable and cheaper than the best so far */
      if (found_records != HA_POS_ERROR &&
          read_cost > cost)
      {
        trace_idx.add("chosen", true);
        read_cost= cost;
        best_records= found_records;
        key_to_read=  key;
        best_idx= idx;
        best_mrr_flags= mrr_flags;
        best_buf_size=  buf_size;
      }
      else
      {
        trace_idx.add("chosen", false);
        /* NOTE: the first "else" below binds to the inner "if" */
        if (found_records == HA_POS_ERROR)
          if (key->type == SEL_ARG::MAYBE_KEY)
            trace_idx.add_alnum("cause", "depends_on_unread_values");
          else
            trace_idx.add_alnum("cause", "unknown");
        else
          trace_idx.add_alnum("cause", "cost");
      }

    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  if (key_to_read)
  {
    /* TRP_RANGE is allocated on the statement mem_root; no delete needed */
    if ((read_plan= new (param->mem_root) TRP_RANGE(key_to_read, best_idx,
                                                    best_mrr_flags)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(best_idx);
      read_plan->cost_est= read_cost;
      read_plan->mrr_buf_size= best_buf_size;
      DBUG_PRINT("info",
                ("Returning range plan for key %s, cost %g, records %lu",
                 param->table->key_info[param->real_keynr[best_idx]].name,
                 read_plan->cost_est.total_cost(), (ulong) read_plan->records));
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}
5933 
5934 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5935 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
5936                                             bool retrieve_full_rows,
5937                                             MEM_ROOT *parent_alloc)
5938 {
5939   QUICK_INDEX_MERGE_SELECT *quick_imerge;
5940   QUICK_RANGE_SELECT *quick;
5941   /* index_merge always retrieves full rows, ignore retrieve_full_rows */
5942   if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
5943     return NULL;
5944 
5945   quick_imerge->records= records;
5946   quick_imerge->cost_est= cost_est;
5947 
5948   for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
5949        range_scan++)
5950   {
5951     if (!(quick= (QUICK_RANGE_SELECT*)
5952           ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
5953         quick_imerge->push_quick_back(quick))
5954     {
5955       delete quick;
5956       delete quick_imerge;
5957       return NULL;
5958     }
5959   }
5960   return quick_imerge;
5961 }
5962 
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  /*
    Full rows are fetched only when the caller asked for them AND the
    intersection is not covering (a covering intersect has all needed
    columns in the indexes already).
  */
  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    /* Allocate children on the parent's mem_root when one was supplied */
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    /* Create one quick select per intersected scan and push it */
    for (st_ror_scan_info **current= first_scan;
         current != last_scan;
         current++)
    {
      if (!(quick= get_quick_select(param, (*current)->idx,
                                    (*current)->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /*
        The clustered-PK quick select is stored separately (cpk_quick),
        not pushed to the list of merged scans, and its handler pointer
        is cleared. NOTE(review): presumably it is used only to filter
        rowids rather than to read rows itself — confirm against
        QUICK_ROR_INTERSECT_SELECT.
      */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->cost_est= cost_est;
  }
  DBUG_RETURN(quick_intrsect);
}
6014 
6015 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)6016 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
6017                                           bool retrieve_full_rows,
6018                                           MEM_ROOT *parent_alloc)
6019 {
6020   QUICK_ROR_UNION_SELECT *quick_roru;
6021   TABLE_READ_PLAN **scan;
6022   QUICK_SELECT_I *quick;
6023   DBUG_ENTER("TRP_ROR_UNION::make_quick");
6024   /*
6025     It is impossible to construct a ROR-union that will not retrieve full
6026     rows, ignore retrieve_full_rows parameter.
6027   */
6028   if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
6029   {
6030     for (scan= first_ror; scan != last_ror; scan++)
6031     {
6032       if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
6033           quick_roru->push_quick_back(quick))
6034         DBUG_RETURN(NULL);
6035     }
6036     quick_roru->records= records;
6037     quick_roru->cost_est= cost_est;
6038   }
6039   DBUG_RETURN(quick_roru);
6040 }
6041 
6042 
6043 /**
6044    If EXPLAIN or if the --safe-updates option is enabled, add a warning that
6045    the index cannot be used for range access due to either type conversion or
6046    different collations on the field used for comparison
6047 
6048    @param param              PARAM from test_quick_select
6049    @param key_num            Key number
6050    @param field              Field in the predicate
6051 */
warn_index_not_applicable(const RANGE_OPT_PARAM * param,const uint key_num,const Field * field)6052 static void warn_index_not_applicable(const RANGE_OPT_PARAM *param,
6053                                       const uint key_num, const Field *field)
6054 {
6055   THD *thd= param->thd;
6056   if (param->using_real_indexes &&
6057       (param->thd->lex->describe ||
6058        thd->variables.option_bits & OPTION_SAFE_UPDATES))
6059     push_warning_printf(
6060             param->thd,
6061             Sql_condition::SL_WARNING,
6062             ER_WARN_INDEX_NOT_APPLICABLE,
6063             ER(ER_WARN_INDEX_NOT_APPLICABLE),
6064             "range",
6065             field->table->key_info[param->real_keynr[key_num]].name,
6066             field->field_name);
6067 }
6068 
6069 
6070 /*
6071   Build a SEL_TREE for <> or NOT BETWEEN predicate
6072 
6073   SYNOPSIS
6074     get_ne_mm_tree()
6075       param       PARAM from test_quick_select
6076       cond_func   item for the predicate
6077       field       field in the predicate
      lt_value    constant that the field should be smaller than
      gt_value    constant that the field should be greater than
6080       cmp_type    compare type for the field
6081 
6082   RETURN
6083     #  Pointer to tree built tree
6084     0  on error
6085 */
get_ne_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item * lt_value,Item * gt_value,Item_result cmp_type)6086 static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
6087                                 Field *field,
6088                                 Item *lt_value, Item *gt_value,
6089                                 Item_result cmp_type)
6090 {
6091   SEL_TREE *tree= NULL;
6092 
6093   if (param->has_errors())
6094     return NULL;
6095 
6096   tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
6097                      lt_value, cmp_type);
6098   if (tree)
6099   {
6100     tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
6101 					    Item_func::GT_FUNC,
6102 					    gt_value, cmp_type));
6103   }
6104   return tree;
6105 }
6106 
6107 
6108 /**
6109   Factory function to build a SEL_TREE from an <in predicate>
6110 
6111   @param param      Information on 'just about everything'.
6112   @param predicand  The <in predicate's> predicand, i.e. the left-hand
6113                     side of the <in predicate> expression.
6114   @param op         The 'in' operator itself.
6115   @param value      The right-hand side of the <in predicate> expression.
6116   @param cmp_type   What types we should pretend that the arguments are.
6117   @param is_negated If true, the operator is NOT IN, otherwise IN.
6118 */
get_func_mm_tree_from_in_predicate(RANGE_OPT_PARAM * param,Item * predicand,Item_func_in * op,Item * value,Item_result cmp_type,bool is_negated)6119 static SEL_TREE *get_func_mm_tree_from_in_predicate(RANGE_OPT_PARAM *param,
6120                                                     Item *predicand,
6121                                                     Item_func_in *op,
6122                                                     Item *value,
6123                                                     Item_result cmp_type,
6124                                                     bool is_negated)
6125 {
6126   if (param->has_errors())
6127     return NULL;
6128 
6129   if (is_negated)
6130   {
6131     // We don't support row constructors (multiple columns on lhs) here.
6132     if (predicand->type() != Item::FIELD_ITEM)
6133       return NULL;
6134 
6135     Field *field= static_cast<Item_field*>(predicand)->field;
6136 
6137     if (op->array && op->array->result_type() != ROW_RESULT)
6138     {
6139       /*
6140         We get here for conditions on the form "t.key NOT IN (c1, c2, ...)",
6141         where c{i} are constants. Our goal is to produce a SEL_TREE that
6142         represents intervals:
6143 
6144         ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)
6145 
6146         where $MIN is either "-inf" or NULL.
6147 
6148         The most straightforward way to produce it is to convert NOT
6149         IN into "(t.key != c1) AND (t.key != c2) AND ... " and let the
6150         range analyzer build a SEL_TREE from that. The problem is that
6151         the range analyzer will use O(N^2) memory (which is probably a
6152         bug), and people who do use big NOT IN lists (e.g. see
6153         BUG#15872, BUG#21282), will run out of memory.
6154 
6155         Another problem with big lists like (*) is that a big list is
6156         unlikely to produce a good "range" access, while considering
6157         that range access will require expensive CPU calculations (and
6158         for MyISAM even index accesses). In short, big NOT IN lists
6159         are rarely worth analyzing.
6160 
6161         Considering the above, we'll handle NOT IN as follows:
6162 
6163         - if the number of entries in the NOT IN list is less than
6164           NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*)
6165           manually.
6166 
6167         - Otherwise, don't produce a SEL_TREE.
6168       */
6169 
6170       const uint NOT_IN_IGNORE_THRESHOLD= 1000;
6171       // If we have t.key NOT IN (null, null, ...) or the list is too long
6172       if (op->array->used_count == 0 ||
6173           op->array->used_count > NOT_IN_IGNORE_THRESHOLD)
6174         return NULL;
6175 
6176       MEM_ROOT *tmp_root= param->mem_root;
6177       param->thd->mem_root= param->old_root;
6178       /*
6179         Create one Item_type constant object. We'll need it as
6180         get_mm_parts only accepts constant values wrapped in Item_Type
6181         objects.
6182         We create the Item on param->mem_root which points to
6183         per-statement mem_root (while thd->mem_root is currently pointing
6184         to mem_root local to range optimizer).
6185       */
6186       Item *value_item= op->array->create_item();
6187       param->thd->mem_root= tmp_root;
6188 
6189       if (!value_item)
6190         return NULL;
6191 
6192       /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
6193       uint i= 0;
6194       SEL_TREE *tree= NULL;
6195       do
6196       {
6197         op->array->value_to_item(i, value_item);
6198         tree= get_mm_parts(param, op, field, Item_func::LT_FUNC, value_item,
6199                            cmp_type);
6200         if (!tree)
6201           break;
6202         i++;
6203       }
6204       while (i < op->array->used_count && tree->type == SEL_TREE::IMPOSSIBLE);
6205 
6206       if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
6207         /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3) */
6208         return NULL;
6209       SEL_TREE *tree2= NULL;
6210       for (; i < op->array->used_count; i++)
6211       {
6212         if (op->array->compare_elems(i, i - 1))
6213         {
6214           /* Get a SEL_TREE for "-inf < X < c_i" interval */
6215           op->array->value_to_item(i, value_item);
6216           tree2= get_mm_parts(param, op, field, Item_func::LT_FUNC,
6217                               value_item, cmp_type);
6218           if (!tree2)
6219           {
6220             tree= NULL;
6221             break;
6222           }
6223 
6224           /* Change all intervals to be "c_{i-1} < X < c_i" */
6225           for (uint idx= 0; idx < param->keys; idx++)
6226           {
6227             SEL_ARG *new_interval, *last_val;
6228             if (((new_interval= tree2->keys[idx])) &&
6229                 (tree->keys[idx]) &&
6230                 ((last_val= tree->keys[idx]->last())))
6231             {
6232               new_interval->min_value= last_val->max_value;
6233               new_interval->min_flag= NEAR_MIN;
6234 
6235               /*
6236                 If the interval is over a partial keypart, the
6237                 interval must be "c_{i-1} <= X < c_i" instead of
6238                 "c_{i-1} < X < c_i". Reason:
6239 
6240                 Consider a table with a column "my_col VARCHAR(3)",
6241                 and an index with definition
6242                 "INDEX my_idx my_col(1)". If the table contains rows
6243                 with my_col values "f" and "foo", the index will not
6244                 distinguish the two rows.
6245 
6246                 Note that tree_or() below will effectively merge
6247                 this range with the range created for c_{i-1} and
6248                 we'll eventually end up with only one range:
6249                 "NULL < X".
6250 
6251                 Partitioning indexes are never partial.
6252               */
6253               if (param->using_real_indexes)
6254               {
6255                 const KEY key=
6256                   param->table->key_info[param->real_keynr[idx]];
6257                 const KEY_PART_INFO *kpi= key.key_part + new_interval->part;
6258 
6259                 if (kpi->key_part_flag & HA_PART_KEY_SEG)
6260                   new_interval->min_flag= 0;
6261               }
6262             }
6263           }
6264           /*
6265             The following doesn't try to allocate memory so no need to
6266             check for NULL.
6267           */
6268           tree= tree_or(param, tree, tree2);
6269         }
6270       }
6271 
6272       if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
6273       {
6274         /*
6275           Get the SEL_TREE for the last "c_last < X < +inf" interval
6276           (value_item cotains c_last already)
6277         */
6278         tree2= get_mm_parts(param, op, field, Item_func::GT_FUNC,
6279                             value_item, cmp_type);
6280         tree= tree_or(param, tree, tree2);
6281       }
6282       return tree;
6283     }
6284     else
6285     {
6286       SEL_TREE *tree= get_ne_mm_tree(param, op, field, op->arguments()[1],
6287                                      op->arguments()[1], cmp_type);
6288       if (tree)
6289       {
6290         Item **arg, **end;
6291         for (arg= op->arguments() + 2, end= arg + op->argument_count() - 2;
6292              arg < end ; arg++)
6293         {
6294           tree= tree_and(param, tree,
6295                          get_ne_mm_tree(param, op, field, *arg, *arg,
6296                                         cmp_type));
6297         }
6298       }
6299       return tree;
6300     }
6301     return NULL;
6302   }
6303 
6304   // The expression is IN, not negated.
6305   if (predicand->type() == Item::FIELD_ITEM)
6306   {
6307     // The expression is (<column>) IN (...)
6308     Field *field= static_cast<Item_field*>(predicand)->field;
6309     SEL_TREE *tree= get_mm_parts(param, op, field, Item_func::EQ_FUNC,
6310                                  op->arguments()[1], cmp_type);
6311     if (tree)
6312     {
6313       Item **arg, **end;
6314       for (arg= op->arguments() + 2, end= arg + op->argument_count() - 2;
6315            arg < end ; arg++)
6316       {
6317         tree= tree_or(param, tree, get_mm_parts(param, op, field,
6318                                                 Item_func::EQ_FUNC,
6319                                                 *arg, cmp_type));
6320       }
6321     }
6322     return tree;
6323   }
6324   if (predicand->type() == Item::ROW_ITEM)
6325   {
6326     /*
6327       The expression is (<column>,...) IN (...)
6328 
6329       We iterate over the rows on the rhs of the in predicate,
6330       building an OR tree of ANDs, a.k.a. a DNF expression out of this. E.g:
6331 
6332       (col1, col2) IN ((const1, const2), (const3, const4))
6333       becomes
6334       (col1 = const1 AND col2 = const2) OR (col1 = const3 AND col2 = const4)
6335     */
6336     SEL_TREE *or_tree= &null_sel_tree;
6337     Item_row *row_predicand= static_cast<Item_row*>(predicand);
6338 
6339     // Iterate over the rows on the rhs of the in predicate, building an OR.
6340     for (uint i= 1; i < op->argument_count(); ++i)
6341     {
6342       /*
6343         We only support row value expressions. Some optimizations rewrite
6344         the Item tree, and we don't handle that.
6345       */
6346       Item *in_list_item= op->arguments()[i];
6347       if (in_list_item->type() != Item::ROW_ITEM)
6348         return NULL;
6349       Item_row *row= static_cast<Item_row*>(in_list_item);
6350 
6351       // Iterate over the columns, building an AND tree.
6352       SEL_TREE *and_tree= NULL;
6353       for (uint j= 0; j < row_predicand->cols(); ++j)
6354       {
6355         Item *item= row_predicand->element_index(j);
6356 
6357         // We only support columns in the row on the lhs of the in predicate.
6358         if (item->type() != Item::FIELD_ITEM)
6359           return NULL;
6360         Field *field= static_cast<Item_field*>(item)->field;
6361 
6362         Item *value= row->element_index(j);
6363 
6364         SEL_TREE *and_expr=
6365           get_mm_parts(param, op, field, Item_func::EQ_FUNC, value, cmp_type);
6366 
6367         and_tree= tree_and(param, and_tree, and_expr);
6368         /*
6369           Short-circuit evaluation: If and_expr is NULL then no key part in
6370           this disjunct can be used as a search key. Or in other words the
6371           condition is always true. Hence the whole disjunction is always true.
6372         */
6373         if (and_tree == NULL)
6374           return NULL;
6375       }
6376       or_tree= tree_or(param, and_tree, or_tree);
6377     }
6378     return or_tree;
6379   }
6380   return NULL;
6381 }
6382 
6383 
6384 /**
6385   Build a SEL_TREE for a simple predicate.
6386 
6387   @param param     PARAM from test_quick_select
6388   @param predicand field in the predicate
6389   @param cond_func item for the predicate
6390   @param value     constant in the predicate
6391   @param cmp_type  compare type for the field
  @param inv       TRUE <=> NOT cond_func is considered
                   (makes sense only when cond_func is BETWEEN or IN)
6394 
6395   @return Pointer to the built tree.
6396 
  @todo Remove the appalling hack that 'value' can be a 1 cast to an Item*.
6398 */
6399 
static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
                                  Item *predicand,
                                  Item_func *cond_func,
                                  Item *value,
                                  Item_result cmp_type,
                                  bool inv)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

  if (param->has_errors())
    DBUG_RETURN(0);

  // Dispatch on the predicate operator; each case builds intervals via
  // get_mm_parts()/get_ne_mm_tree() for the field in 'predicand'.
  switch (cond_func->functype()) {

  case Item_func::XOR_FUNC:
    DBUG_RETURN(NULL); // Always true (don't use range access on XOR).
    break;             // See WL#5800

  case Item_func::NE_FUNC:
    // "field <> value": 'value' is passed both as the upper bound of the
    // left interval and the lower bound of the right one.
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;
      tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
    }
    break;

  case Item_func::BETWEEN:
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;

      if (!value)
      {
        // "field [NOT] BETWEEN low AND high": the field is argument 0,
        // the bounds are arguments 1 and 2.
        if (inv)
        {
          // NOT BETWEEN: field < low OR field > high.
          tree= get_ne_mm_tree(param, cond_func, field,
                               cond_func->arguments()[1],
                               cond_func->arguments()[2], cmp_type);
        }
        else
        {
          // BETWEEN: (field >= low) AND (field <= high).
          tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
                             cond_func->arguments()[1],cmp_type);
          if (tree)
          {
            tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
                                                     Item_func::LE_FUNC,
                                                     cond_func->arguments()[2],
                                                     cmp_type));
          }
        }
      }
      else
        /*
          "const [NOT] BETWEEN field1 AND field2": 'value' here is the
          argument index (1 or 2) of the field, cast to an Item* (see the
          @todo in the header comment). Index 1 (the low bound) yields
          "field1 <= const" ("field1 > const" when negated); index 2
          yields "field2 >= const" ("field2 < const" when negated).
        */
        tree= get_mm_parts(param, cond_func, field,
                           (inv ?
                            (value == reinterpret_cast<Item*>(1) ?
                             Item_func::GT_FUNC :
                             Item_func::LT_FUNC):
                            (value == reinterpret_cast<Item*>(1) ?
                             Item_func::LE_FUNC :
                             Item_func::GE_FUNC)),
                           cond_func->arguments()[0], cmp_type);
    }
    break;
  case Item_func::IN_FUNC:
  {
    // [NOT] IN has its own helper handling both field and row predicands.
    Item_func_in *in_pred= static_cast<Item_func_in*>(cond_func);
    tree= get_func_mm_tree_from_in_predicate(param, predicand, in_pred, value,
                                             cmp_type, inv);
  }
  break;
  default:
    if (predicand->type() == Item::FIELD_ITEM)
    {
      Field *field= static_cast<Item_field*>(predicand)->field;

      /*
         Here the function for the following predicates are processed:
         <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL and GIS functions.
         If the predicate is of the form (value op field) it is handled
         as the equivalent predicate (field rev_op value), e.g.
         2 <= a is handled as a >= 2.
      */
      Item_func::Functype func_type=
        (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
      tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
    }
  }

  DBUG_RETURN(tree);
}
6493 
6494 
6495 /*
6496   Build conjunction of all SEL_TREEs for a simple predicate applying equalities
6497 
6498   SYNOPSIS
6499     get_full_func_mm_tree()
6500       param       PARAM from test_quick_select
6501       predicand   column or row constructor in the predicate's left-hand side.
6502       op          Item for the predicate operator
6503       value       constant in the predicate (or a field already read from
6504                   a table in the case of dynamic range access)
6505                   For BETWEEN it contains the number of the field argument.
6506       inv         If true, the predicate is negated, e.g. NOT IN.
6507                   (makes sense only when cond_func is BETWEEN or IN)
6508 
6509   DESCRIPTION
6510     For a simple SARGable predicate of the form (f op c), where f is a field and
6511     c is a constant, the function builds a conjunction of all SEL_TREES that can
6512     be obtained by the substitution of f for all different fields equal to f.
6513 
6514   NOTES
6515     If the WHERE condition contains a predicate (fi op c),
    then not only the SEL_TREE for this predicate is built, but
6517     the trees for the results of substitution of fi for
6518     each fj belonging to the same multiple equality as fi
6519     are built as well.
6520     E.g. for WHERE t1.a=t2.a AND t2.a > 10
6521     a SEL_TREE for t2.a > 10 will be built for quick select from t2
6522     and
6523     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
6524 
6525     A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
    in a similar way: we build a conjunction of trees for the results
6527     of all substitutions of fi for equal fj.
6528     Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
    differently. It is considered as a conjunction of two SARGable
6530     predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
6531     is called for each of them separately producing trees for
6532        AND j (f1j <=c ) and AND j (f2j <= c)
6533     After this these two trees are united in one conjunctive tree.
6534     It's easy to see that the same tree is obtained for
6535        AND j,k (f1j <=c AND f2k<=c)
6536     which is equivalent to
6537        AND j,k (c BETWEEN f1j AND f2k).
6538     The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
6539     which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
6540     function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
6541     producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two
6542     trees are united in one OR-tree. The expression
6543       (AND j (f1j > c) OR AND j (f2j < c)
6544     is equivalent to the expression
6545       AND j,k (f1j > c OR f2k < c)
6546     which is just a translation of
6547       AND j,k (c NOT BETWEEN f1j AND f2k)
6548 
6549     In the cases when one of the items f1, f2 is a constant c1 we do not create
6550     a tree for it at all. It works for BETWEEN predicates but does not
6551     work for NOT BETWEEN predicates as we have to evaluate the expression
6552     with it. If it is TRUE then the other tree can be completely ignored.
6553     We do not do it now and no trees are built in these cases for
6554     NOT BETWEEN predicates.
6555 
6556     As to IN predicates only ones of the form (f IN (c1,...,cn)),
6557     where f1 is a field and c1,...,cn are constant, are considered as
6558     SARGable. We never try to narrow the index scan using predicates of
6559     the form (c IN (c1,...,f,...,cn)).
6560 
6561   RETURN
6562     Pointer to the tree representing the built conjunction of SEL_TREEs
6563 */
6564 
static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
                                       Item *predicand,
                                       Item_func *op,
                                       Item *value,
                                       bool inv)
{
  SEL_TREE *tree= 0;
  SEL_TREE *ftree= 0;
  // Tables that are neither already read nor the one being optimized:
  // a predicate depending on any of them cannot supply constants here.
  const table_map param_comp=
    ~(param->prev_tables | param->read_tables | param->current_table);
  DBUG_ENTER("get_full_func_mm_tree");

  if (param->has_errors())
    DBUG_RETURN(NULL);

  /*
    Here we compute a set of tables that we consider as constants
    suppliers during execution of the SEL_TREE that we produce below.
  */
  table_map ref_tables= 0;
  for (uint i= 0; i < op->arg_count; i++)
  {
    Item *arg= op->arguments()[i]->real_item();
    if (arg != predicand)
      ref_tables|= arg->used_tables();
  }
  if (predicand->type() == Item::FIELD_ITEM)
  {
    Item_field *item_field= static_cast<Item_field*>(predicand);
    Field *field= item_field->field;
    Item_result cmp_type= field->cmp_type();

    // Build the tree for the predicate on 'field' itself, provided all
    // tables it depends on are readable at this point.
    if (!((ref_tables | item_field->table_ref->map()) & param_comp))
      ftree= get_func_mm_tree(param, predicand, op, value, cmp_type, inv);
    /*
      If 'field' is part of a multiple equality, also build a tree for the
      predicate with 'field' replaced by each equal field, and AND all of
      the trees together (see the NOTES section of the header comment).
    */
    Item_equal *item_equal= item_field->item_equal;
    if (item_equal != NULL)
    {
      Item_equal_iterator it(*item_equal);
      Item_field *item;
      while ((item= it++))
      {
        Field *f= item->field;
        if (!field->eq(f) &&
            !((ref_tables | item->table_ref->map()) & param_comp))
        {
          tree= get_func_mm_tree(param, item, op, value, cmp_type, inv);
          ftree= !ftree ? tree : tree_and(param, ftree, tree);
        }
      }
    }
  }
  else if (predicand->type() == Item::ROW_ITEM)
  {
    // Row predicand, e.g. "(a, b) IN (...)": there is no single field
    // comparison type, so ROW_RESULT is passed.
    ftree= get_func_mm_tree(param, predicand, op, value, ROW_RESULT, inv);
    DBUG_RETURN(ftree);
  }
  DBUG_RETURN(ftree);
}
6623 
6624 /**
6625   The Range Analysis Module, which finds range access alternatives
6626   applicable to single or multi-index (UNION) access. The function
6627   does not calculate or care about the cost of the different
6628   alternatives.
6629 
6630   get_mm_tree() employs a relaxed boolean algebra where the solution
6631   may be bigger than what the rules of boolean algebra accept. In
6632   other words, get_mm_tree() may return range access plans that will
  read more rows than the input conditions dictate. In its simplest
6634   form, consider a condition on two fields indexed by two different
6635   indexes:
6636 
6637      "WHERE fld1 > 'x' AND fld2 > 'y'"
6638 
6639   In this case, there are two single-index range access alternatives.
6640   No matter which access path is chosen, rows that are not in the
6641   result set may be read.
6642 
6643   In the case above, get_mm_tree() will create range access
6644   alternatives for both indexes, so boolean algebra is still correct.
6645   In other cases, however, the conditions are too complex to be used
6646   without relaxing the rules. This typically happens when ORing a
6647   conjunction to a multi-index disjunctions (@see e.g.
6648   imerge_list_or_tree()). When this happens, the range optimizer may
6649   choose to ignore conjunctions (any condition connected with AND). The
6650   effect of this is that the result includes a "bigger" solution than
6651   neccessary. This is OK since all conditions will be used as filters
6652   after row retrieval.
6653 
6654   @see SEL_TREE::keys and SEL_TREE::merges for details of how single
6655   and multi-index range access alternatives are stored.
6656 */
static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond)
{
  SEL_TREE *tree=0;
  SEL_TREE *ftree= 0;
  Item_field *field_item= 0;
  bool inv= FALSE;             // true for negated BETWEEN/IN predicates
  Item *value= 0;
  DBUG_ENTER("get_mm_tree");

  if (param->has_errors())
    DBUG_RETURN(NULL);

  // AND/OR condition: recurse into each argument and combine the
  // resulting trees with tree_and()/tree_or().
  if (cond->type() == Item::COND_ITEM)
  {
    List_iterator<Item> li(*((Item_cond*) cond)->argument_list());

    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      // AND: intersect the trees. An IMPOSSIBLE intersection makes the
      // whole conjunction impossible, so stop early.
      tree= NULL;
      Item *item;
      while ((item=li++))
      {
        SEL_TREE *new_tree= get_mm_tree(param,item);
        if (param->has_errors())
          DBUG_RETURN(NULL);
        tree= tree_and(param,tree,new_tree);
        dbug_print_tree("after_and", tree, param);
        if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
          break;
      }
    }
    else
    {                                           // Item OR
      // OR: union the trees. A disjunct with no tree (NULL, i.e. "always
      // true") makes the whole disjunction unusable for range access.
      tree= get_mm_tree(param,li++);
      if (param->has_errors())
        DBUG_RETURN(NULL);
      if (tree)
      {
        Item *item;
        while ((item=li++))
        {
          SEL_TREE *new_tree=get_mm_tree(param,item);
          if (new_tree == NULL || param->has_errors())
            DBUG_RETURN(NULL);
          tree= tree_or(param,tree,new_tree);
          dbug_print_tree("after_or", tree, param);
          if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
            break;
        }
      }
    }
    dbug_print_tree("tree_returned", tree, param);
    DBUG_RETURN(tree);
  }
  /*
    Here when simple cond
    There are limits on what kinds of const items we can evaluate.
    At this stage a subquery in 'cond' might not be fully transformed yet
    (example: semijoin) thus cannot be evaluated.
  */
  if (cond->const_item() && !cond->is_expensive() && !cond->has_subquery())
  {
    /*
      During the cond->val_int() evaluation we can come across a subselect
      item which may allocate memory on the thd->mem_root and assumes
      all the memory allocated has the same life span as the subselect
      item itself. So we have to restore the thread's mem_root here.
    */
    MEM_ROOT *tmp_root= param->mem_root;
    param->thd->mem_root= param->old_root;
    // A constant condition collapses to an ALWAYS or IMPOSSIBLE tree.
    const SEL_TREE::Type type=
      cond->val_int() ? SEL_TREE::ALWAYS : SEL_TREE::IMPOSSIBLE;
    tree= new (tmp_root) SEL_TREE(type, tmp_root, param->keys);

    param->thd->mem_root= tmp_root;
    if (param->has_errors())
      DBUG_RETURN(NULL);
    dbug_print_tree("tree_returned", tree, param);
    DBUG_RETURN(tree);
  }

  table_map ref_tables= 0;
  table_map param_comp= ~(param->prev_tables | param->read_tables |
		          param->current_table);
  if (cond->type() != Item::FUNC_ITEM)
  {						// Should be a field
    ref_tables= cond->used_tables();
    if ((ref_tables & param->current_table) ||
	(ref_tables & ~(param->prev_tables | param->read_tables)))
      DBUG_RETURN(0);
    DBUG_RETURN(new (param->mem_root)
                SEL_TREE(SEL_TREE::MAYBE, param->mem_root, param->keys));
  }

  Item_func *cond_func= (Item_func*) cond;
  if (cond_func->functype() == Item_func::BETWEEN ||
      cond_func->functype() == Item_func::IN_FUNC)
    inv= ((Item_func_opt_neg *) cond_func)->negated;
  else
  {
    /*
      During the cond_func->select_optimize() evaluation we can come across a
      subselect item which may allocate memory on the thd->mem_root and assumes
      all the memory allocated has the same life span as the subselect item
      itself. So we have to restore the thread's mem_root here.
    */
    MEM_ROOT *tmp_root= param->mem_root;
    param->thd->mem_root= param->old_root;
    Item_func::optimize_type opt_type= cond_func->select_optimize();
    param->thd->mem_root= tmp_root;
    if (opt_type == Item_func::OPTIMIZE_NONE)
      DBUG_RETURN(NULL);
  }

  param->cond= cond;

  /*
    Notice that all fields that are outer references are const during
    the execution and should not be considered for range analysis like
    fields coming from the local query block are.
  */
  switch (cond_func->functype())
  {
  case Item_func::BETWEEN:
  {
    Item *const arg_left= cond_func->arguments()[0];

    // Form "field [NOT] BETWEEN c1 AND c2": build the tree for arg 0.
    if (!(arg_left->used_tables() & OUTER_REF_TABLE_BIT) &&
        arg_left->real_item()->type() == Item::FIELD_ITEM)
    {
      field_item= (Item_field*) arg_left->real_item();
      ftree= get_full_func_mm_tree(param, field_item, cond_func, NULL, inv);
    }

    /*
      Concerning the code below see the NOTES section in
      the comments for the function get_full_func_mm_tree()
    */
    for (uint i= 1 ; i < cond_func->arg_count ; i++)
    {
      Item *const arg= cond_func->arguments()[i];

      if (!(arg->used_tables() & OUTER_REF_TABLE_BIT) &&
          arg->real_item()->type() == Item::FIELD_ITEM)
      {
        field_item= (Item_field*) arg->real_item();
        // Pass the field's argument index (1 or 2) disguised as an Item*;
        // get_func_mm_tree() decodes it (see the @todo in its header).
        SEL_TREE *tmp=
          get_full_func_mm_tree(param, field_item, cond_func,
                                reinterpret_cast<Item*>(i), inv);
        if (inv)
        {
          // NOT BETWEEN: the two bound predicates are OR-ed together.
          tree= !tree ? tmp : tree_or(param, tree, tmp);
          if (tree == NULL)
            break;
        }
        else
          tree= tree_and(param, tree, tmp);
      }
      else if (inv)
      {
        tree= 0;
        break;
      }
    }

    ftree = tree_and(param, ftree, tree);
    break;
  } // end case Item_func::BETWEEN

  case Item_func::IN_FUNC:
  {
    // Only field or row predicands are SARGable for [NOT] IN.
    Item *const predicand= ((Item_func_in*) cond_func)->key_item()->real_item();
    if (predicand->type() != Item::FIELD_ITEM &&
        predicand->type() != Item::ROW_ITEM)
      DBUG_RETURN(NULL);
    ftree= get_full_func_mm_tree(param, predicand, cond_func, NULL, inv);
    break;
  } // end case Item_func::IN_FUNC

  case Item_func::MULT_EQUAL_FUNC:
  {
    // Multiple equality: usable only if it contains a constant; then
    // build "field = const" for every member field and AND them together.
    Item_equal *item_equal= (Item_equal *) cond;
    if (!(value= item_equal->get_const()))
      DBUG_RETURN(0);
    Item_equal_iterator it(*item_equal);
    ref_tables= value->used_tables();
    while ((field_item= it++))
    {
      Field *field= field_item->field;
      Item_result cmp_type= field->cmp_type();
      if (!((ref_tables | field_item->table_ref->map()) & param_comp))
      {
        tree= get_mm_parts(param, item_equal, field, Item_func::EQ_FUNC,
		           value,cmp_type);
        ftree= !ftree ? tree : tree_and(param, ftree, tree);
      }
    }

    dbug_print_tree("tree_returned", ftree, param);
    DBUG_RETURN(ftree);
  } // end case Item_func::MULT_EQUAL_FUNC

  default:
  {
    Item *const arg_left= cond_func->arguments()[0];

    DBUG_ASSERT (!ftree);
    if (!(arg_left->used_tables() & OUTER_REF_TABLE_BIT) &&
        arg_left->real_item()->type() == Item::FIELD_ITEM)
    {
      field_item= (Item_field*) arg_left->real_item();
      value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : NULL;
      ftree= get_full_func_mm_tree(param, field_item, cond_func, value, inv);
    }
    /*
      Even if get_full_func_mm_tree() was executed above and did not
      return a range predicate it may still be possible to create one
      by reversing the order of the operands. Note that this only
      applies to predicates where both operands are fields. Example: A
      query of the form

         WHERE t1.a OP t2.b

      In this case, arguments()[0] == t1.a and arguments()[1] == t2.b.
      When creating range predicates for t2, get_full_func_mm_tree()
      above will return NULL because 'field' belongs to t1 and only
      predicates that applies to t2 are of interest. In this case a
      call to get_full_func_mm_tree() with reversed operands (see
      below) may succeed.
     */
    Item *arg_right;
    if (!ftree && cond_func->have_rev_func() &&
        (arg_right= cond_func->arguments()[1]) &&
        !(arg_right->used_tables() & OUTER_REF_TABLE_BIT) &&
        arg_right->real_item()->type() == Item::FIELD_ITEM)
    {
      field_item= (Item_field*) arg_right->real_item();
      value= arg_left;
      ftree= get_full_func_mm_tree(param, field_item, cond_func, value, inv);
    }
  }  // end case default
  }  // end switch

  dbug_print_tree("tree_returned", ftree, param);
  DBUG_RETURN(ftree);
}
6903 
6904 /**
6905   Test whether a comparison operator is a spatial comparison
6906   operator, i.e. Item_func::SP_*.
6907 
6908   Used to check if range access using operator 'op_type' is applicable
6909   for a non-spatial index.
6910 
6911   @param   op_type  The comparison operator.
6912   @return  true if 'op_type' is a spatial comparison operator, false otherwise.
6913 
6914 */
is_spatial_operator(Item_func::Functype op_type)6915 bool is_spatial_operator(Item_func::Functype op_type)
6916 {
6917   switch (op_type)
6918   {
6919   case Item_func::SP_EQUALS_FUNC:
6920   case Item_func::SP_DISJOINT_FUNC:
6921   case Item_func::SP_INTERSECTS_FUNC:
6922   case Item_func::SP_TOUCHES_FUNC:
6923   case Item_func::SP_CROSSES_FUNC:
6924   case Item_func::SP_WITHIN_FUNC:
6925   case Item_func::SP_CONTAINS_FUNC:
6926   case Item_func::SP_COVEREDBY_FUNC:
6927   case Item_func::SP_COVERS_FUNC:
6928   case Item_func::SP_OVERLAPS_FUNC:
6929   case Item_func::SP_STARTPOINT:
6930   case Item_func::SP_ENDPOINT:
6931   case Item_func::SP_EXTERIORRING:
6932   case Item_func::SP_POINTN:
6933   case Item_func::SP_GEOMETRYN:
6934   case Item_func::SP_INTERIORRINGN:
6935     return true;
6936   default:
6937     return false;
6938   }
6939 }
6940 
6941 /**
6942   Test if 'value' is comparable to 'field' when setting up range
6943   access for predicate "field OP value". 'field' is a field in the
6944   table being optimized for while 'value' is whatever 'field' is
6945   compared to.
6946 
6947   @param cond_func   the predicate item that compares 'field' with 'value'
6948   @param field       field in the predicate
6949   @param itype       itMBR if indexed field is spatial, itRAW otherwise
6950   @param comp_type   comparator for the predicate
6951   @param value       whatever 'field' is compared to
6952 
6953   @return true if 'field' and 'value' are comparable, false otherwise
6954 */
6955 
comparable_in_index(Item * cond_func,const Field * field,const Field::imagetype itype,Item_func::Functype comp_type,const Item * value)6956 static bool comparable_in_index(Item *cond_func,
6957                                 const Field *field,
6958                                 const Field::imagetype itype,
6959                                 Item_func::Functype comp_type,
6960                                 const Item *value)
6961 {
6962   /*
6963     Usually an index cannot be used if the column collation differs
6964     from the operation collation. However, a case insensitive index
6965     may be used for some binary searches:
6966 
6967        WHERE latin1_swedish_ci_column = 'a' COLLATE lati1_bin;
6968        WHERE latin1_swedish_ci_colimn = BINARY 'a '
6969   */
6970   if ((field->result_type() == STRING_RESULT &&
6971        field->match_collation_to_optimize_range() &&
6972        value->result_type() == STRING_RESULT &&
6973        itype == Field::itRAW &&
6974        field->charset() != cond_func->compare_collation() &&
6975        !(cond_func->compare_collation()->state & MY_CS_BINSORT &&
6976          (comp_type == Item_func::EQUAL_FUNC ||
6977           comp_type == Item_func::EQ_FUNC))))
6978     return false;
6979 
6980   /*
6981     Temporal values: Cannot use range access if:
6982        'indexed_varchar_column = temporal_value'
6983     because there are many ways to represent the same date as a
6984     string. A few examples: "01-01-2001", "1-1-2001", "2001-01-01",
6985     "2001#01#01". The same problem applies to time. Thus, we cannot
6986     create a useful range predicate for temporal values into VARCHAR
6987     column indexes. @see add_key_field()
6988   */
6989   if (!field->is_temporal() && value->is_temporal())
6990     return false;
6991 
6992   /*
6993     Temporal values: Cannot use range access if
6994        'indexed_time = temporal_value_with_date_part'
6995     because:
6996       - without index, a TIME column with value '48:00:00' is
6997         equal to a DATETIME column with value
6998         'CURDATE() + 2 days'
6999       - with range access into the TIME column, CURDATE() + 2
7000         days becomes "00:00:00" (Field_timef::store_internal()
7001         simply extracts the time part from the datetime) which
7002         is a lookup key which does not match "48:00:00". On the other
7003         hand, we can do ref access for IndexedDatetimeComparedToTime
7004         because Field_temporal_with_date::store_time() will convert
7005         48:00:00 to CURDATE() + 2 days which is the correct lookup
7006         key.
7007   */
7008   if (field_time_cmp_date(field, value))
7009     return false;
7010 
7011   /*
7012     We can't always use indexes when comparing a string index to a
7013     number. cmp_type() is checked to allow comparison of dates and
7014     numbers.
7015   */
7016   if (field->result_type() == STRING_RESULT &&
7017       value->result_type() != STRING_RESULT &&
7018       field->cmp_type() != value->result_type())
7019     return false;
7020 
7021   /*
7022     We can't use indexes when comparing to a JSON value. For example,
7023     the string '{}' should compare equal to the JSON string "{}". If
7024     we use a string index to compare the two strings, we will be
7025     comparing '{}' and '"{}"', which don't compare equal.
7026   */
7027   if (value->result_type() == STRING_RESULT &&
7028       value->field_type() == MYSQL_TYPE_JSON)
7029     return false;
7030 
7031   return true;
7032 }
7033 
static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field,
	     Item_func::Functype type,
	     Item *value, Item_result cmp_type)
{
  DBUG_ENTER("get_mm_parts");

  if (param->has_errors())
    DBUG_RETURN(0);

  // The predicate must be on the table currently being optimized.
  if (field->table != param->table)
    DBUG_RETURN(0);

  KEY_PART *key_part = param->key_parts;
  KEY_PART *end = param->key_parts_end;
  SEL_TREE *tree=0;
  // 'value' must be computable when the range is used: it may only
  // depend on tables that have already been read.
  if (value &&
      value->used_tables() & ~(param->prev_tables | param->read_tables))
    DBUG_RETURN(0);
  // Build a SEL_ARG for every key part that is based on 'field'.
  for (; key_part != end ; key_part++)
  {
    if (field->eq(key_part->field))
    {
      /*
        Cannot do range access for spatial operators when a
        non-spatial index is used.
      */
      if (key_part->image_type != Field::itMBR &&
          is_spatial_operator(cond_func->functype()))
        continue;

      SEL_ARG *sel_arg=0;
      if (!tree && !(tree=new (param->mem_root)
                     SEL_TREE(param->mem_root, param->keys)))
        DBUG_RETURN(0); // OOM
      if (!value || !(value->used_tables() & ~param->read_tables))
      {
        // 'value' is constant here: build the actual interval now.
        sel_arg=get_mm_leaf(param,cond_func,
                            key_part->field,key_part,type,value);
        if (!sel_arg)
          continue;
        if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
        {
          // An empty interval makes the whole tree impossible.
          tree->type=SEL_TREE::IMPOSSIBLE;
          DBUG_RETURN(tree);
        }
      }
      else
      {
        /*
          The index may not be used by dynamic range access unless
          'field' and 'value' are comparable.
        */
        if (!comparable_in_index(cond_func, key_part->field,
                                 key_part->image_type,
                                 type, value))
        {
          warn_index_not_applicable(param, key_part->key, field);
          DBUG_RETURN(NULL);
        }

        // 'value' becomes known only at execution time (dynamic range
        // access): record the key part as a possible key instead.
        if (!(sel_arg= new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY)))
          DBUG_RETURN(NULL);  //OOM
      }
      sel_arg->part=(uchar) key_part->part;
      tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
      tree->keys_map.set_bit(key_part->key);
    }
  }

  // No usable key part was found: the predicate cannot aid range access.
  if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
    tree= NULL;
  DBUG_RETURN(tree);
}
7108 
7109 /**
7110   Saves 'value' in 'field' and handles potential type conversion
7111   problems.
7112 
7113   @param tree [out]                 The SEL_ARG leaf under construction. If
7114                                     an always false predicate is found it is
7115                                     modified to point to a SEL_ARG with
7116                                     type == SEL_ARG::IMPOSSIBLE
7117   @param value                      The Item that contains a value that shall
7118                                     be stored in 'field'.
7119   @param comp_op                    Comparison operator: >, >=, <=> etc.
7120   @param field                      The field that 'value' is stored into.
7121   @param impossible_cond_cause[out] Set to a descriptive string if an
7122                                     impossible condition is found.
7123   @param memroot                    Memroot for creation of new SEL_ARG.
7124 
7125   @retval false  if saving went fine and it makes sense to continue
7126                  optimizing for this predicate.
7127   @retval true   if always true/false predicate was found, in which
7128                  case 'tree' has been modified to reflect this: NULL
7129                  pointer if always true, SEL_ARG with type IMPOSSIBLE
7130                  if always false.
7131 */
save_value_and_handle_conversion(SEL_ARG ** tree,Item * value,const Item_func::Functype comp_op,Field * field,const char ** impossible_cond_cause,MEM_ROOT * memroot)7132 static bool save_value_and_handle_conversion(SEL_ARG **tree,
7133                                              Item *value,
7134                                              const Item_func::Functype comp_op,
7135                                              Field *field,
7136                                              const char **impossible_cond_cause,
7137                                              MEM_ROOT *memroot)
7138 {
7139   // A SEL_ARG should not have been created for this predicate yet.
7140   DBUG_ASSERT(*tree == NULL);
7141 
7142   if (!value->can_be_evaluated_now())
7143   {
7144     /*
7145       We cannot evaluate the value yet (i.e. required tables are not yet
7146       locked.)
7147       This is the case of prune_partitions() called during
7148       SELECT_LEX::prepare().
7149     */
7150     return true;
7151   }
7152 
7153   // For comparison purposes allow invalid dates like 2000-01-32
7154   const sql_mode_t orig_sql_mode= field->table->in_use->variables.sql_mode;
7155   field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
7156 
7157   /*
7158     We want to change "field > value" to "field OP V"
7159     where:
7160     * V is what is in "field" after we stored "value" in it via
7161     save_in_field_no_warning() (such store operation may have done
7162     rounding...)
7163     * OP is > or >=, depending on what's correct.
7164     For example, if c is an INT column,
7165     "c > 2.9" is changed to "c OP 3"
7166     where OP is ">=" (">" would not be correct, as 3 > 2.9, a comparison
7167     done with stored_field_cmp_to_item()). And
7168     "c > 3.1" is changed to "c OP 3" where OP is ">" (3 < 3.1...).
7169   */
7170 
7171   // Note that value may be a stored function call, executed here.
7172   const type_conversion_status err= value->save_in_field_no_warnings(field, true);
7173   field->table->in_use->variables.sql_mode= orig_sql_mode;
7174 
7175   switch (err) {
7176   case TYPE_OK:
7177   case TYPE_NOTE_TRUNCATED:
7178   case TYPE_WARN_TRUNCATED:
7179     return false;
7180   case TYPE_WARN_INVALID_STRING:
7181     /*
7182       An invalid string does not produce any rows when used with
7183       equality operator.
7184     */
7185     if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
7186     {
7187       *impossible_cond_cause= "invalid_characters_in_string";
7188       goto impossible_cond;
7189     }
7190     /*
7191       For other operations on invalid strings, we assume that the range
7192       predicate is always true and let evaluate_join_record() decide
7193       the outcome.
7194     */
7195     return true;
7196   case TYPE_ERR_BAD_VALUE:
7197     /*
7198       In the case of incompatible values, MySQL's SQL dialect has some
7199       strange interpretations. For example,
7200 
7201           "int_col > 'foo'" is interpreted as "int_col > 0"
7202 
7203       instead of always false. Because of this, we assume that the
7204       range predicate is always true instead of always false and let
7205       evaluate_join_record() decide the outcome.
7206     */
7207     return true;
7208   case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
7209     // Checking NULL value on a field that cannot contain NULL.
7210     *impossible_cond_cause= "null_field_in_non_null_column";
7211     goto impossible_cond;
7212   case TYPE_WARN_OUT_OF_RANGE:
7213     /*
7214       value to store was either higher than field::max_value or lower
7215       than field::min_value. The field's max/min value has been stored
7216       instead.
7217      */
7218     if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
7219     {
7220       /*
7221         Independent of data type, "out_of_range_value =/<=> field" is
7222         always false.
7223       */
7224       *impossible_cond_cause= "value_out_of_range";
7225       goto impossible_cond;
7226     }
7227 
7228     // If the field is numeric, we can interpret the out of range value.
7229     if ((field->type() != FIELD_TYPE_BIT) &&
7230         (field->result_type() == REAL_RESULT ||
7231          field->result_type() == INT_RESULT ||
7232          field->result_type() == DECIMAL_RESULT))
7233     {
7234       /*
7235         value to store was higher than field::max_value if
7236            a) field has a value greater than 0, or
7237            b) if field is unsigned and has a negative value (which, when
7238               cast to unsigned, means some value higher than LLONG_MAX).
7239       */
7240       if ((field->val_int() > 0) ||                              // a)
7241           (static_cast<Field_num*>(field)->unsigned_flag &&
7242            field->val_int() < 0))                                // b)
7243       {
7244         if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
7245         {
7246           /*
7247             '<' or '<=' compared to a value higher than the field
7248             can store is always true.
7249           */
7250           return true;
7251         }
7252         if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
7253         {
7254           /*
7255             '>' or '>=' compared to a value higher than the field can
7256             store is always false.
7257           */
7258           *impossible_cond_cause= "value_out_of_range";
7259           goto impossible_cond;
7260         }
7261       }
7262       else // value is lower than field::min_value
7263       {
7264         if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
7265         {
7266           /*
7267             '>' or '>=' compared to a value lower than the field
7268             can store is always true.
7269           */
7270           return true;
7271         }
7272         if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
7273         {
7274           /*
7275             '<' or '=' compared to a value lower than the field can
7276             store is always false.
7277           */
7278           *impossible_cond_cause= "value_out_of_range";
7279           goto impossible_cond;
7280         }
7281       }
7282     }
7283     /*
7284       Value is out of range on a datatype where it can't be decided if
7285       it was underflow or overflow. It is therefore not possible to
7286       determine whether or not the condition is impossible or always
7287       true and we have to assume always true.
7288     */
7289     return true;
7290   case TYPE_NOTE_TIME_TRUNCATED:
7291     if (field->type() == FIELD_TYPE_DATE &&
7292         (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC ||
7293          comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC))
7294     {
7295       /*
7296         We were saving DATETIME into a DATE column, the conversion went ok
7297         but a non-zero time part was cut off.
7298 
7299         In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
7300         values. Index over a DATE column uses DATE comparison. Changing
7301         from one comparison to the other is possible:
7302 
7303         datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
7304         datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'
7305 
7306         datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
7307         datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'
7308 
7309         but we'll need to convert '>' to '>=' and '<' to '<='. This will
7310         be done together with other types at the end of get_mm_leaf()
7311         (grep for stored_field_cmp_to_item)
7312       */
7313       return false;
7314     }
7315     if (comp_op == Item_func::EQ_FUNC || comp_op == Item_func::EQUAL_FUNC)
7316     {
7317       // Equality comparison is always false when time info has been truncated.
7318       goto impossible_cond;
7319     }
7320     return true;
7321   case TYPE_ERR_OOM:
7322     return true;
7323     /*
7324       No default here to avoid adding new conversion status codes that are
7325       unhandled in this function.
7326     */
7327   }
7328 
7329   DBUG_ASSERT(FALSE); // Should never get here.
7330 
7331 impossible_cond:
7332   *tree= new (memroot) SEL_ARG(field, 0, 0);
7333   (*tree)->type= SEL_ARG::IMPOSSIBLE;
7334   return true;
7335 }
7336 
7337 
/*
  Build a SEL_ARG interval tree (a range "leaf") for a single predicate
  "field <op> value" on one key part.

  @param param     Range analysis context. All SEL_ARG allocations are done
                   on param->mem_root; thd->mem_root is temporarily set to
                   param->old_root while 'value' is evaluated (see comment
                   in the body).
  @param conf_func The comparison function item the predicate comes from.
  @param field     The indexed field the predicate applies to.
  @param key_part  Description of the key part image of 'field'.
  @param type      Comparison type (LT_FUNC, LIKE_FUNC, SP_*_FUNC, ...).
  @param value     The constant side of the predicate, or NULL for
                   IS [NOT] NULL predicates.

  @return SEL_ARG tree for the interval(s), &null_element when the
          predicate is provably always false, or NULL when no range
          predicate could be constructed (OOM, non-comparable operands,
          predicate always true, etc.).
*/
static SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM *param, Item *conf_func, Field *field,
            KEY_PART *key_part, Item_func::Functype type,Item *value)
{
  uint maybe_null=(uint) field->real_maybe_null();
  bool optimize_range;
  SEL_ARG *tree= 0;
  MEM_ROOT *alloc= param->mem_root;
  uchar *str;
  // When set, an "impossible_condition" object is added to the optimizer
  // trace at 'end:' with this string as the cause.
  const char *impossible_cond_cause= NULL;
  DBUG_ENTER("get_mm_leaf");

  if (param->has_errors())
    goto end;

  /*
    We need to restore the runtime mem_root of the thread in this
    function because it evaluates the value of its argument, while
    the argument can be any, e.g. a subselect. The subselect
    items, in turn, assume that all the memory allocated during
    the evaluation has the same life span as the item itself.
    TODO: opt_range.cc should not reset thd->mem_root at all.
  */
  param->thd->mem_root= param->old_root;
  if (!value)					// IS NULL or IS NOT NULL
  {
    if (field->table->pos_in_table_list->outer_join)
      /*
        Range scan cannot be used to scan the inner table of an outer
        join if the predicate is IS NULL.
      */
      goto end;
    if (!maybe_null)                            // NOT NULL column
    {
      // "IS NULL" on a NOT NULL column can never match any row.
      if (type == Item_func::ISNULL_FUNC)
        tree= &null_element;
      goto end;
    }
    // Build a key image holding the NULL value for this key part.
    uchar *null_string=
      static_cast<uchar*>(alloc_root(alloc, key_part->store_length + 1));
    if (!null_string)
      goto end;                                 // out of memory

    TRASH(null_string, key_part->store_length + 1);
    memcpy(null_string, is_null_string, sizeof(is_null_string));

    if (!(tree= new (alloc) SEL_ARG(field, null_string, null_string)))
      goto end;                                 // out of memory
    if (type == Item_func::ISNOTNULL_FUNC)
    {
      tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
      tree->max_flag=NO_MAX_RANGE;
    }
    goto end;
  }

  /*
    The range access method cannot be used unless 'field' and 'value'
    are comparable in the index. Examples of non-comparable
    field/values: different collation, DATETIME vs TIME etc.
  */
  if (!comparable_in_index(conf_func, field, key_part->image_type,
                           type, value))
  {
    warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  if (key_part->image_type == Field::itMBR)
  {
    // @todo: use is_spatial_operator() instead?
    switch (type) {
    case Item_func::SP_EQUALS_FUNC:
    case Item_func::SP_DISJOINT_FUNC:
    case Item_func::SP_INTERSECTS_FUNC:
    case Item_func::SP_TOUCHES_FUNC:
    case Item_func::SP_CROSSES_FUNC:
    case Item_func::SP_WITHIN_FUNC:
    case Item_func::SP_CONTAINS_FUNC:
    case Item_func::SP_OVERLAPS_FUNC:
      break;
    default:
      /*
        We cannot involve spatial indexes for queries that
        don't use MBREQUALS(), MBRDISJOINT(), etc. functions.
      */
      goto end;
    }
  }

  if (param->using_real_indexes)
    optimize_range= field->optimize_range(param->real_keynr[key_part->key],
                                          key_part->part);
  else
    // Partition pruning: pretend every "index" is range-optimizable.
    optimize_range= TRUE;

  if (type == Item_func::LIKE_FUNC)
  {
    bool like_error;
    char buff1[MAX_FIELD_WIDTH];
    uchar *min_str,*max_str;
    String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
    size_t length, offset, min_length, max_length;
    size_t field_length= field->pack_length()+maybe_null;

    if (!optimize_range)
      goto end;
    if (!(res= value->val_str(&tmp)))
    {
      // LIKE NULL never matches: impossible condition.
      tree= &null_element;
      goto end;
    }

    /*
      TODO:
      Check if this was a function. This should have been optimized away
      in the sql_select.cc
    */
    if (res != &tmp)
    {
      tmp.copy(*res);				// Get own copy
      res= &tmp;
    }
    if (field->cmp_type() != STRING_RESULT)
      goto end;                                 // Can only optimize strings

    offset=maybe_null;
    length=key_part->store_length;

    if (length != key_part->length  + maybe_null)
    {
      /* key packed with length prefix */
      offset+= HA_KEY_BLOB_LENGTH;
      field_length= length - HA_KEY_BLOB_LENGTH;
    }
    else
    {
      if (unlikely(length < field_length))
      {
	/*
	  This can only happen in a table created with UNIREG where one key
	  overlaps many fields
	*/
	length= field_length;
      }
      else
	field_length= length;
    }
    length+=offset;
    // One allocation for both the min and the max key images.
    if (!(min_str= (uchar*) alloc_root(alloc, length*2)))
      goto end;

    max_str=min_str+length;
    if (maybe_null)
      max_str[0]= min_str[0]=0;                 // Clear the NULL indicator byte

    // NOTE(review): uses param->cond (condition being analyzed), not
    // conf_func -- presumably they are the same item for LIKE; verify.
    Item_func_like *like_func= static_cast<Item_func_like*>(param->cond);

    // We can only optimize with LIKE if the escape string is known.
    if (!like_func->escape_is_evaluated())
      goto end;

    field_length-= maybe_null;
    like_error= my_like_range(field->charset(),
			      res->ptr(), res->length(),
			      like_func->escape,
			      wild_one, wild_many,
			      field_length,
			      (char*) min_str+offset, (char*) max_str+offset,
			      &min_length, &max_length);
    if (like_error)				// Can't optimize with LIKE
      goto end;

    if (offset != maybe_null)			// BLOB or VARCHAR
    {
      // Store the actual prefix lengths into the length-prefix bytes.
      int2store(min_str+maybe_null, static_cast<uint16>(min_length));
      int2store(max_str+maybe_null, static_cast<uint16>(max_length));
    }
    tree= new (alloc) SEL_ARG(field, min_str, max_str);
    goto end;
  }

  if (!optimize_range &&
      type != Item_func::EQ_FUNC &&
      type != Item_func::EQUAL_FUNC)
    goto end;                                   // Can't optimize this

  /*
    Geometry operations may mix geometry types, e.g., we may be
    checking ST_Contains(<polygon field>, <point>). In such cases,
    field->geom_type will be a different type than the value we're
    trying to store in it, and the conversion will fail. Therefore,
    set the most general geometry type while saving, and revert to the
    original geometry type afterwards.
  */
  {
    const Field::geometry_type save_geom_type=
      (field->type() == MYSQL_TYPE_GEOMETRY) ?
      field->get_geometry_type() :
      Field::GEOM_GEOMETRY;
    if (field->type() == MYSQL_TYPE_GEOMETRY)
    {
      down_cast<Field_geom*>(field)->geom_type= Field::GEOM_GEOMETRY;
    }

    // Stores 'value' into 'field' and handles conversion issues; may set
    // *tree to an IMPOSSIBLE SEL_ARG and impossible_cond_cause.
    bool always_true_or_false=
      save_value_and_handle_conversion(&tree, value, type, field,
                                       &impossible_cond_cause, alloc);

    if (field->type() == MYSQL_TYPE_GEOMETRY &&
        save_geom_type != Field::GEOM_GEOMETRY)
    {
      down_cast<Field_geom*>(field)->geom_type= save_geom_type;
    }

    if (always_true_or_false)
      goto end;
  }

  /*
    Any sargable predicate except "<=>" involving NULL as a constant is always
    FALSE
  */
  if (type != Item_func::EQUAL_FUNC && field->is_real_null())
  {
    impossible_cond_cause= "comparison_with_null_always_false";
    tree= &null_element;
    goto end;
  }

  // Build the key image of the just-stored value and make a point interval.
  str= (uchar*) alloc_root(alloc, key_part->store_length+1);
  if (!str)
    goto end;
  if (maybe_null)
    *str= (uchar) field->is_real_null();        // Set to 1 if null
  field->get_key_image(str+maybe_null, key_part->length,
                       key_part->image_type);
  if (!(tree= new (alloc) SEL_ARG(field, str, str)))
    goto end;                                   // out of memory

  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and condition
    testing, and we also get correct comparison of unsigned integers with
    negative integers (which otherwise fails because at query execution time
    negative integers are cast to unsigned if compared with unsigned).
   */
  if (field->result_type() == INT_RESULT &&
      value->result_type() == INT_RESULT &&
      ((field->type() == FIELD_TYPE_BIT ||
       ((Field_num *) field)->unsigned_flag) &&
       !((Item_int*) value)->unsigned_flag))
  {
    longlong item_val= value->val_int();
    if (item_val < 0)
    {
      if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
      {
        impossible_cond_cause= "unsigned_int_cannot_be_negative";
        tree->type= SEL_ARG::IMPOSSIBLE;        // case (a): always false
        goto end;
      }
      if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
      {
        tree= 0;                                // case (b): always true
        goto end;
      }
    }
  }

  // Adjust the point interval's open/closed flags (or the GIS read
  // function) according to the comparison operator.
  switch (type) {
  case Item_func::LT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        stored_field_cmp_to_item(param->thd, field, value) == 0)
      tree->max_flag=NEAR_MAX;
    /* fall through */
  case Item_func::LE_FUNC:
    if (!maybe_null)
      tree->min_flag=NO_MIN_RANGE;		/* From start */
    else
    {						// > NULL
      if (!(tree->min_value=
            static_cast<uchar*>(alloc_root(alloc, key_part->store_length+1))))
        goto end;
      TRASH(tree->min_value, key_part->store_length + 1);
      memcpy(tree->min_value, is_null_string, sizeof(is_null_string));
      tree->min_flag=NEAR_MIN;
    }
    break;
  case Item_func::GT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) <= 0))
      tree->min_flag=NEAR_MIN;
    tree->max_flag= NO_MAX_RANGE;
    break;
  case Item_func::GE_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) < 0))
      tree->min_flag= NEAR_MIN;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_EQUALS_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_EQUAL);
    break;
  case Item_func::SP_DISJOINT_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_DISJOINT);
    break;
  case Item_func::SP_INTERSECTS_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
    break;
  case Item_func::SP_TOUCHES_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
    break;

  case Item_func::SP_CROSSES_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
    break;
  case Item_func::SP_WITHIN_FUNC:
    /*
      Adjust the rkey_func_flag as it's assumed and observed that both
      MyISAM and Innodb implement this function in reverse order.
    */
    tree->set_gis_index_read_function(HA_READ_MBR_CONTAIN);
    break;

  case Item_func::SP_CONTAINS_FUNC:
    /*
      Adjust the rkey_func_flag as it's assumed and observed that both
      MyISAM and Innodb implement this function in reverse order.
    */
    tree->set_gis_index_read_function(HA_READ_MBR_WITHIN);
    break;
  case Item_func::SP_OVERLAPS_FUNC:
    tree->set_gis_index_read_function(HA_READ_MBR_INTERSECT);
    break;

  default:
    break;
  }

end:
  if (impossible_cond_cause != NULL)
  {
    Opt_trace_object wrapper (&param->thd->opt_trace);
    Opt_trace_object (&param->thd->opt_trace, "impossible_condition",
                      Opt_trace_context::RANGE_OPTIMIZER).
      add_alnum("cause", impossible_cond_cause);
  }
  param->thd->mem_root= alloc;                  // Restore the runtime mem_root
  DBUG_RETURN(tree);
}
7696 
7697 
7698 /*
7699   Add a new key test to a key when scanning through all keys
7700   This will never be called for same key parts.
7701 */
7702 
7703 static SEL_ARG *
sel_add(SEL_ARG * key1,SEL_ARG * key2)7704 sel_add(SEL_ARG *key1,SEL_ARG *key2)
7705 {
7706   SEL_ARG *root,**key_link;
7707 
7708   if (!key1)
7709     return key2;
7710   if (!key2)
7711     return key1;
7712 
7713   key_link= &root;
7714   while (key1 && key2)
7715   {
7716     if (key1->part < key2->part)
7717     {
7718       *key_link= key1;
7719       key_link= &key1->next_key_part;
7720       key1=key1->next_key_part;
7721     }
7722     else
7723     {
7724       *key_link= key2;
7725       key_link= &key2->next_key_part;
7726       key2=key2->next_key_part;
7727     }
7728   }
7729   *key_link=key1 ? key1 : key2;
7730   return root;
7731 }
7732 
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Swap the CLONE_KEY1_MAYBE and CLONE_KEY2_MAYBE bits of a clone flag
  (used when the key1/key2 arguments of key_and()/key_or() are swapped).
  Fully parenthesized: the previous definition
    ((A & 1) << 1) | ((A & 2) >> 1)
  lacked outer parentheses and parentheses around A, so embedding the
  macro in a larger expression (e.g. comparing or OR-ing its result) or
  passing a compound expression as A evaluated with the wrong precedence.
*/
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))
7736 
7737 
/*
  Produce a SEL_TREE that represents "cond_of_tree1 AND cond_of_tree2".

  The result is built in place: for each index, tree1->keys[idx] is
  replaced with key_and() of both trees' SEL_ARGs, and tree1 (or tree2,
  for the trivial cases) is returned. Returns NULL only on prior errors.
  NOTE(review): both input trees may be modified; callers must not reuse
  them independently afterwards.
*/
static SEL_TREE *
tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");

  if (param->has_errors())
    DBUG_RETURN(0);

  // Trivial cases: a missing operand or an ALWAYS tree does not restrict
  // the other side; an IMPOSSIBLE tree makes the whole AND impossible.
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  // A MAYBE tree demotes the other tree's KEY type to KEY_SMALLER.
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  dbug_print_tree("tree1", tree1, param);
  dbug_print_tree("tree2", tree2, param);

  key_map  result_keys;

  /* Join the trees key per key */
  SEL_ARG **key1,**key2;
  for (uint idx=0; idx< param->keys; idx++)
  {
    key1= &tree1->keys[idx];
    key2= &tree2->keys[idx];

    uint flag=0;
    if (*key1 || *key2)
    {
      if (*key1 && !(*key1)->simple_key())
	flag|=CLONE_KEY1_MAYBE;
      if (*key2 && !(*key2)->simple_key())
	flag|=CLONE_KEY2_MAYBE;
      // The AND result replaces tree1's SEL_ARG for this index.
      *key1= key_and(param, *key1, *key2, flag);
      if (*key1)
      {
        // One impossible per-index range makes the whole tree impossible.
        if ((*key1)->type == SEL_ARG::IMPOSSIBLE)
        {
          tree1->type= SEL_TREE::IMPOSSIBLE;
          DBUG_RETURN(tree1);
        }
        result_keys.set_bit(idx);
#ifndef DBUG_OFF
        /*
          Do not test use_count if there is a large range tree created.
          It takes too much time to traverse the tree.
        */
        if (param->mem_root->allocated_size < 2097152)
          (*key1)->test_use_count(*key1);
#endif
      }

    }
  }
  tree1->keys_map= result_keys;

  /* ok, both trees are index_merge trees */
  imerge_list_and_list(&tree1->merges, &tree2->merges);
  DBUG_RETURN(tree1);
}
7812 
7813 
7814 /*
7815   Check if two SEL_TREES can be combined into one (i.e. a single key range
7816   read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
7817   using index_merge.
7818 */
7819 
sel_trees_can_be_ored(SEL_TREE * tree1,SEL_TREE * tree2,RANGE_OPT_PARAM * param)7820 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
7821                            RANGE_OPT_PARAM* param)
7822 {
7823   key_map common_keys= tree1->keys_map;
7824   DBUG_ENTER("sel_trees_can_be_ored");
7825   common_keys.intersect(tree2->keys_map);
7826 
7827   dbug_print_tree("tree1", tree1, param);
7828   dbug_print_tree("tree2", tree2, param);
7829 
7830   if (common_keys.is_clear_all())
7831     DBUG_RETURN(FALSE);
7832 
7833   /* trees have a common key, check if they refer to same key part */
7834   SEL_ARG *key1,*key2;
7835   for (uint key_no=0; key_no < param->keys; key_no++)
7836   {
7837     if (common_keys.is_set(key_no))
7838     {
7839       key1= tree1->keys[key_no];
7840       key2= tree2->keys[key_no];
7841       /* GIS_OPTIMIZER_FIXME: temp solution. key1 could be all nulls */
7842       if (key1 && key2 && key1->part == key2->part)
7843         DBUG_RETURN(TRUE);
7844     }
7845   }
7846   DBUG_RETURN(FALSE);
7847 }
7848 
7849 
7850 /*
7851   Remove the trees that are not suitable for record retrieval.
7852   SYNOPSIS
7853     param  Range analysis parameter
7854     tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
7855 
7856   DESCRIPTION
7857     This function walks through tree->keys[] and removes the SEL_ARG* trees
7858     that are not "maybe" trees (*) and cannot be used to construct quick range
7859     selects.
7860     (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
7861           these types here as well.
7862 
7863     A SEL_ARG* tree cannot be used to construct quick select if it has
7864     tree->part != 0. (e.g. it could represent "keypart2 < const").
7865 
7866     WHY THIS FUNCTION IS NEEDED
7867 
7868     Normally we allow construction of SEL_TREE objects that have SEL_ARG
7869     trees that do not allow quick range select construction. For example for
7870     " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
7871     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
7872     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
7873                                                from this
7874     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
7875                                    tree.
7876 
7877     There is an exception though: when we construct index_merge SEL_TREE,
7878     any SEL_ARG* tree that cannot be used to construct quick range select can
7879     be removed, because current range analysis code doesn't provide any way
7880     that tree could be later combined with another tree.
7881     Consider an example: we should not construct
7882     st1 = SEL_TREE {
7883       merges = SEL_IMERGE {
7884                             SEL_TREE(t.key1part1 = 1),
7885                             SEL_TREE(t.key2part2 = 2)   -- (*)
7886                           }
7887                    };
7888     because
7889      - (*) cannot be used to construct quick range select,
7890      - There is no execution path that would cause (*) to be converted to
7891        a tree that could be used.
7892 
7893     The latter is easy to verify: first, notice that the only way to convert
7894     (*) into a usable tree is to call tree_and(something, (*)).
7895 
7896     Second look at what tree_and/tree_or function would do when passed a
    SEL_TREE that has the structure like st1 tree has, and conclude that
7898     tree_and(something, (*)) will not be called.
7899 
7900   RETURN
7901     0  Ok, some suitable trees left
7902     1  No tree->keys[] left.
7903 */
7904 
remove_nonrange_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree)7905 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
7906 {
7907   bool res= FALSE;
7908   for (uint i=0; i < param->keys; i++)
7909   {
7910     if (tree->keys[i])
7911     {
7912       if (tree->keys[i]->part)
7913       {
7914         tree->keys[i]= NULL;
7915         tree->keys_map.clear_bit(i);
7916       }
7917       else
7918         res= TRUE;
7919     }
7920   }
7921   return !res;
7922 }
7923 
7924 
/*
  Produce a SEL_TREE that represents "cond_of_tree1 OR cond_of_tree2".

  If the trees can be OR'ed per index (sel_trees_can_be_ored()), the
  result is built in place in tree1 via key_or(). Otherwise an
  index_merge SEL_TREE is produced. Returns NULL if no tree could be
  built (missing operand, error, or OOM).
  NOTE(review): both input trees may be modified (merges emptied, keys
  replaced); callers must not reuse them independently afterwards.
*/
static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_or");

  if (param->has_errors())
    DBUG_RETURN(0);

  if (!tree1 || !tree2)
    DBUG_RETURN(0);
  // An IMPOSSIBLE operand contributes nothing; an ALWAYS operand makes
  // the whole OR unrestricted, so the other side's tree is irrelevant.
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree1);				// Can't use this
  if (tree2->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree2);

  /*
    It is possible that a tree contains both
    a) simple range predicates (in tree->keys[]) and
    b) index merge range predicates (in tree->merges)

    If a tree has both, they represent equally *valid* range
    predicate alternatives; both will return all relevant rows from
    the table but one may return more unnecessary rows than the
    other (additional rows will be filtered later). However, doing
    an OR operation on trees with both types of predicates is too
    complex at the time. We therefore remove the index merge
    predicates (if we have both types) before OR'ing the trees.

    TODO: enable tree_or() for trees with both simple and index
    merge range predicates.
  */
  if (!tree1->merges.is_empty())
  {
    for (uint i= 0; i < param->keys; i++)
      if (tree1->keys[i] != NULL && tree1->keys[i] != &null_element)
      {
        tree1->merges.empty();
        break;
      }
  }
  if (!tree2->merges.is_empty())
  {
    for (uint i= 0; i< param->keys; i++)
      if (tree2->keys[i] != NULL && tree2->keys[i] != &null_element)
      {
        tree2->merges.empty();
        break;
      }
  }

  SEL_TREE *result= 0;
  key_map  result_keys;
  if (sel_trees_can_be_ored(tree1, tree2, param))
  {
    /* Join the trees key per key */
    SEL_ARG **key1,**key2;
    for (uint idx=0; idx < param->keys; idx++)
    {
      key1= &tree1->keys[idx];
      key2= &tree2->keys[idx];
      *key1= key_or(param, *key1, *key2);
      if (*key1)
      {
        result=tree1;				// Added to tree1
        result_keys.set_bit(idx);
#ifndef DBUG_OFF
        /*
          Do not test use count if there is a large range tree created.
          It takes too much time to traverse the tree.
        */
        if (param->mem_root->allocated_size < 2097152)
          (*key1)->test_use_count(*key1);
#endif
      }
    }
    if (result)
      result->keys_map= result_keys;
  }
  else
  {
    /* ok, two trees have KEY type but cannot be used without index merge */
    if (tree1->merges.is_empty() && tree2->merges.is_empty())
    {
      if (param->remove_jump_scans)
      {
        // If neither tree keeps a usable range after removing non-range
        // SEL_ARGs, the OR collapses to ALWAYS.
        bool no_trees= remove_nonrange_trees(param, tree1);
        no_trees= no_trees || remove_nonrange_trees(param, tree2);
        if (no_trees)
          DBUG_RETURN(new (param->mem_root)
                      SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys));
      }
      SEL_IMERGE *merge;
      /* both trees are "range" trees, produce new index merge structure */
      if (!(result= new (param->mem_root)
            SEL_TREE(param->mem_root, param->keys)) ||
          !(merge= new (param->mem_root) SEL_IMERGE()) ||
          (result->merges.push_back(merge)) ||
          (merge->or_sel_tree(param, tree1)) ||
          (merge->or_sel_tree(param, tree2)))
        result= NULL;                           // OOM or list failure
      else
        result->type= tree1->type;
    }
    else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
    {
      // Both trees are already index_merge trees: OR their merge lists.
      if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
        result= new (param->mem_root)
          SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys);
      else
        result= tree1;
    }
    else
    {
      /* one tree is index merge tree and another is range tree */
      if (tree1->merges.is_empty())
        swap_variables(SEL_TREE*, tree1, tree2);

      if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
         DBUG_RETURN(new (param->mem_root)
                     SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys));
      /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
      if (imerge_list_or_tree(param, &tree1->merges, tree2))
        result= new (param->mem_root)
          SEL_TREE(SEL_TREE::ALWAYS, param->mem_root, param->keys);
      else
        result= tree1;
    }
  }
  DBUG_RETURN(result);
}
8059 
8060 
8061 /* And key trees where key1->part < key2 -> part */
8062 
/**
  AND key2 onto every range of the SEL_ARG tree key1, where
  key1->part < key2->part: key2 becomes the next_key_part of each
  range in key1 (possibly ANDed with an existing next_key_part).

  @param param       Range analysis context (tracks errors, mem_root)
  @param key1        Tree for the earlier keypart; modified in place
  @param key2        Tree for the later keypart, linked in below key1's ranges
  @param clone_flag  Flags forwarded to key_and() controlling tree cloning

  @return Root of the modified key1 tree, or &null_element if every
          range turned out to be impossible.
*/
static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
             uint clone_flag)
{
  SEL_ARG *next;
  // Remember the root's use_count before the tree is modified below.
  ulong use_count=key1->use_count;

  if (key1->elements != 1)
  {
    /*
      key2 will be referenced from every range in key1, so its
      reference counts are raised by the number of extra ranges.
    */
    key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    // See todo for left/right pointers
    DBUG_ASSERT(!key1->left);
    DBUG_ASSERT(!key1->right);
    key1->next= key1->prev= 0;
  }
  // Walk all ranges of key1 in key order and attach key2 below each.
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
        // Combined condition can never be true: drop this range from key1.
        key1=key1->tree_delete(next);
        continue;
      }
      next->next_key_part=tmp;
      if (use_count)
        next->increment_use_count(use_count);
    }
    else
      next->next_key_part=key2;    // No next keypart yet: link key2 directly
  }
  if (!key1)
    return &null_element;			// Impossible ranges
  key1->use_count++;
  return key1;
}
8104 
8105 
8106 /*
8107   Produce a SEL_ARG graph that represents "key1 AND key2"
8108 
8109   SYNOPSIS
8110     key_and()
8111       param   Range analysis context (needed to track if we have allocated
8112               too many SEL_ARGs)
8113       key1    First argument, root of its RB-tree
8114       key2    Second argument, root of its RB-tree
8115 
8116   RETURN
8117     RB-tree root of the resulting SEL_ARG graph.
8118     NULL if the result of AND operation is an empty interval {0}.
8119 */
8120 
static SEL_ARG *
key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
  if (param->has_errors())
    return 0;

  // An absent or ALWAYS operand restricts nothing: AND yields the other tree.
  if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
    return key2;
  if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
    return key1;
  if (key1->part != key2->part)
  {
    /*
      Different keyparts: normalize so key1 is the earlier keypart, then
      attach key2 below all ranges of key1 via and_all_keys().
    */
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part
    key1->use_count--;

   /*
     Clone key1 if the use_count is greater than 0 otherwise use the
     "clone_flag" to determine if a key needs to be cloned.
     "clone_flag" is set to true if the conditions which need to be
     ANDed (in tree_and) are not simple (has many OR conditions within).
   */
    if (key1->use_count > 0 || (clone_flag & CLONE_KEY2_MAYBE))
      if (!(key1= key1->clone_tree(param)))
        return 0;				// OOM
    return and_all_keys(param, key1, key2, clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {						// Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      // key1 is shared: modify a private clone, keeping use_count consistent.
      key1->use_count--;
      if (!(key1=key1->clone_tree(param)))
        return 0;				// OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {						// Both are maybe key
      key1->next_key_part=key_and(param, key1->next_key_part,
                                  key2->next_key_part, clone_flag);
      if (key1->next_key_part &&
          key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
        return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
        key1->use_count--;			// Incremented in and_all_keys
        return and_all_keys(param, key1, key2, clone_flag);
      }
      key2->use_count--;			// Key2 doesn't have a tree
    }
    return key1;
  }

  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    /*
      Cannot optimize geometry ranges. The next best thing is to keep
      one of them.
    */
    key2->free_tree();
    return key1;
  }

  key1->use_count--;
  key2->use_count--;
  // Merge-intersect the two ordered range lists for the same keypart.
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;

  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      // e1 starts first: advance e1 (via get_range) until ranges may overlap.
      if (get_range(&e1,&e2,key1))
        continue;
    }
    else if (get_range(&e2,&e1,key2))
      continue;
    // e1 and e2 overlap here; recursively AND the next-keypart trees.
    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
                          clone_flag);
    e1->increment_use_count(1);
    e2->increment_use_count(1);
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      // Non-empty intersection: add the combined range to the result tree.
      SEL_ARG *new_arg= e1->clone_and(e2, param->mem_root);
      if (!new_arg)
        return &null_element;			// End of memory
      new_arg->next_key_part=next;
      if (!new_tree)
      {
        new_tree=new_arg;
      }
      else
        new_tree=new_tree->insert(new_arg);
    }
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;				// e1 can't overlapp next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;			// Impossible range
  return new_tree;
}
8245 
8246 
8247 static bool
get_range(SEL_ARG ** e1,SEL_ARG ** e2,SEL_ARG * root1)8248 get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
8249 {
8250   (*e1)=root1->find_range(*e2);			// first e1->min < e2->min
8251   if ((*e1)->cmp_max_to_min(*e2) < 0)
8252   {
8253     if (!((*e1)=(*e1)->next))
8254       return 1;
8255     if ((*e1)->cmp_min_to_max(*e2) > 0)
8256     {
8257       (*e2)=(*e2)->next;
8258       return 1;
8259     }
8260   }
8261   return 0;
8262 }
8263 
8264 
8265 /**
8266    Combine two range expression under a common OR. On a logical level, the
8267    transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
8268 
8269    Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
   the format is reminiscent of DNF, since an expression such as the following
8271 
8272    ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
8273 
8274    where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
8275    and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
8276    SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
8277    the first range and ranges must not overlap. It follows that they are also
8278    ordered by maximum endpoints. Thus
8279 
8280    ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
8281 
   is a valid SEL_ARG expression for a key of at least 2 keyparts.
8283 
8284    For simplicity, we will assume that expr2 is a single range predicate,
8285    i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
8286    disjunction of several predicates by subsequently call key_or for each
8287    disjunct.
8288 
8289    The algorithm iterates over each disjunct of expr1, and for each disjunct
8290    where the first keypart's range overlaps with the first keypart's range in
8291    expr2:
8292 
8293    If the predicates are equal for the rest of the keyparts, or if there are
8294    no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
8295    node in expr2 is deallocated. If more ranges became connected in expr1, the
   surplus is also deallocated. If they differ, two ranges are created.
8297 
8298    - The range leading up to the overlap. Empty if endpoints are equal.
8299 
8300    - The overlapping sub-range. May be the entire range if they are equal.
8301 
8302    Finally, there may be one more range if expr2's first keypart's range has a
8303    greater maximum endpoint than the last range in expr1.
8304 
8305    For the overlapping sub-range, we recursively call key_or. Thus in order to
8306    compute key_or of
8307 
8308      (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
8309 
8310      (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
8311 
8312    We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
8313    first one, we simply hook on the condition for the second keypart from (1)
8314    : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
8315    < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
8316    the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
8317 
8318    ( 1  <  kp1 <= 2 AND 1 < kp2 < 10 ) OR
8319    ( 2  <  kp1 < 10 AND 1 < kp2 < 20 ) OR
8320    ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
8321 
8322    @param param    PARAM from test_quick_select
8323    @param key1     Root of RB-tree of SEL_ARGs to be ORed with key2
8324    @param key2     Root of RB-tree of SEL_ARGs to be ORed with key1
8325 */
static SEL_ARG *
key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
{
  /*
    NOTE: A NULL return means "no range restriction can be produced":
    either the OR covers all values (ALWAYS collapsed), the trees could
    not be combined (different keyparts / geometry), or OOM occurred.
  */
  if (param->has_errors())
    return 0;

  if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
  {
    if (key2)
    {
      // key2 is no longer needed; drop our reference and free its trees.
      key2->use_count--;
      key2->free_tree();
    }
    return key1;
  }
  if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
    // Case is symmetric to the one above, just flip parameters.
    return key_or(param, key2, key1);

  key1->use_count--;
  key2->use_count--;

  if (key1->part != key2->part ||
      (key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    key1->free_tree();
    key2->free_tree();
    return 0;                                   // Can't optimize this
  }

  // If one of the key is MAYBE_KEY then the found region may be bigger
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    key2->free_tree();
    key1->use_count++;
    return key1;
  }
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    key1->free_tree();
    key2->use_count++;
    return key2;
  }

  if (key1->use_count > 0)
  {
    /*
      key1 is modified in place below. If it is shared, prefer using key2
      as the base (when unshared or smaller); if still shared, clone it.
    */
    if (key2->use_count == 0 || key1->elements > key2->elements)
    {
      swap_variables(SEL_ARG *,key1,key2);
    }
    if (key1->use_count > 0 && (key1= key1->clone_tree(param)) == NULL)
      return 0;                                 // OOM
  }

  // Add tree at key2 to tree at key1
  const bool key2_shared= (key2->use_count != 0);
  key1->maybe_flag|= key2->maybe_flag;

  /*
    Notation for illustrations used in the rest of this function:

      Range: [--------]
             ^        ^
             start    stop

      Two overlapping ranges:
        [-----]               [----]            [--]
            [---]     or    [---]       or   [-------]

      Ambiguity: ***
        The range starts or stops somewhere in the "***" range.
        Example: a starts before b and may end before/the same place/after b
        a: [----***]
        b:   [---]

      Adjacent ranges:
        Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
        a: ----]
        b:      [----
   */

  SEL_ARG *cur_key2= key2->first();
  while (cur_key2)
  {
    /*
      key1 consists of one or more ranges. cur_key1 is the
      range currently being handled.

      initialize cur_key1 to the latest range in key1 that starts the
      same place or before the range in cur_key2 starts

      cur_key2:            [------]
      key1:      [---] [-----] [----]
                       ^
                       cur_key1
    */
    SEL_ARG *cur_key1= key1->find_range(cur_key2);

    /*
      Used to describe how two key values are positioned compared to
      each other. Consider key_value_a.<cmp_func>(key_value_b):

        -2: key_value_a is smaller than key_value_b, and they are adjacent
        -1: key_value_a is smaller than key_value_b (not adjacent)
         0: the key values are equal
         1: key_value_a is bigger than key_value_b (not adjacent)
         2: key_value_a is bigger than key_value_b, and they are adjacent

      Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"

      cur_key2:          [--------           (10 <= x ...  )
      cur_key1:    -----]                    (  ... x <  10) => cmp==-2
      cur_key1:    ----]                     (  ... x <   9) => cmp==-1
      cur_key1:    ------]                   (  ... x <= 10) => cmp== 0
      cur_key1:    --------]                 (  ... x <= 12) => cmp== 1
      (cmp == 2 does not make sense for cmp_max_to_min())
     */
    int cmp= 0;

    if (!cur_key1)
    {
      /*
        The range in cur_key2 starts before the first range in key1. Use
        the first range in key1 as cur_key1.

        cur_key2: [--------]
        key1:            [****--] [----]   [-------]
                         ^
                         cur_key1
      */
      cur_key1= key1->first();
      cmp= -1;
    }
    else if ((cmp= cur_key1->cmp_max_to_min(cur_key2)) < 0)
    {
      /*
        This is the case:
        cur_key2:           [-------]
        cur_key1:   [----**]
       */
      SEL_ARG *next_key1= cur_key1->next;
      if (cmp == -2 &&
          eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
      {
        /*
          Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged

          This is the case:
          cur_key2:           [-------]
          cur_key1:     [----]

          Result:
          cur_key2:     [-------------]     => inserted into key1 below
          cur_key1:                         => deleted
        */
        SEL_ARG *next_key2= cur_key2->next;
        if (key2_shared)
        {
          if (!(cur_key2= new (param->mem_root) SEL_ARG(*cur_key2)))
            return 0;           // out of memory
          cur_key2->increment_use_count(key1->use_count+1);
          cur_key2->next= next_key2;                 // New copy of cur_key2
        }

        if (cur_key2->copy_min(cur_key1))
        {
          // cur_key2 is full range: [-inf <= cur_key2 <= +inf]
          key1->free_tree();
          key2->free_tree();
          key1->type= SEL_ARG::ALWAYS;
          key2->type= SEL_ARG::ALWAYS;
          if (key1->maybe_flag)
            return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
          return 0;
        }

        if (!(key1= key1->tree_delete(cur_key1)))
        {
          /*
            cur_key1 was the last range in key1; move the cur_key2
            range that was merged above to key1
          */
          key1= cur_key2;
          key1->make_root();
          cur_key2= next_key2;
          break;
        }
      }
      // Move to next range in key1. Now cur_key1.min > cur_key2.min
      if (!(cur_key1= next_key1))
        break;         // No more ranges in key1. Copy rest of key2
    }

    if (cmp < 0)
    {
      /*
        This is the case:
        cur_key2:   [--***]
        cur_key1:       [----]
      */
      int cur_key1_cmp;
      if ((cur_key1_cmp= cur_key1->cmp_min_to_max(cur_key2)) > 0)
      {
        /*
          This is the case:
          cur_key2:  [------**]
          cur_key1:            [----]
        */
        if (cur_key1_cmp == 2 &&
            eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
        {
          /*
            Adjacent ranges with equal next_key_part. Merge like this:

            This is the case:
            cur_key2:    [------]
            cur_key1:            [-----]

            Result:
            cur_key2:    [------]
            cur_key1:    [-------------]

            Then move on to next key2 range.
          */
          cur_key1->copy_min_to_min(cur_key2);
          key1->merge_flags(cur_key2); //should be cur_key1->merge...() ?
          if (cur_key1->min_flag & NO_MIN_RANGE &&
              cur_key1->max_flag & NO_MAX_RANGE)
          {
            // Merged range covers everything => no range restriction left.
            if (key1->maybe_flag)
              return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
            return 0;
          }
          cur_key2->increment_use_count(-1);        // Free not used tree
          cur_key2=cur_key2->next;
          continue;
        }
        else
        {
          /*
            cur_key2 not adjacent to cur_key1 or has different next_key_part.
            Insert into key1 and move to next range in key2

            This is the case:
            cur_key2:   [------**]
            cur_key1:             [----]

            Result:
            key1:       [------**][----]
                        ^         ^
                        insert    cur_key1
          */
          SEL_ARG *next_key2= cur_key2->next;
          if (key2_shared)
          {
            SEL_ARG *cpy= new (param->mem_root) SEL_ARG(*cur_key2);   // Must make copy
            if (!cpy)
              return 0;                         // OOM
            key1= key1->insert(cpy);
            cur_key2->increment_use_count(key1->use_count+1);
          }
          else
            key1= key1->insert(cur_key2); // Will destroy key2_root
          cur_key2= next_key2;
          continue;
        }
      }
    }

    /*
      The ranges in cur_key1 and cur_key2 are overlapping:

      cur_key2:       [----------]
      cur_key1:    [*****-----*****]

      Corollary: cur_key1.min <= cur_key2.max
    */
    if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
    {
      // Merge overlapping ranges with equal next_key_part
      if (cur_key1->is_same(cur_key2))
      {
        /*
          cur_key1 covers exactly the same range as cur_key2
          Use the relevant range in key1.
        */
        cur_key1->merge_flags(cur_key2);        // Copy maybe flags
        cur_key2->increment_next_key_part_use_count(-1);  // Free not used tree
      }
      else
      {
        SEL_ARG *last= cur_key1;
        SEL_ARG *first= cur_key1;

        /*
          Find the last range in key1 that overlaps cur_key2 and
          where all ranges first...last have the same next_key_part as
          cur_key2.

          cur_key2:  [****----------------------*******]
          key1:         [--]  [----] [---]  [-----] [xxxx]
                        ^                   ^       ^
                        first               last    different next_key_part

          Since cur_key2 covers them, the ranges between first and last
          are merged into one range by deleting first...last-1 from
          the key1 tree. In the figure, this applies to first and the
          two consecutive ranges. The range of last is then extended:
            * last.min: Set to min(cur_key2.min, first.min)
            * last.max: If there is a last->next that overlaps cur_key2
                        (i.e., last->next has a different next_key_part):
                                        Set adjacent to last->next.min
                        Otherwise:      Set to max(cur_key2.max, last.max)

          Result:
          cur_key2:  [****----------------------*******]
                        [--]  [----] [---]                 => deleted from key1
          key1:      [**------------------------***][xxxx]
                     ^                              ^
                     cur_key1=last                  different next_key_part
        */
        while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
               eq_tree(last->next->next_key_part, cur_key2->next_key_part))
        {
          /*
            last->next is covered by cur_key2 and has same next_key_part.
            last can be deleted
          */
          SEL_ARG *save=last;
          last=last->next;
          key1= key1->tree_delete(save);
        }
        // Redirect cur_key1 to last which will cover the entire range
        cur_key1= last;

        /*
          Extend last to cover the entire range of
          [min(first.min_value,cur_key2.min_value)...last.max_value].
          If this forms a full range (the range covers all possible
          values) we return no SEL_ARG RB-tree.
        */
        bool full_range= last->copy_min(first);
        if (!full_range)
          full_range= last->copy_min(cur_key2);

        if (!full_range)
        {
          if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0)
          {
            /*
              This is the case:
              cur_key2:   [-------------]
              key1:     [***------]  [xxxx]
                        ^            ^
                        last         different next_key_part

              Extend range of last up to last->next:
              cur_key2:   [-------------]
              key1:     [***--------][xxxx]
            */
            last->copy_min_to_max(last->next);
          }
          else
            /*
              This is the case:
              cur_key2:   [--------*****]
              key1:     [***---------]    [xxxx]
                        ^                 ^
                        last              different next_key_part

              Extend range of last up to max(last.max, cur_key2.max):
              cur_key2:   [--------*****]
              key1:     [***----------**] [xxxx]
             */
            full_range= last->copy_max(cur_key2);
        }
        if (full_range)
        {                                       // Full range
          key1->free_tree();
          key1->type= SEL_ARG::ALWAYS;
          key2->type= SEL_ARG::ALWAYS;
          for (; cur_key2 ; cur_key2= cur_key2->next)
            cur_key2->increment_use_count(-1);  // Free not used tree
          if (key1->maybe_flag)
            return new (param->mem_root) SEL_ARG(SEL_ARG::MAYBE_KEY);
          return 0;
        }
      }
    }

    if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0)
    {
      /*
        This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
        cur_key2:                [-------]
        cur_key1:         [----------*******]
      */

      if (!cur_key1->next_key_part)
      {
        /*
          cur_key1->next_key_part is empty: cut the range that
          is covered by cur_key1 from cur_key2.
          Reason: (cur_key2->next_key_part OR
          cur_key1->next_key_part) will be empty and therefore
          equal to cur_key1->next_key_part. Thus, this part of
          the cur_key2 range is completely covered by cur_key1.
        */
        if (cur_key1->cmp_max_to_max(cur_key2) >= 0)
        {
          /*
            cur_key1 covers the entire range in cur_key2.
            cur_key2:            [-------]
            cur_key1:     [-----------------]

            Move on to next range in key2
          */
          /*
            cur_key2 will no longer be used. Reduce reference count
            of SEL_ARGs in its next_key_part.
          */
          cur_key2->increment_next_key_part_use_count(-1);
          cur_key2= cur_key2->next;
          continue;
        }
        else
        {
          /*
            This is the case:
            cur_key2:            [-------]
            cur_key1:     [---------]

            Result:
            cur_key2:                [---]
            cur_key1:     [---------]
          */
          cur_key2->copy_max_to_min(cur_key1);
          continue;
        }
      }

      /*
        The ranges are overlapping but have not been merged because
        next_key_part of cur_key1 and cur_key2 differ.
        cur_key2:               [----]
        cur_key1:     [------------*****]

        Split cur_key1 in two where cur_key2 starts:
        cur_key2:               [----]
        key1:         [--------][--*****]
                      ^         ^
                      insert    cur_key1
      */
      SEL_ARG *new_arg= cur_key1->clone_first(cur_key2, param->mem_root);
      if (!new_arg)
        return 0;                               // OOM
      if ((new_arg->next_key_part= cur_key1->next_key_part))
        new_arg->increment_use_count(key1->use_count+1);
      cur_key1->copy_min_to_min(cur_key2);
      key1= key1->insert(new_arg);
    } // cur_key1.min >= cur_key2.min due to this if()

    /*
      Now cur_key2.min <= cur_key1.min <= cur_key2.max:
      cur_key2:    [---------]
      cur_key1:    [****---*****]
     */
    SEL_ARG key2_cpy(*cur_key2); // Get copy we can modify
    for (;;)
    {
      if (cur_key1->cmp_min_to_min(&key2_cpy) > 0)
      {
        /*
          This is the case:
          key2_cpy:    [------------]
          key1:                 [-*****]
                                ^
                                cur_key1

          Result:
          key2_cpy:             [---]
          key1:        [-------][-*****]
                       ^        ^
                       insert   cur_key1
         */
        SEL_ARG *new_arg=key2_cpy.clone_first(cur_key1, param->mem_root);
        if (!new_arg)
          return 0; // OOM
        if ((new_arg->next_key_part=key2_cpy.next_key_part))
          new_arg->increment_use_count(key1->use_count+1);
        key1= key1->insert(new_arg);
        key2_cpy.copy_min_to_min(cur_key1);
      }
      // Now key2_cpy.min == cur_key1.min

      if ((cmp= cur_key1->cmp_max_to_max(&key2_cpy)) <= 0)
      {
        /*
          cur_key1.max <= key2_cpy.max:
          key2_cpy:       a)  [-------]    or b)     [----]
          cur_key1:           [----]                 [----]

          Steps:

           1) Update next_key_part of cur_key1: OR it with
              key2_cpy->next_key_part.
           2) If case a: Insert range [cur_key1.max, key2_cpy.max]
              into key1 using next_key_part of key2_cpy

           Result:
           key1:          a)  [----][-]    or b)     [----]
         */
        cur_key1->maybe_flag|= key2_cpy.maybe_flag;
        key2_cpy.increment_use_count(key1->use_count+1);
        cur_key1->next_key_part=
          key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part);

        if (!cmp)
          break;                     // case b: done with this key2 range

        // Make key2_cpy the range [cur_key1.max, key2_cpy.max]
        key2_cpy.copy_max_to_min(cur_key1);
        if (!(cur_key1= cur_key1->next))
        {
          /*
            No more ranges in key1. Insert key2_cpy and go to "end"
            label to insert remaining ranges in key2 if any.
          */
          SEL_ARG *new_key1_range= new (param->mem_root) SEL_ARG(key2_cpy);
          if (!new_key1_range)
            return 0; // OOM
          key1= key1->insert(new_key1_range);
          cur_key2= cur_key2->next;
          goto end;
        }
        if (cur_key1->cmp_min_to_max(&key2_cpy) > 0)
        {
          /*
            The next range in key1 does not overlap with key2_cpy.
            Insert this range into key1 and move on to the next range
            in key2.
          */
          SEL_ARG *new_key1_range= new (param->mem_root) SEL_ARG(key2_cpy);
          if (!new_key1_range)
            return 0;                           // OOM
          key1= key1->insert(new_key1_range);
          break;
        }
        /*
          key2_cpy overlaps with the next range in key1 and the case
          is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
          to for(;;) to handle this situation.
        */
        continue;
      }
      else
      {
        /*
          This is the case:
          key2_cpy:        [-------]
          cur_key1:        [------------]

          Result:
          key1:            [-------][---]
                           ^        ^
                           new_arg  cur_key1
          Steps:

           0) If cur_key1->next_key_part is empty: do nothing.
              Reason: (key2_cpy->next_key_part OR
              cur_key1->next_key_part) will be empty and
              therefore equal to cur_key1->next_key_part. Thus,
              the range in key2_cpy is completely covered by
              cur_key1
           1) Make new_arg with range [cur_key1.min, key2_cpy.max].
              new_arg->next_key_part is OR between next_key_part of
              cur_key1 and key2_cpy
           2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
           3) Insert new_arg into key1
        */
        if (!cur_key1->next_key_part) // Step 0
        {
          key2_cpy.increment_use_count(-1);     // Free not used tree
          break;
        }
        SEL_ARG *new_arg= cur_key1->clone_last(&key2_cpy, param->mem_root);
        if (!new_arg)
          return 0; // OOM
        cur_key1->copy_max_to_min(&key2_cpy);
        cur_key1->increment_use_count(key1->use_count+1);
        /* Increment key count as it may be used for next loop */
        key2_cpy.increment_use_count(1);
        new_arg->next_key_part= key_or(param, cur_key1->next_key_part,
                                       key2_cpy.next_key_part);
        key1= key1->insert(new_arg);
        break;
      }
    }
    // Move on to next range in key2
    cur_key2= cur_key2->next;
  }

end:
  /*
    Add key2 ranges that are non-overlapping with and higher than the
    highest range in key1.
  */
  while (cur_key2)
  {
    SEL_ARG *next= cur_key2->next;
    if (key2_shared)
    {
      SEL_ARG *key2_cpy=new (param->mem_root) SEL_ARG(*cur_key2);  // Must make copy
      if (!key2_cpy)
        return 0;
      cur_key2->increment_use_count(key1->use_count+1);
      key1= key1->insert(key2_cpy);
    }
    else
      key1= key1->insert(cur_key2);   // Will destroy key2_root
    cur_key2= next;
  }
  key1->use_count++;

  return key1;
}
8953 
8954 
8955 /* Compare if two trees are equal */
8956 
eq_tree(SEL_ARG * a,SEL_ARG * b)8957 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
8958 {
8959   if (a == b)
8960     return 1;
8961   if (!a || !b || !a->is_same(b))
8962     return 0;
8963   if (a->left != &null_element && b->left != &null_element)
8964   {
8965     if (!eq_tree(a->left,b->left))
8966       return 0;
8967   }
8968   else if (a->left != &null_element || b->left != &null_element)
8969     return 0;
8970   if (a->right != &null_element && b->right != &null_element)
8971   {
8972     if (!eq_tree(a->right,b->right))
8973       return 0;
8974   }
8975   else if (a->right != &null_element || b->right != &null_element)
8976     return 0;
8977   if (a->next_key_part != b->next_key_part)
8978   {						// Sub range
8979     if (!a->next_key_part != !b->next_key_part ||
8980 	!eq_tree(a->next_key_part, b->next_key_part))
8981       return 0;
8982   }
8983   return 1;
8984 }
8985 
8986 
/*
  Insert "key" into this red-black tree of ranges (ordered by the
  minimum endpoint of each range) and link it into the next/prev list
  kept in the same order. Returns the root of the resulting tree,
  which may differ from "this" after rebalancing.
*/
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element, **par= NULL, *last_element= NULL;

  /* Ordinary BST descent to find the leaf position for "key". */
  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
	/* Link in list */
  if (par == &last_element->left)
  {
    /* key was inserted as a left child: it precedes last_element. */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key was inserted as a right child: it follows last_element. */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);			// rebalance tree
  root->use_count=this->use_count;		// copy root info
  root->elements= this->elements+1;
  root->maybe_flag=this->maybe_flag;
  return root;
}
9028 
9029 
9030 /*
9031 ** Find best key with min <= given key
** Because of the call context, this should never return 0 to get_range
9033 */
9034 
9035 SEL_ARG *
find_range(SEL_ARG * key)9036 SEL_ARG::find_range(SEL_ARG *key)
9037 {
9038   SEL_ARG *element=this,*found=0;
9039 
9040   for (;;)
9041   {
9042     if (element == &null_element)
9043       return found;
9044     int cmp=element->cmp_min_to_min(key);
9045     if (cmp == 0)
9046       return element;
9047     if (cmp < 0)
9048     {
9049       found=element;
9050       element=element->right;
9051     }
9052     else
9053       element=element->left;
9054   }
9055 }
9056 
9057 
9058 /*
  Remove an element from the tree
9060 
9061   SYNOPSIS
9062     tree_delete()
9063     key		Key that is to be deleted from tree (this)
9064 
9065   NOTE
9066     This also frees all sub trees that is used by the element
9067 
9068   RETURN
9069     root of new tree (with key deleted)
9070 */
9071 
SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_next_key_part_use_count(-1);
  /* "par" is the parent's link that points at "key" (or the root ptr). */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* Case 1: no left child - splice the right subtree into key's place. */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* Case 2: no right child - splice the left subtree into key's place. */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    /*
      Case 3: two children - replace key by its in-order successor
      (key->next), which has no left child by construction.
    */
    SEL_ARG *tmp=key->next;			// next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;	// unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;			// Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)				// key->right == key->next
      fix_par=tmp;				// new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);				// Maybe root later
  /* Removing a BLACK node breaks the black-height invariant: fix it up. */
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  root->use_count=this->use_count;		// Fix root counters
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}
9138 
9139 
9140 	/* Functions to fix up the tree after insert and delete */
9141 
left_rotate(SEL_ARG ** root,SEL_ARG * leaf)9142 static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
9143 {
9144   SEL_ARG *y=leaf->right;
9145   leaf->right=y->left;
9146   if (y->left != &null_element)
9147     y->left->parent=leaf;
9148   if (!(y->parent=leaf->parent))
9149     *root=y;
9150   else
9151     *leaf->parent_ptr()=y;
9152   y->left=leaf;
9153   leaf->parent=y;
9154 }
9155 
right_rotate(SEL_ARG ** root,SEL_ARG * leaf)9156 static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
9157 {
9158   SEL_ARG *y=leaf->left;
9159   leaf->left=y->right;
9160   if (y->right != &null_element)
9161     y->right->parent=leaf;
9162   if (!(y->parent=leaf->parent))
9163     *root=y;
9164   else
9165     *leaf->parent_ptr()=y;
9166   y->right=leaf;
9167   leaf->parent=y;
9168 }
9169 
9170 
/*
  Red-black insert fixup: "leaf" has just been linked into the tree by
  insert(). Recolor and rotate until the red-black properties are
  restored. Returns the (possibly new) root.
*/
SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  /* New nodes start red; fix any resulting red-red violation bottom-up. */
  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
      /* Parent is a left child; y is the "uncle" node. */
      y= par2->right;
      if (y->color == RED)
      {
	/* Red uncle: recolor and continue fixing from the grandparent. */
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	/* Black uncle: at most two rotations finish the fixup. */
	if (leaf == par->right)
	{
	  left_rotate(&root,leaf->parent);
	  par=leaf;			/* leaf is now parent to old leaf */
	}
	par->color=BLACK;
	par2->color=RED;
	right_rotate(&root,par2);
	break;
      }
    }
    else
    {
      /* Mirror image of the above: parent is a right child. */
      y= par2->left;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->left)
	{
	  right_rotate(&root,par);
	  par=leaf;
	}
	par->color=BLACK;
	par2->color=RED;
	left_rotate(&root,par2);
	break;
      }
    }
  }
  /* The root is always black. */
  root->color=BLACK;
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  return root;
}
9233 
9234 
/*
  Red-black delete fixup: a BLACK node has been removed, so paths
  through "key" are one black node short. Walk upward, recoloring and
  rotating until the extra blackness is absorbed. "par" is key's
  parent, passed explicitly because "key" may be the null_element
  sentinel (whose parent pointer is shared). Returns the new root.
*/
SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      w=par->right;				// w = x's sibling
      if (w->color == SEL_ARG::RED)
      {
	/* Case 1: red sibling - rotate so x gets a black sibling. */
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	left_rotate(&root,par);
	w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
	/* Case 2: both of w's children black - push problem upward. */
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->right->color == SEL_ARG::BLACK)
	{
	  /* Case 3: reshape into case 4. */
	  w->left->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  right_rotate(&root,w);
	  w=par->right;
	}
	/* Case 4: final rotation removes the black deficit. */
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->right->color=SEL_ARG::BLACK;
	left_rotate(&root,par);
	x=root;
	break;
      }
    }
    else
    {
      /* Mirror image of the above: x is a right child. */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	right_rotate(&root,par);
	w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->left->color == SEL_ARG::BLACK)
	{
	  w->right->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  left_rotate(&root,w);
	  w=par->left;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->left->color=SEL_ARG::BLACK;
	right_rotate(&root,par);
	x=root;
	break;
      }
    }
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}
9312 
9313 
9314 #ifndef DBUG_OFF
9315 	/* Test that the properties for a red-black tree hold */
9316 
test_rb_tree(SEL_ARG * element,SEL_ARG * parent)9317 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
9318 {
9319   int count_l,count_r;
9320 
9321   if (element == &null_element)
9322     return 0;					// Found end of tree
9323   if (element->parent != parent)
9324   {
9325     sql_print_error("Wrong tree: Parent doesn't point at parent");
9326     return -1;
9327   }
9328   if (element->color == SEL_ARG::RED &&
9329       (element->left->color == SEL_ARG::RED ||
9330        element->right->color == SEL_ARG::RED))
9331   {
9332     sql_print_error("Wrong tree: Found two red in a row");
9333     return -1;
9334   }
9335   if (element->left == element->right && element->left != &null_element)
9336   {						// Dummy test
9337     sql_print_error("Wrong tree: Found right == left");
9338     return -1;
9339   }
9340   count_l=test_rb_tree(element->left,element);
9341   count_r=test_rb_tree(element->right,element);
9342   if (count_l >= 0 && count_r >= 0)
9343   {
9344     if (count_l == count_r)
9345       return count_l+(element->color == SEL_ARG::BLACK);
9346     sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
9347 	    count_l,count_r);
9348   }
9349   return -1;					// Error, no more warnings
9350 }
9351 #endif
9352 
9353 
9354 /**
9355   Count how many times SEL_ARG graph "root" refers to its part "key" via
9356   transitive closure.
9357 
9358   @param root  An RB-Root node in a SEL_ARG graph.
9359   @param key   Another RB-Root node in that SEL_ARG graph.
9360 
  The passed "root" node may refer to "key" node via root->next_key_part,
  root->next->next_key_part, and so on.
9363 
9364   This function counts how many times the node "key" is referred (via
9365   SEL_ARG::next_key_part) by
9366   - intervals of RB-tree pointed by "root",
9367   - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
9368   intervals of RB-tree pointed by "root",
9369   - and so on.
9370 
9371   Here is an example (horizontal links represent next_key_part pointers,
  vertical links - next/prev pointers):
9373 
9374          +----+               $
9375          |root|-----------------+
9376          +----+               $ |
9377            |                  $ |
9378            |                  $ |
9379          +----+       +---+   $ |     +---+    Here the return value
9380          |    |- ... -|   |---$-+--+->|key|    will be 4.
9381          +----+       +---+   $ |  |  +---+
9382            |                  $ |  |
9383           ...                 $ |  |
9384            |                  $ |  |
9385          +----+   +---+       $ |  |
9386          |    |---|   |---------+  |
9387          +----+   +---+       $    |
9388            |        |         $    |
9389           ...     +---+       $    |
9390                   |   |------------+
9391                   +---+       $
9392   @return
9393   Number of links to "key" from nodes reachable from "root".
9394 */
9395 
count_key_part_usage(SEL_ARG * root,SEL_ARG * key)9396 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
9397 {
9398   ulong count= 0;
9399   for (root=root->first(); root ; root=root->next)
9400   {
9401     if (root->next_key_part)
9402     {
9403       if (root->next_key_part == key)
9404 	count++;
9405       if (root->next_key_part->part < key->part)
9406 	count+=count_key_part_usage(root->next_key_part,key);
9407     }
9408   }
9409   return count;
9410 }
9411 
9412 
9413 /*
9414   Check if SEL_ARG::use_count value is correct
9415 
9416   SYNOPSIS
9417     SEL_ARG::test_use_count()
9418       root  The root node of the SEL_ARG graph (an RB-tree root node that
9419             has the least value of sel_arg->part in the entire graph, and
9420             thus is the "origin" of the graph)
9421 
9422   DESCRIPTION
9423     Check if SEL_ARG::use_count value is correct. See the definition of
9424     use_count for what is "correct".
9425 
9426   RETURN
9427     true    an incorrect SEL_ARG::use_count is found,
9428     false   otherwise
9429 */
9430 
test_use_count(SEL_ARG * root)9431 bool SEL_ARG::test_use_count(SEL_ARG *root)
9432 {
9433   uint e_count=0;
9434   if (this == root && use_count != 1)
9435   {
9436     sql_print_information("Use_count: Wrong count %lu for root",use_count);
9437     // DBUG_ASSERT(false); // Todo - enable and clean up mess
9438     return true;
9439   }
9440   if (this->type != SEL_ARG::KEY_RANGE)
9441     return false;
9442   for (SEL_ARG *pos=first(); pos ; pos=pos->next)
9443   {
9444     e_count++;
9445     if (pos->next_key_part)
9446     {
9447       ulong count=count_key_part_usage(root,pos->next_key_part);
9448       if (count > pos->next_key_part->use_count)
9449       {
9450         sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu "
9451                               "should be %lu", (long unsigned int)pos,
9452                               pos->next_key_part->use_count, count);
9453         // DBUG_ASSERT(false); // Todo - enable and clean up mess
9454 	return true;
9455       }
9456       pos->next_key_part->test_use_count(root);
9457     }
9458   }
9459   if (e_count != elements)
9460   {
9461     sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
9462                       e_count, elements, (long unsigned int) this);
9463     // DBUG_ASSERT(false); // Todo - enable and clean up mess
9464     return true;
9465   }
9466   return false;
9467 }
9468 
9469 /****************************************************************************
9470   MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
9471  ****************************************************************************/
9472 
9473 /* MRR range sequence, SEL_ARG* implementation: stack entry */
typedef struct st_range_seq_entry
{
  /*
    Pointers in min and max keys. They point to right-after-end of key
    images. The 0-th entry has these pointing to key tuple start.
  */
  uchar *min_key, *max_key;

  /*
    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
    min_key_flag may have NULL_RANGE set.
  */
  uint min_key_flag, max_key_flag;
  /*
    Handler read-method flag for this subtuple; copied from
    SEL_ARG::rkey_func_flag and used as start_key.flag for GEOM ranges.
  */
  enum ha_rkey_function rkey_func_flag;
  /* Number of key parts */
  uint min_key_parts, max_key_parts;
  /**
    Pointer into the R-B tree for this keypart. It points to the
    currently active range for the keypart, so calling next on it will
    get to the next range. sel_arg_range_seq_next() uses this to avoid
    reparsing the R-B range trees each time a new range is fetched.
  */
  SEL_ARG *key_tree;
} RANGE_SEQ_ENTRY;
9498 
9499 
9500 /*
9501   MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
9502 */
class Sel_arg_range_sequence
{
private:

  /**
    Stack of ranges for the curr_kp first keyparts. Used by
    sel_arg_range_seq_next() so that if the next range is equal to the
    previous one for the first x keyparts, stack[x-1] can be
    accumulated with the new range in keyparts > x to quickly form
    the next range to return.

    Notation used below: "x:y" means a range where
    "column_in_keypart_0=x" and "column_in_keypart_1=y". For
    simplicity, only equality (no BETWEEN, < etc) is considered in the
    example but the same principle applies to other range predicate
    operators too.

    Consider a query with these range predicates:
      (kp0=1 and kp1=2 and kp2=3) or
      (kp0=1 and kp1=2 and kp2=4) or
      (kp0=1 and kp1=3 and kp2=5) or
      (kp0=1 and kp1=3 and kp2=6)

    1) sel_arg_range_seq_next() is called the first time
       - traverse the R-B tree (see SEL_ARG) to find the first range
       - returns range "1:2:3"
       - values in stack after this: stack[1, 1:2, 1:2:3]
    2) sel_arg_range_seq_next() is called second time
       - keypart 2 has another range, so the next range in
         keypart 2 is appended to stack[1] and saved
         in stack[2]
       - returns range "1:2:4"
       - values in stack after this: stack[1, 1:2, 1:2:4]
    3) sel_arg_range_seq_next() is called the third time
       - no more ranges in keypart 2, but keypart 1 has
         another range, so the next range in keypart 1 is
         appended to stack[0] and saved in stack[1]. The first
         range in keypart 2 is then appended to stack[1] and
         saved in stack[2]
       - returns range "1:3:5"
       - values in stack after this: stack[1, 1:3, 1:3:5]
    4) sel_arg_range_seq_next() is called the fourth time
       - keypart 2 has another range, see 2)
       - returns range "1:3:6"
       - values in stack after this: stack[1, 1:3, 1:3:6]
   */
  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
  /*
    Index of last used element in the above array. A value of -1 means
    that the stack is empty.
  */
  int curr_kp;

public:
  uint keyno;      /* index of used tree in SEL_TREE structure */
  uint real_keyno; /* Number of the index in tables */

  PARAM * const param;
  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */

  Sel_arg_range_sequence(PARAM *param_arg) : param(param_arg) { reset(); }

  /*
    Empty the stack and point the keypart-0 entry at the start of
    the min/max key buffers owned by param.
  */
  void reset()
  {
    stack[0].key_tree= NULL;
    stack[0].min_key= (uchar*)param->min_key;
    stack[0].min_key_flag= 0;
    stack[0].min_key_parts= 0;
    stack[0].rkey_func_flag= HA_READ_INVALID;

    stack[0].max_key= (uchar*)param->max_key;
    stack[0].max_key_flag= 0;
    stack[0].max_key_parts= 0;
    curr_kp= -1;
  }

  bool stack_empty() const { return (curr_kp == -1); }

  /* Push the range in key_tree as the next keypart of the current range. */
  void stack_push_range(SEL_ARG *key_tree);

  /* Pop the last keypart pushed; popping keypart 0 fully resets the state. */
  void stack_pop_range()
  {
    DBUG_ASSERT(!stack_empty());
    if (curr_kp == 0)
      reset();
    else
      curr_kp--;
  }

  /* Number of keyparts currently on the stack. */
  int stack_size() const { return curr_kp + 1; }

  /* Entry for the last keypart pushed, or NULL if the stack is empty. */
  RANGE_SEQ_ENTRY *stack_top()
  {
    return stack_empty() ? NULL : &stack[curr_kp];
  }
};
9599 
9600 
9601 /*
9602   Range sequence interface, SEL_ARG* implementation: Initialize the traversal
9603 
9604   SYNOPSIS
9605     init()
9606       init_params  SEL_ARG tree traversal context
9607       n_ranges     [ignored] The number of ranges obtained
9608       flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
9609 
9610   RETURN
9611     Value of init_param
9612 */
9613 
sel_arg_range_seq_init(void * init_param,uint n_ranges,uint flags)9614 range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
9615 {
9616   Sel_arg_range_sequence *seq=
9617     static_cast<Sel_arg_range_sequence*>(init_param);
9618   seq->reset();
9619   return init_param;
9620 }
9621 
9622 
void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree)
{

  DBUG_ASSERT((uint)curr_kp+1 < MAX_REF_PARTS);

  RANGE_SEQ_ENTRY *push_position= &stack[curr_kp + 1];
  RANGE_SEQ_ENTRY *last_added_kp= stack_top();
  if (stack_empty())
  {
    /*
       If we get here this is either
         a) the first time a range sequence is constructed for this
            range access method (in which case stack[0] has not been
            modified since the constructor was called), or
         b) there are multiple ranges for the first keypart in the
            condition (and we have called stack_pop_range() to empty
            the stack).
       In both cases, reset() has been called and all fields in
       push_position have been reset. All we need to do is to copy the
       min/max key flags from the predicate we're about to add to
       stack[0].
    */
    push_position->min_key_flag= key_tree->min_flag;
    push_position->max_key_flag= key_tree->max_flag;
    push_position->rkey_func_flag= key_tree->rkey_func_flag;
  }
  else
  {
    /*
      Continue the key tuple where the previous keypart left off:
      inherit buffer positions, part counts and accumulated flags.
    */
    push_position->min_key= last_added_kp->min_key;
    push_position->max_key= last_added_kp->max_key;
    push_position->min_key_parts= last_added_kp->min_key_parts;
    push_position->max_key_parts= last_added_kp->max_key_parts;
    push_position->min_key_flag= last_added_kp->min_key_flag |
                                 key_tree->min_flag;
    push_position->max_key_flag= last_added_kp->max_key_flag |
                                 key_tree->max_flag;
    push_position->rkey_func_flag= key_tree->rkey_func_flag;
  }

  push_position->key_tree= key_tree;
  uint16 stor_length= param->key[keyno][key_tree->part].store_length;
  /* psergey-merge-done:
  key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
                  &cur->min_key, prev->min_key_flag,
                  &cur->max_key, prev->max_key_flag);
  */
  /* Append this keypart's min/max values to the key image buffers. */
  push_position->min_key_parts+=
    key_tree->store_min(stor_length, &push_position->min_key,
                        last_added_kp ? last_added_kp->min_key_flag : 0);
  push_position->max_key_parts+=
    key_tree->store_max(stor_length, &push_position->max_key,
                        last_added_kp ? last_added_kp->max_key_flag : 0);

  if (key_tree->is_null_interval())
    push_position->min_key_flag |= NULL_RANGE;
  curr_kp++;
}
9680 
9681 
9682 /*
9683   Range sequence interface, SEL_ARG* implementation: get the next interval
9684   in the R-B tree
9685 
9686   SYNOPSIS
9687     sel_arg_range_seq_next()
9688       rseq        Value returned from sel_arg_range_seq_init
9689       range  OUT  Store information about the range here
9690 
9691   DESCRIPTION
9692     This is "get_next" function for Range sequence interface implementation
9693     for SEL_ARG* tree.
9694 
9695   IMPLEMENTATION
9696     The traversal also updates those param members:
9697       - is_ror_scan
9698       - range_count
9699       - max_key_part
9700 
9701   RETURN
9702     0  Ok
9703     1  No more ranges in the sequence
9704 
9705   NOTE: append_range_all_keyparts(), which is used to e.g. print
9706   ranges to Optimizer Trace in a human readable format, mimics the
9707   behavior of this function.
9708 */
9709 
9710 //psergey-merge-todo: support check_quick_keys:max_keypart
sel_arg_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)9711 uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
9712 {
9713   SEL_ARG *key_tree;
9714   Sel_arg_range_sequence *seq= static_cast<Sel_arg_range_sequence*>(rseq);
9715 
9716   if (seq->stack_empty())
9717   {
9718     /*
9719       This is the first time sel_arg_range_seq_next is called.
9720       seq->start points to the root of the R-B tree for the first
9721       keypart
9722     */
9723     key_tree= seq->start;
9724 
9725     /*
9726       Move to the first range for the first keypart. Save this range
9727       in seq->stack[0] and carry on to ranges in the next keypart if
9728       any
9729     */
9730     key_tree= key_tree->first();
9731     seq->stack_push_range(key_tree);
9732   }
9733   else
9734   {
9735     /*
9736       This is not the first time sel_arg_range_seq_next is called, so
9737       seq->stack is populated with the range the last call to this
9738       function found. seq->stack[current_keypart].key_tree points to a
9739       leaf in the R-B tree of the last keypart that was part of the
9740       former range. This is the starting point for finding the next
9741       range. @see Sel_arg_range_sequence::stack
9742     */
9743     // See if there are more ranges in this or any of the previous keyparts
9744     while (true)
9745     {
9746       key_tree= seq->stack_top()->key_tree;
9747       seq->stack_pop_range();
9748       if (key_tree->next)
9749       {
9750         /* This keypart has more ranges */
9751         DBUG_ASSERT(key_tree->next != &null_element);
9752         key_tree= key_tree->next;
9753 
9754         /*
9755           save the next range for this keypart and carry on to ranges in
9756           the next keypart if any
9757         */
9758         seq->stack_push_range(key_tree);
9759         seq->param->is_ror_scan= FALSE;
9760         break;
9761       }
9762 
9763       if (seq->stack_empty())
9764       {
9765         // There are no more ranges for the first keypart: we're done
9766         return 1;
9767       }
9768       /*
9769          There are no more ranges for the current keypart. Step back
9770          to the previous keypart and see if there are more ranges
9771          there.
9772       */
9773     }
9774   }
9775 
9776   DBUG_ASSERT(!seq->stack_empty());
9777 
9778   /*
9779     Add range info for the next keypart if
9780       1) there is a range predicate for a later keypart
9781       2) the range predicate is for the next keypart in the index: a
9782          range predicate on keypartX+1 can only be used if there is a
9783          range predicate on keypartX.
9784       3) the range predicate on the next keypart is usable
9785   */
9786   while (key_tree->next_key_part &&                              // 1)
9787          key_tree->next_key_part != &null_element &&             // 1)
9788          key_tree->next_key_part->part == key_tree->part + 1 &&  // 2)
9789          key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)    // 3)
9790   {
9791     {
9792       DBUG_PRINT("info", ("while(): key_tree->part %d",key_tree->part));
9793       RANGE_SEQ_ENTRY *cur= seq->stack_top();
9794       const size_t min_key_total_length= cur->min_key - seq->param->min_key;
9795       const size_t max_key_total_length= cur->max_key - seq->param->max_key;
9796 
9797       /*
9798         Check if more ranges can be added. This is the case if all
9799         predicates for keyparts handled so far are equality
9800         predicates. If either of the following apply, there are
9801         non-equality predicates in stack[]:
9802 
9803         1) min_key_total_length != max_key_total_length (because
9804            equality ranges are stored as "min_key = max_key = <value>")
9805         2) memcmp(<min_key_values>,<max_key_values>) != 0 (same argument as 1)
9806         3) A min or max flag has been set: Because flags denote ranges
9807            ('<', '<=' etc), any value but 0 indicates a non-equality
9808            predicate.
9809        */
9810 
9811       uchar* min_key_start;
9812       uchar* max_key_start;
9813       size_t cur_key_length;
9814 
9815       if (seq->stack_size() == 1)
9816       {
9817         min_key_start= seq->param->min_key;
9818         max_key_start= seq->param->max_key;
9819         cur_key_length= min_key_total_length;
9820       }
9821       else
9822       {
9823         const RANGE_SEQ_ENTRY prev= cur[-1];
9824         min_key_start= prev.min_key;
9825         max_key_start= prev.max_key;
9826         cur_key_length= cur->min_key - prev.min_key;
9827       }
9828 
9829       if ((min_key_total_length != max_key_total_length) ||         // 1)
9830           (memcmp(min_key_start, max_key_start, cur_key_length)) || // 2)
9831           (key_tree->min_flag || key_tree->max_flag))               // 3)
9832       {
9833         DBUG_PRINT("info", ("while(): inside if()"));
9834         /*
9835           The range predicate up to and including the one in key_tree
9836           is usable by range access but does not allow subranges made
9837           up from predicates in later keyparts. This may e.g. be
9838           because the predicate operator is "<". Since there are range
9839           predicates on more keyparts, we use those to more closely
9840           specify the start and stop locations for the range. Example:
9841 
9842                 "SELECT * FROM t1 WHERE a >= 2 AND b >= 3":
9843 
9844                 t1 content:
9845                 -----------
9846                 1 1
9847                 2 1     <- 1)
9848                 2 2
9849                 2 3     <- 2)
9850                 2 4
9851                 3 1
9852                 3 2
9853                 3 3
9854 
9855           The predicate cannot be translated into something like
9856              "(a=2 and b>=3) or (a=3 and b>=3) or ..."
9857           I.e., it cannot be divided into subranges, but by storing
9858           min/max key below we can at least start the scan from 2)
9859           instead of 1)
9860         */
9861         SEL_ARG *store_key_part= key_tree->next_key_part;
9862         seq->param->is_ror_scan= FALSE;
9863         if (!key_tree->min_flag)
9864           cur->min_key_parts +=
9865             store_key_part->store_min_key(seq->param->key[seq->keyno],
9866                                           &cur->min_key,
9867                                           &cur->min_key_flag,
9868                                           MAX_KEY);
9869         if (!key_tree->max_flag)
9870           cur->max_key_parts +=
9871             store_key_part->store_max_key(seq->param->key[seq->keyno],
9872                                           &cur->max_key,
9873                                           &cur->max_key_flag,
9874                                           MAX_KEY);
9875         break;
9876       }
9877     }
9878 
9879     /*
9880       There are usable range predicates for the next keypart and the
9881       range predicate for the current keypart allows us to make use of
9882       them. Move to the first range predicate for the next keypart.
9883       Push this range predicate to seq->stack and move on to the next
9884       keypart (if any). @see Sel_arg_range_sequence::stack
9885     */
9886     key_tree= key_tree->next_key_part->first();
9887     seq->stack_push_range(key_tree);
9888   }
9889 
9890   DBUG_ASSERT(!seq->stack_empty() && (seq->stack_top() != NULL));
9891 
9892   // We now have a full range predicate in seq->stack_top()
9893   RANGE_SEQ_ENTRY *cur= seq->stack_top();
9894   PARAM *param= seq->param;
9895   size_t min_key_length= cur->min_key - param->min_key;
9896 
9897   if (cur->min_key_flag & GEOM_FLAG)
9898   {
9899     range->range_flag= cur->min_key_flag;
9900 
9901     /* Here minimum contains also function code bits, and maximum is +inf */
9902     range->start_key.key=    param->min_key;
9903     range->start_key.length= min_key_length;
9904     range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9905     range->start_key.flag= cur->rkey_func_flag;
9906     /*
9907       Spatial operators are only allowed on spatial indexes, and no
9908       spatial index can at the moment return rows in ROWID order
9909     */
9910     DBUG_ASSERT(!param->is_ror_scan);
9911   }
9912   else
9913   {
9914     const KEY *cur_key_info= &param->table->key_info[seq->real_keyno];
9915     range->range_flag= cur->min_key_flag | cur->max_key_flag;
9916 
9917     range->start_key.key=    param->min_key;
9918     range->start_key.length= cur->min_key - param->min_key;
9919     range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9920     range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
9921                                                            HA_READ_KEY_EXACT);
9922 
9923     range->end_key.key=    param->max_key;
9924     range->end_key.length= cur->max_key - param->max_key;
9925     range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
9926     range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
9927                                                          HA_READ_AFTER_KEY);
9928 
9929     /*
9930       This is an equality range (keypart_0=X and ... and keypart_n=Z) if
9931         1) There are no flags indicating open range (e.g.,
9932            "keypart_x > y") or GIS.
9933         2) The lower bound and the upper bound of the range has the
9934            same value (min_key == max_key).
9935      */
9936     const uint is_open_range= (NO_MIN_RANGE | NO_MAX_RANGE |
9937                                NEAR_MIN | NEAR_MAX | GEOM_FLAG);
9938     const bool is_eq_range_pred=
9939       !(cur->min_key_flag & is_open_range) &&                           // 1)
9940       !(cur->max_key_flag & is_open_range) &&                           // 1)
9941       range->start_key.length == range->end_key.length &&               // 2)
9942       !memcmp(param->min_key, param->max_key, range->start_key.length);
9943 
9944     if (is_eq_range_pred)
9945     {
9946       range->range_flag= EQ_RANGE;
9947       /*
9948         Use statistics instead of index dives for estimates of rows in
9949         this range if the user requested it
9950       */
9951       if (param->use_index_statistics)
9952         range->range_flag|= USE_INDEX_STATISTICS;
9953 
9954       /*
9955         An equality range is a unique range (0 or 1 rows in the range)
9956         if the index is unique (1) and all keyparts are used (2).
9957         Note that keys which are extended with PK parts have no
9958         HA_NOSAME flag. So we can use user_defined_key_parts.
9959       */
9960       if (cur_key_info->flags & HA_NOSAME &&                              // 1)
9961           (uint)key_tree->part+1 == cur_key_info->user_defined_key_parts) // 2)
9962         range->range_flag|= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
9963     }
9964 
9965     if (param->is_ror_scan)
9966     {
9967       const uint key_part_number= key_tree->part + 1;
9968       /*
9969         If we get here, the condition on the key was converted to form
9970         "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
9971           somecond(keyXpart{key_tree->part})"
9972         Check if
9973           somecond is "keyXpart{key_tree->part} = const" and
9974           uncovered "tail" of KeyX parts is either empty or is identical to
9975           first members of clustered primary key.
9976 
9977         If last key part is PK part added to the key as an extension
9978         and is_key_scan_ror() result is TRUE then it's possible to
9979         use ROR scan.
9980       */
9981       if ((!is_eq_range_pred &&
9982            key_part_number <= cur_key_info->user_defined_key_parts) ||
9983           !is_key_scan_ror(param, seq->real_keyno, key_part_number))
9984         param->is_ror_scan= FALSE;
9985     }
9986   }
9987 
9988   seq->param->range_count++;
9989   seq->param->max_key_part=max<uint>(seq->param->max_key_part,key_tree->part);
9990 
9991   return 0;
9992 }
9993 
9994 
9995 /*
9996   Calculate estimate of number records that will be retrieved by a range
9997   scan on given index using given SEL_ARG intervals tree.
9998 
9999   SYNOPSIS
10000     check_quick_select()
10001       param             Parameter from test_quick_select
10002       idx               Number of index to use in PARAM::key SEL_TREE::key
10003       index_only        TRUE  - assume only index tuples will be accessed
10004                         FALSE - assume full table rows will be read
10005       tree              Transformed selection condition, tree->key[idx] holds
10006                         the intervals for the given index.
10007       update_tbl_stats  TRUE <=> update table->quick_* with information
10008                         about range scan we've evaluated.
10009       mrr_flags   INOUT MRR access flags
10010       cost        OUT   Scan cost
10011 
10012   NOTES
10013     param->is_ror_scan is set to reflect if the key scan is a ROR (see
10014     is_key_scan_ror function for more info)
10015     param->table->quick_*, param->range_count (and maybe others) are
10016     updated with data of given key scan, see quick_range_seq_next for details.
10017 
10018   RETURN
10019     Estimate # of records to be retrieved.
10020     HA_POS_ERROR if estimate calculation failed due to table handler problems.
10021 */
10022 
static
ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                           SEL_ARG *tree, bool update_tbl_stats,
                           uint *mrr_flags, uint *bufsize, Cost_estimate *cost)
{
  Sel_arg_range_sequence seq(param);
  RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
  handler *file= param->table->file;
  ha_rows rows;
  uint keynr= param->real_keynr[idx];
  DBUG_ENTER("check_quick_select");

  /* Handle cases when we don't have a valid non-empty list of range */
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);                            // Empty range: 0 rows match
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree

  /* Set up the range sequence the MRR interface will iterate over. */
  seq.keyno= idx;
  seq.real_keyno= keynr;
  seq.start= tree;

  /* Reset per-scan statistics; filled in by sel_arg_range_seq_next(). */
  param->range_count=0;
  param->max_key_part=0;

  /*
    If there are more equality ranges than specified by the
    eq_range_index_dive_limit variable we switches from using index
    dives to use statistics.
  */
  uint range_count= 0;
  param->use_index_statistics=
    eq_ranges_exceeds_limit(tree, &range_count,
                            param->thd->variables.eq_range_index_dive_limit);

  /* Assume ROR until something below (or the range walk) disproves it. */
  param->is_ror_scan= TRUE;
  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;

  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
  *mrr_flags|= HA_MRR_NO_ASSOCIATION;
  /*
    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
  */
  if (param->order_direction != ORDER::ORDER_NOT_RELEVANT)
    *mrr_flags|= HA_MRR_SORTED;

  bool pk_is_clustered= file->primary_key_is_clustered();
  /*
    Index-only reads are possible when the index covers the query and it
    is not the clustered PK (reading the clustered PK reads full rows).
  */
  if (index_only &&
      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
      !(pk_is_clustered && keynr == param->table->s->primary_key))
     *mrr_flags |= HA_MRR_INDEX_ONLY;

  if (current_thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del

  *bufsize= param->thd->variables.read_rnd_buff_size;
  // Sets is_ror_scan to false for some queries, e.g. multi-ranges
  rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                          bufsize, mrr_flags, cost);
  if (rows != HA_POS_ERROR)
  {
    /* Got a valid estimate: publish it in the table statistics. */
    param->table->quick_rows[keynr]=rows;
    if (update_tbl_stats)
    {
      param->table->quick_keys.set_bit(keynr);
      param->table->quick_key_parts[keynr]=param->max_key_part+1;
      param->table->quick_n_ranges[keynr]= param->range_count;
      param->table->quick_condition_rows=
        min(param->table->quick_condition_rows, rows);
    }
    param->table->possible_quick_keys.set_bit(keynr);
  }
  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /*
      All scans are non-ROR scans for those index types.
      TODO: Don't have this logic here, make table engines return
      appropriate flags instead.
    */
    param->is_ror_scan= FALSE;
  }
  else
  {
    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
    if (param->table->s->primary_key == keynr && pk_is_clustered)
      param->is_ror_scan= TRUE;
  }
  /* Engine flag overrides any ROR conclusion reached above. */
  if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;

  /*
    QUICK_ROR_INTERSECT_SELECT and QUICK_ROR_UNION_SELECT do read_set
    manipulations in reset(), which breaks virtual generated column's
    computation logic, which is used when reading index values.
    So, disable index merge intersection/union for any index on such column.
    @todo lift this implementation restriction
  */
  if (param->table->index_contains_some_virtual_gcol(keynr))
    param->is_ror_scan= false;

  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
  DBUG_RETURN(rows);
}
10131 
10132 
10133 /*
10134   Check if key scan on given index with equality conditions on first n key
10135   parts is a ROR scan.
10136 
10137   SYNOPSIS
10138     is_key_scan_ror()
10139       param  Parameter from test_quick_select
10140       keynr  Number of key in the table. The key must not be a clustered
10141              primary key.
10142       nparts Number of first key parts for which equality conditions
10143              are present.
10144 
10145   NOTES
10146     ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
10147     ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
10148 
10149     This function is needed to handle a practically-important special case:
10150     an index scan is a ROR scan if it is done using a condition in form
10151 
10152         "key1_1=c_1 AND ... AND key1_n=c_n"
10153 
10154     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
10155 
10156     and the table has a clustered Primary Key defined as
10157 
10158       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
10159 
10160     i.e. the first key parts of it are identical to uncovered parts ot the
10161     key being scanned. This function assumes that the index flags do not
10162     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
10163 
10164     Check (1) is made in quick_range_seq_next()
10165 
10166   RETURN
10167     TRUE   The scan is ROR-scan
10168     FALSE  Otherwise
10169 */
10170 
is_key_scan_ror(PARAM * param,uint keynr,uint nparts)10171 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts)
10172 {
10173   KEY *table_key= param->table->key_info + keynr;
10174 
10175   /*
10176     Range predicates on hidden key parts do not change the fact
10177     that a scan is rowid ordered, so we only care about user
10178     defined keyparts
10179   */
10180   const uint user_defined_nparts=
10181     std::min<uint>(nparts, table_key->user_defined_key_parts);
10182 
10183   KEY_PART_INFO *key_part= table_key->key_part + user_defined_nparts;
10184   KEY_PART_INFO *key_part_end= (table_key->key_part +
10185                                 table_key->user_defined_key_parts);
10186   uint pk_number;
10187 
10188   for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
10189   {
10190     uint16 fieldnr= param->table->key_info[keynr].
10191                     key_part[kp - table_key->key_part].fieldnr - 1;
10192     if (param->table->field[fieldnr]->key_length() != kp->length)
10193       return FALSE;
10194   }
10195 
10196   if (key_part == key_part_end)
10197     return TRUE;
10198 
10199   key_part= table_key->key_part + user_defined_nparts;
10200   pk_number= param->table->s->primary_key;
10201   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
10202     return FALSE;
10203 
10204   KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
10205   KEY_PART_INFO *pk_part_end=
10206     pk_part + param->table->key_info[pk_number].user_defined_key_parts;
10207   for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
10208        ++key_part, ++pk_part)
10209   {
10210     if ((key_part->field != pk_part->field) ||
10211         (key_part->length != pk_part->length))
10212       return FALSE;
10213   }
10214   return (key_part == key_part_end);
10215 }
10216 
10217 
10218 /*
10219   Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
10220 
10221   SYNOPSIS
10222     get_quick_select()
10223       param
10224       idx            Index of used key in param->key.
10225       key_tree       SEL_ARG tree for the used key
10226       mrr_flags      MRR parameter for quick select
10227       mrr_buf_size   MRR parameter for quick select
10228       parent_alloc   If not NULL, use it to allocate memory for
10229                      quick select data. Otherwise use quick->alloc.
10230   NOTES
10231     The caller must call QUICK_SELECT::init for returned quick select.
10232 
10233     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
10234     deallocated when the returned quick select is deleted.
10235 
10236   RETURN
10237     NULL on error
10238     otherwise created quick select
10239 */
10240 
10241 QUICK_RANGE_SELECT *
get_quick_select(PARAM * param,uint idx,SEL_ARG * key_tree,uint mrr_flags,uint mrr_buf_size,MEM_ROOT * parent_alloc)10242 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
10243                  uint mrr_buf_size, MEM_ROOT *parent_alloc)
10244 {
10245   QUICK_RANGE_SELECT *quick;
10246   bool create_err= FALSE;
10247   DBUG_ENTER("get_quick_select");
10248 
10249   if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
10250     quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
10251                                       param->real_keynr[idx],
10252                                       MY_TEST(parent_alloc),
10253                                       parent_alloc, &create_err);
10254   else
10255     quick=new QUICK_RANGE_SELECT(param->thd, param->table,
10256                                  param->real_keynr[idx],
10257                                  MY_TEST(parent_alloc), NULL, &create_err);
10258 
10259   if (quick)
10260   {
10261     if (create_err ||
10262 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
10263 		       param->max_key,0))
10264     {
10265       delete quick;
10266       quick=0;
10267     }
10268     else
10269     {
10270       quick->mrr_flags= mrr_flags;
10271       quick->mrr_buf_size= mrr_buf_size;
10272       quick->key_parts=(KEY_PART*)
10273         memdup_root(parent_alloc? parent_alloc : &quick->alloc,
10274                     (char*) param->key[idx],
10275                     sizeof(KEY_PART) *
10276                     actual_key_parts(&param->
10277                                      table->key_info[param->real_keynr[idx]]));
10278     }
10279   }
10280   DBUG_RETURN(quick);
10281 }
10282 
10283 
10284 /*
10285 ** Fix this to get all possible sub_ranges
10286 */
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
	       SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
	       uchar *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;
  int min_part= key_tree->part-1, // # of keypart values in min_key buffer
      max_part= key_tree->part-1; // # of keypart values in max_key buffer

  /* In-order traversal: first emit ranges for all intervals left of this. */
  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  /* Append this keypart's min/max values to the shared key buffers. */
  uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
  min_part+= key_tree->store_min(key[key_tree->part].store_length,
                                 &tmp_min_key,min_key_flag);
  max_part+= key_tree->store_max(key[key_tree->part].store_length,
                                 &tmp_max_key,max_key_flag);

  if (key_tree->next_key_part &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
      key_tree->next_key_part->part == key_tree->part+1)
  {						  // const key as prefix
    if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
         memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
	 key_tree->min_flag==0 && key_tree->max_flag==0)
    {
      /*
        This keypart is a closed equality (min == max, no open ends), so
        we can recurse and form subranges from the next keypart's tree.
      */
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    {
      /*
        Not an equality: cannot split into subranges, but we can still
        tighten this single range's endpoints with min/max values taken
        from the later keyparts.
      */
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
        min_part+= key_tree->next_key_part->store_min_key(key,
                                                          &tmp_min_key,
                                                          &tmp_min_flag,
                                                          MAX_KEY);
      if (!tmp_max_flag)
        max_part+= key_tree->next_key_part->store_max_key(key,
                                                          &tmp_max_key,
                                                          &tmp_max_flag,
                                                          MAX_KEY);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
  {
    /* A GEOM range has no separate max part; keep only the min flags. */
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }

  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
  /* No flags set so far: check if this is an equality/unique range. */
  if (flag == 0)
  {
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      const KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
      /*
        Note that keys which are extended with PK parts have no
        HA_NOSAME flag. So we can use user_defined_key_parts.
      */
      if ((table_key->flags & HA_NOSAME) &&
          key_tree->part == table_key->user_defined_key_parts - 1)
      {
        /* NULL values cannot be unique even in a unique index. */
        if ((table_key->flags & HA_NULL_PART_KEY) &&
            null_part_in_key(key,
                             param->min_key,
                             (uint) (tmp_min_key - param->min_key)))
          flag|= NULL_RANGE;
        else
          flag|= UNIQUE_RANGE;
      }
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
  if (!(range= new QUICK_RANGE(param->min_key,
			       (uint) (tmp_min_key - param->min_key),
                               min_part >=0 ? make_keypart_map(min_part) : 0,
			       param->max_key,
			       (uint) (tmp_max_key - param->max_key),
                               max_part >=0 ? make_keypart_map(max_part) : 0,
			       flag, key_tree->rkey_func_flag)))
    return 1;			// out of memory

  set_if_bigger(quick->max_used_key_length, range->min_length);
  set_if_bigger(quick->max_used_key_length, range->max_length);
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
  if (quick->ranges.push_back(range))
    return 1;

 end:
  /* Finish the in-order traversal: intervals right of this one. */
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}
10408 
10409 /*
10410   Return 1 if there is only one range and this uses the whole unique key
10411 */
10412 
unique_key_range()10413 bool QUICK_RANGE_SELECT::unique_key_range()
10414 {
10415   if (ranges.size() == 1)
10416   {
10417     QUICK_RANGE *tmp= ranges[0];
10418     if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
10419     {
10420       KEY *key=head->key_info+index;
10421       return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
10422     }
10423   }
10424   return 0;
10425 }
10426 
10427 
10428 
10429 /*
10430   Return TRUE if any part of the key is NULL
10431 
10432   SYNOPSIS
10433     null_part_in_key()
10434       key_part  Array of key parts (index description)
10435       key       Key values tuple
10436       length    Length of key values tuple in bytes.
10437 
10438   RETURN
10439     TRUE   The tuple has at least one "keypartX is NULL"
10440     FALSE  Otherwise
10441 */
10442 
null_part_in_key(KEY_PART * key_part,const uchar * key,uint length)10443 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
10444 {
10445   for (const uchar *end=key+length ;
10446        key < end;
10447        key+= key_part++->store_length)
10448   {
10449     if (key_part->null_bit && *key)
10450       return 1;
10451   }
10452   return 0;
10453 }
10454 
10455 
is_keys_used(const MY_BITMAP * fields)10456 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
10457 {
10458   return is_key_used(head, index, fields);
10459 }
10460 
is_keys_used(const MY_BITMAP * fields)10461 bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
10462 {
10463   QUICK_RANGE_SELECT *quick;
10464   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
10465   while ((quick= it++))
10466   {
10467     if (is_key_used(head, quick->index, fields))
10468       return 1;
10469   }
10470   return 0;
10471 }
10472 
is_keys_used(const MY_BITMAP * fields)10473 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
10474 {
10475   QUICK_RANGE_SELECT *quick;
10476   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
10477   while ((quick= it++))
10478   {
10479     if (is_key_used(head, quick->index, fields))
10480       return 1;
10481   }
10482   return 0;
10483 }
10484 
is_keys_used(const MY_BITMAP * fields)10485 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
10486 {
10487   QUICK_SELECT_I *quick;
10488   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
10489   while ((quick= it++))
10490   {
10491     if (quick->is_keys_used(fields))
10492       return 1;
10493   }
10494   return 0;
10495 }
10496 
10497 
get_ft_select(THD * thd,TABLE * table,uint key)10498 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
10499 {
10500   bool create_err= FALSE;
10501   FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
10502   if (create_err)
10503   {
10504     delete fts;
10505     return NULL;
10506   }
10507   else
10508     return fts;
10509 }
10510 
10511 
10512 /*
10513   Check if any columns in the key value specified
10514   by 'key_info' has a NULL-value.
10515 */
10516 
10517 static bool
key_has_nulls(const KEY * key_info,const uchar * key,uint key_len)10518 key_has_nulls(const KEY* key_info, const uchar *key, uint key_len)
10519 {
10520   KEY_PART_INFO *curr_part, *end_part;
10521   const uchar* end_ptr= key + key_len;
10522   curr_part= key_info->key_part;
10523   end_part= curr_part + key_info->user_defined_key_parts;
10524 
10525   for (; curr_part != end_part && key < end_ptr; curr_part++)
10526   {
10527     if (curr_part->null_bit && *key)
10528       return TRUE;
10529 
10530     key += curr_part->store_length;
10531   }
10532   return FALSE;
10533 }
10534 
10535 /*
10536   Create quick select from ref/ref_or_null scan.
10537 
10538   SYNOPSIS
10539     get_quick_select_for_ref()
10540       thd      Thread handle
10541       table    Table to access
10542       ref      ref[_or_null] scan parameters
10543       records  Estimate of number of records (needed only to construct
10544                quick select)
10545   NOTES
10546     This allocates things in a new memory root, as this may be called many
10547     times during a query.
10548 
10549   RETURN
10550     Quick select that retrieves the same rows as passed ref scan
10551     NULL on error.
10552 */
10553 
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                             TABLE_REF *ref, ha_rows records)
{
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
  QUICK_RANGE *range;
  uint part;
  bool create_err= FALSE;
  Cost_estimate cost;

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;

  if (!quick || create_err)
    return 0;			/* no ranges found */
  if (quick->init())
    goto err;
  quick->records= records;

  /* Allocate on the quick select's own root so it dies with the select. */
  if (!(range= new (alloc) QUICK_RANGE()))
    goto err;                                   // out of memory

  /* The ref scan is a single range: min == max == the ref key value. */
  range->min_key= range->max_key= ref->key_buff;
  range->min_length= range->max_length= ref->key_length;
  range->min_keypart_map= range->max_keypart_map=
    make_prev_keypart_map(ref->key_parts);
  /* Only a full-length key lookup qualifies as an equality range. */
  range->flag= (ref->key_length == key_info->key_length ? EQ_RANGE : 0);

  if (!(quick->key_parts=key_part=(KEY_PART *)
	alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
    goto err;

  /* Copy the used keypart descriptions from the index definition. */
  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
    key_part->length=       key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
    key_part->null_bit=     key_info->key_part[part].null_bit;
    key_part->flag=         (uint8) key_info->key_part[part].key_part_flag;
  }
  if (quick->ranges.push_back(range))
    goto err;

  /*
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
    if (!(null_range= new (alloc)
          QUICK_RANGE(ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts),
                      ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts), EQ_RANGE,
                      HA_READ_INVALID)))
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
    if (quick->ranges.push_back(null_range))
      goto err;
  }

  /* Call multi_range_read_info() to get the MRR flags and buffer size */
  quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
  if (thd->lex->sql_command != SQLCOM_SELECT)
    quick->mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
  if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
                                           ref->key_length))
    quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;

  quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
  if (table->file->multi_range_read_info(quick->index, 1,
                                         static_cast<uint>(records),
                                         &quick->mrr_buf_size,
                                         &quick->mrr_flags, &cost))
    goto err;

  return quick;
err:
  /* Single cleanup point: destroying quick also frees its mem_root. */
  delete quick;
  return 0;
}
10652 
10653 
10654 /*
10655   Perform key scans for all used indexes (except CPK), get rowids and merge
10656   them into an ordered non-recurrent sequence of rowids.
10657 
10658   The merge/duplicate removal is performed using Unique class. We put all
10659   rowids into Unique, get the sorted sequence and destroy the Unique.
10660 
10661   If table has a clustered primary key that covers all rows (TRUE for bdb
10662   and innodb currently) and one of the index_merge scans is a scan on PK,
10663   then rows that will be retrieved by PK scan are not put into Unique and
10664   primary key scan is not performed here, it is performed later separately.
10665 
10666   RETURN
10667     0     OK
10668     other error
10669 */
10670 
read_keys_and_merge()10671 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
10672 {
10673   List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
10674   QUICK_RANGE_SELECT* cur_quick;
10675   int result;
10676   handler *file= head->file;
10677   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
10678 
10679   /* We're going to just read rowids. */
10680   head->set_keyread(TRUE);
10681   head->prepare_for_position();
10682 
10683   cur_quick_it.rewind();
10684   cur_quick= cur_quick_it++;
10685   DBUG_ASSERT(cur_quick != 0);
10686 
10687   DBUG_EXECUTE_IF("simulate_bug13919180",
10688                   {
10689                     my_error(ER_UNKNOWN_ERROR, MYF(0));
10690                     DBUG_RETURN(1);
10691                   });
10692   /*
10693     We reuse the same instance of handler so we need to call both init and
10694     reset here.
10695   */
10696   if (cur_quick->init() || cur_quick->reset())
10697     DBUG_RETURN(1);
10698 
10699   if (unique == NULL)
10700   {
10701     DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_ABORT(); );
10702     DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
10703                     DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );
10704 
10705     unique= new Unique(refpos_order_cmp, (void *)file,
10706                        file->ref_length,
10707                        thd->variables.sortbuff_size);
10708   }
10709   else
10710   {
10711     unique->reset();
10712     filesort_free_buffers(head, false);
10713   }
10714 
10715   DBUG_ASSERT(file->ref_length == unique->get_size());
10716   DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
10717 
10718   if (!unique)
10719     DBUG_RETURN(1);
10720   for (;;)
10721   {
10722     while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
10723     {
10724       cur_quick->range_end();
10725       cur_quick= cur_quick_it++;
10726       if (!cur_quick)
10727         break;
10728 
10729       if (cur_quick->file->inited)
10730         cur_quick->file->ha_index_end();
10731       if (cur_quick->init() || cur_quick->reset())
10732         DBUG_RETURN(1);
10733     }
10734 
10735     if (result)
10736     {
10737       if (result != HA_ERR_END_OF_FILE)
10738       {
10739         cur_quick->range_end();
10740         DBUG_RETURN(result);
10741       }
10742       break;
10743     }
10744 
10745     if (thd->killed)
10746       DBUG_RETURN(1);
10747 
10748     /* skip row if it will be retrieved by clustered PK scan */
10749     if (pk_quick_select && pk_quick_select->row_in_ranges())
10750       continue;
10751 
10752     cur_quick->file->position(cur_quick->record);
10753     result= unique->unique_add((char*)cur_quick->file->ref);
10754     if (result)
10755       DBUG_RETURN(1);
10756   }
10757 
10758   /*
10759     Ok all rowids are in the Unique now. The next call will initialize
10760     head->sort structure so it can be used to iterate through the rowids
10761     sequence.
10762   */
10763   result= unique->get(head);
10764   doing_pk_scan= FALSE;
10765   /* index_merge currently doesn't support "using index" at all */
10766   head->set_keyread(FALSE);
10767   if (init_read_record(&read_record, thd, head, NULL, 1, 1, TRUE))
10768     DBUG_RETURN(1);
10769   DBUG_RETURN(result);
10770 }
10771 
10772 
10773 /*
10774   Get next row for index_merge.
10775   NOTES
10776     The rows are read from
10777       1. rowids stored in Unique.
10778       2. QUICK_RANGE_SELECT with clustered primary key (if any).
10779     The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
10780 */
10781 
int QUICK_INDEX_MERGE_SELECT::get_next()
{
  int result;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");

  /* Phase 2: rows retrieved directly by the clustered PK scan. */
  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  /*
    Phase 1: rows fetched via the sorted rowid sequence produced by
    read_keys_and_merge(). read_record() returns -1 at end of sequence.
  */
  if ((result= read_record.read_record(&read_record)) == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
    free_io_cache(head);
    /* All rows from Unique have been retrieved, do a clustered PK scan */
    if (pk_quick_select)
    {
      doing_pk_scan= TRUE;
      if ((result= pk_quick_select->init()) ||
          (result= pk_quick_select->reset()))
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
}
10808 
10809 
10810 /*
10811   Retrieve next record.
10812   SYNOPSIS
10813      QUICK_ROR_INTERSECT_SELECT::get_next()
10814 
10815   NOTES
10816     Invariant on enter/exit: all intersected selects have retrieved all index
10817     records with rowid <= some_rowid_val and no intersected select has
10818     retrieved any index records with rowid > some_rowid_val.
10819     We start fresh and loop until we have retrieved the same rowid in each of
10820     the key scans or we got an error.
10821 
10822     If a Clustered PK scan is present, it is used only to check if row
10823     satisfies its condition (and never used for row retrieval).
10824 
10825     Locking: to ensure that exclusive locks are only set on records that
10826     are included in the final result we must release the lock
10827     on all rows we read but do not include in the final result. This
10828     must be done on each index that reads the record and the lock
10829     must be released using the same handler (the same quick object) as
10830     used when reading the record.
10831 
10832   RETURN
10833    0     - Ok
10834    other - Error code if any error occurred.
10835 */
10836 
int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;

  /* quick that reads the given rowid first. This is needed in order
  to be able to unlock the row using the same handler object that locked
  it */
  QUICK_RANGE_SELECT* quick_with_last_rowid;

  int error, cmp;
  uint last_rowid_count=0;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  do
  {
    /* Get a rowid for first quick and save it as a 'candidate' */
    quick= quick_it++;
    error= quick->get_next();
    if (cpk_quick)
    {
      /*
        The clustered PK quick select only filters: skip rows whose PK
        value is outside the CPK ranges (see function header comment).
      */
      while (!error && !cpk_quick->row_in_ranges())
      {
        quick->file->unlock_row(); /* row not in range; unlock */
        error= quick->get_next();
      }
    }
    if (error)
      DBUG_RETURN(error);

    /* Remember the candidate rowid and which handler read (locked) it. */
    quick->file->position(quick->record);
    memcpy(last_rowid, quick->file->ref, head->file->ref_length);
    last_rowid_count= 1;
    quick_with_last_rowid= quick;

    /* Loop until every merged scan has confirmed the candidate rowid. */
    while (last_rowid_count < quick_selects.elements)
    {
      if (!(quick= quick_it++))
      {
        /* End of list reached; wrap around to the first scan. */
        quick_it.rewind();
        quick= quick_it++;
      }

      /* Advance this scan until its rowid is >= the candidate rowid. */
      do
      {
        DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
                        DBUG_SET("+d,innodb_report_deadlock"););
        if ((error= quick->get_next()))
        {
          /* On certain errors like deadlock, trx might be rolled back.*/
          if (!current_thd->transaction_rollback_request)
            quick_with_last_rowid->file->unlock_row();
          DBUG_RETURN(error);
        }
        quick->file->position(quick->record);
        cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
        if (cmp < 0)
        {
          /* This row is being skipped.  Release lock on it. */
          quick->file->unlock_row();
        }
      } while (cmp < 0);

      /* Ok, current select 'caught up' and returned ref >= cur_ref */
      if (cmp > 0)
      {
        /* Found a row with ref > cur_ref. Make it a new 'candidate' */
        if (cpk_quick)
        {
          /* Apply the clustered PK filter to the new candidate too. */
          while (!cpk_quick->row_in_ranges())
          {
            quick->file->unlock_row(); /* row not in range; unlock */
            if ((error= quick->get_next()))
            {
              /* On certain errors like deadlock, trx might be rolled back.*/
              if (!current_thd->transaction_rollback_request)
                quick_with_last_rowid->file->unlock_row();
              DBUG_RETURN(error);
            }
          }
          quick->file->position(quick->record);
        }
        memcpy(last_rowid, quick->file->ref, head->file->ref_length);
        /* Old candidate was not confirmed by all scans; release its lock. */
        quick_with_last_rowid->file->unlock_row();
        last_rowid_count= 1;
        quick_with_last_rowid= quick;
      }
      else
      {
        /* current 'candidate' row confirmed by this select */
        last_rowid_count++;
      }
    }

    /* We get here if we got the same row ref in all scans. */
    if (need_to_fetch_row)
      error= head->file->ha_rnd_pos(head->record[0], last_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
10937 
10938 
10939 /*
10940   Retrieve next record.
10941   SYNOPSIS
10942     QUICK_ROR_UNION_SELECT::get_next()
10943 
10944   NOTES
10945     Enter/exit invariant:
10946     For each quick select in the queue a {key,rowid} tuple has been
10947     retrieved but the corresponding row hasn't been passed to output.
10948 
10949   RETURN
10950    0     - Ok
10951    other - Error code if any error occurred.
10952 */
10953 
int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  uchar *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");

  do
  {
    /* Pick the smallest not-yet-returned rowid among the merged scans. */
    do
    {
      if (queue.empty())
        DBUG_RETURN(HA_ERR_END_OF_FILE);
      /* Ok, we have a queue with >= 1 scans */

      quick= queue.top();
      memcpy(cur_rowid, quick->last_rowid, rowid_length);

      /* put into queue rowid from the same stream as top element */
      if ((error= quick->get_next()))
      {
        if (error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(error);
        queue.pop();                    // This scan is exhausted
      }
      else
      {
        quick->save_last_pos();
        queue.update_top();             // Re-position scan within the queue
      }

      if (!have_prev_rowid)
      {
        /* No rows have been returned yet */
        dup_row= FALSE;
        have_prev_rowid= TRUE;
      }
      else
        /* A duplicate is detected by comparing against the last returned rowid. */
        dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
    } while (dup_row);

    /* Swap the rowid buffers so cur_rowid can be reused on the next call. */
    tmp= cur_rowid;
    cur_rowid= prev_rowid;
    prev_rowid= tmp;

    /* Fetch the actual row by rowid. */
    error= head->file->ha_rnd_pos(quick->record, prev_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
11003 
11004 
/*
  Prepare this quick select for (re-)retrieval of rows: position at the
  first range, configure keyread, initialize the index scan if needed,
  allocate the MRR buffer and start a multi-range read.

  RETURN
    0      OK
    other  Error code from the handler, or HA_ERR_OUT_OF_MEM
*/
int QUICK_RANGE_SELECT::reset()
{
  uint  buf_size;
  uchar *mrange_buff;
  int   error;
  HANDLER_BUFFER empty_buf;
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  last_range= NULL;
  cur_range= ranges.begin();

  /* set keyread to TRUE if index is covering */
  if(!head->no_keyread && head->covering_keys.is_set(index))
    head->set_keyread(true);
  else
    head->set_keyread(false);

  if (!file->inited)
  {
    /*
      read_set is set to the correct value for ror_merge_scan here as a
      subquery execution during optimization might result in innodb not
      initializing the read set in index_read() leading to wrong
      results while merging.
    */
    MY_BITMAP * const save_read_set= head->read_set;
    MY_BITMAP * const save_write_set= head->write_set;
    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    DBUG_EXECUTE_IF("bug14365043_2",
                    DBUG_SET("+d,ha_index_init_fail"););

    /* Pass index specifc read set for ror_merged_scan */
    if (in_ror_merged_scan)
    {
      /*
        We don't need to signal the bitmap change as the bitmap is always the
        same for this head->file
      */
      head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
    }
    if ((error= file->ha_index_init(index, sorted)))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(error);
    }
    if (in_ror_merged_scan)
    {
      /* Restore bitmaps set on entry */
      head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
    }
  }

  /* Allocate buffer if we need one but haven't allocated it yet */
  if (mrr_buf_size && !mrr_buf_desc)
  {
    buf_size= mrr_buf_size;
    /* On allocation failure, retry with progressively smaller buffers. */
    while (buf_size && !my_multi_malloc(key_memory_QUICK_RANGE_SELECT_mrr_buf_desc,
                                        MYF(MY_WME),
                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
                                        &mrange_buff, buf_size,
                                        NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      buf_size/= 2;
    }
    if (!mrr_buf_desc)
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    /* Initialize the handler buffer. */
    mrr_buf_desc->buffer= mrange_buff;
    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
    mrr_buf_desc->end_of_used_area= mrange_buff;
  }

  /* With no MRR buffer, hand the MRR interface an empty descriptor. */
  if (!mrr_buf_desc)
    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;

  RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
  error= file->multi_range_read_init(&seq_funcs, this, ranges.size(),
                                     mrr_flags, mrr_buf_desc? mrr_buf_desc:
                                                              &empty_buf);
  DBUG_RETURN(error);
}
11087 
11088 
11089 /*
11090   Range sequence interface implementation for array<QUICK_RANGE>: initialize
11091 
11092   SYNOPSIS
11093     quick_range_seq_init()
      init_param  Caller-opaque parameter: QUICK_RANGE_SELECT* pointer
11095       n_ranges    Number of ranges in the sequence (ignored)
11096       flags       MRR flags (currently not used)
11097 
11098   RETURN
11099     Opaque value to be passed to quick_range_seq_next
11100 */
11101 
quick_range_seq_init(void * init_param,uint n_ranges,uint flags)11102 range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
11103 {
11104   QUICK_RANGE_SELECT *quick= static_cast<QUICK_RANGE_SELECT*>(init_param);
11105   quick->qr_traversal_ctx.first= quick->ranges.begin();
11106   quick->qr_traversal_ctx.cur= quick->ranges.begin();
11107   quick->qr_traversal_ctx.last= quick->ranges.end();
11108   return &quick->qr_traversal_ctx;
11109 }
11110 
11111 
11112 /*
11113   Range sequence interface implementation for array<QUICK_RANGE>: get next
11114 
11115   SYNOPSIS
11116     quick_range_seq_next()
11117       rseq        Value returned from quick_range_seq_init
11118       range  OUT  Store information about the range here
11119 
11120   RETURN
11121     0  Ok
11122     1  No more ranges in the sequence
11123 */
11124 
quick_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)11125 uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
11126 {
11127   QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
11128 
11129   if (ctx->cur == ctx->last)
11130     return 1; /* no more ranges */
11131 
11132   QUICK_RANGE *cur= *(ctx->cur);
11133   key_range *start_key= &range->start_key;
11134   key_range *end_key=   &range->end_key;
11135 
11136   start_key->key=    cur->min_key;
11137   start_key->length= cur->min_length;
11138   start_key->keypart_map= cur->min_keypart_map;
11139   start_key->flag=   ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
11140                       (cur->flag & EQ_RANGE) ?
11141                       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
11142   end_key->key=      cur->max_key;
11143   end_key->length=   cur->max_length;
11144   end_key->keypart_map= cur->max_keypart_map;
11145   /*
11146     We use HA_READ_AFTER_KEY here because if we are reading on a key
11147     prefix. We want to find all keys with this prefix.
11148   */
11149   end_key->flag=     (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
11150                       HA_READ_AFTER_KEY);
11151   range->range_flag= cur->flag;
11152   ctx->cur++;
11153   return 0;
11154 }
11155 
11156 
11157 /*
11158   MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
11159 
11160   SYNOPSIS
11161     mrr_persistent_flag_storage()
11162       seq  Range sequence being traversed
11163       idx  Number of range
11164 
11165   DESCRIPTION
11166     MRR/NDB implementation needs to store some bits for each range. This
11167     function returns a reference to the "range_flag" associated with the
11168     range number idx.
11169 
11170     This function should be removed when we get a proper MRR/NDB
11171     implementation.
11172 
11173   RETURN
11174     Reference to range_flag associated with range number #idx
11175 */
11176 
mrr_persistent_flag_storage(range_seq_t seq,uint idx)11177 uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
11178 {
11179   QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
11180   return ctx->first[idx]->flag;
11181 }
11182 
11183 
11184 /*
11185   MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
11186 
11187   SYNOPSIS
11188     mrr_get_ptr_by_idx()
      seq  Range sequence being traversed
11190       idx  Number of the range
11191 
11192   DESCRIPTION
11193     An extension of MRR range sequence interface needed by NDB: return the
11194     data associated with the given range.
11195 
11196     A proper MRR interface implementer is supposed to store and return
11197     range-associated data. NDB stores number of the range instead. So this
11198     is a helper function that translates range number to range associated
11199     data.
11200 
    This function does nothing, as currently there is only one user of the
11202     MRR interface - the quick range select code, and this user doesn't need
11203     to use range-associated data.
11204 
11205   RETURN
11206     Reference to range-associated data
11207 */
11208 
mrr_get_ptr_by_idx(range_seq_t seq,uint idx)11209 char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
11210 {
11211   static char *dummy;
11212   return dummy;
11213 }
11214 
11215 
11216 /*
11217   Get next possible record using quick-struct.
11218 
11219   SYNOPSIS
11220     QUICK_RANGE_SELECT::get_next()
11221 
11222   NOTES
11223     Record is read into table->record[0]
11224 
11225   RETURN
11226     0			Found row
11227     HA_ERR_END_OF_FILE	No (more) rows in range
11228     #			Error code
11229 */
11230 
int QUICK_RANGE_SELECT::get_next()
{
  char *dummy;   // Receives range-associated data; unused by this caller
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");

  if (in_ror_merged_scan)
  {
    /*
      We don't need to signal the bitmap change as the bitmap is always the
      same for this head->file
    */
    head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
  }

  /* All range iteration is delegated to the MRR implementation. */
  int result= file->multi_range_read_next(&dummy);

  if (in_ror_merged_scan)
  {
    /* Restore bitmaps set on entry */
    head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
  }
  DBUG_RETURN(result);
}
11256 
11257 
11258 /*
11259   Get the next record with a different prefix.
11260 
11261   @param prefix_length   length of cur_prefix
11262   @param group_key_parts The number of key parts in the group prefix
11263   @param cur_prefix      prefix of a key to be searched for
11264 
11265   Each subsequent call to the method retrieves the first record that has a
11266   prefix with length prefix_length and which is different from cur_prefix,
11267   such that the record with the new prefix is within the ranges described by
11268   this->ranges. The record found is stored into the buffer pointed by
11269   this->record. The method is useful for GROUP-BY queries with range
11270   conditions to discover the prefix of the next group that satisfies the range
11271   conditions.
11272 
11273   @todo
11274 
11275     This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
11276     methods should be unified into a more general one to reduce code
11277     duplication.
11278 
11279   @retval 0                  on success
11280   @retval HA_ERR_END_OF_FILE if returned all keys
11281   @retval other              if some error occurred
11282 */
11283 
int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
                                        uint group_key_parts,
                                        uchar *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
  /* Keypart map covering the first group_key_parts key parts. */
  const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);

  for (;;)
  {
    int result;
    if (last_range)
    {
      /* Read the next record in the same range with prefix after cur_prefix. */
      DBUG_ASSERT(cur_prefix != NULL);
      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
                                      HA_READ_AFTER_KEY);
      /* A range with no max endpoint cannot be exceeded; return directly. */
      if (result || last_range->max_keypart_map == 0)
        DBUG_RETURN(result);

      /* Check whether the found key is still within the current range. */
      key_range previous_endpoint;
      last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
      if (file->compare_key(&previous_endpoint) <= 0)
        DBUG_RETURN(0);
    }

    /* Number of ranges not yet consumed. */
    const size_t count= ranges.size() - (cur_range - ranges.begin());
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    key_range start_key, end_key;
    last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
    last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);

    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
                                   last_range->max_keypart_map ? &end_key : 0,
                                   MY_TEST(last_range->flag & EQ_RANGE),
                                   sorted);
    if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
      last_range= 0;			// Stop searching

    if (result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;			// No matching rows; go to next range
  }
}
11335 
11336 
11337 /* Get next for geometrical indexes */
11338 
int QUICK_RANGE_SELECT_GEOM::get_next()
{
  DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");

  for (;;)
  {
    int result;
    if (last_range)
    {
      // Already read through key
      result= file->ha_index_next_same(record, last_range->min_key,
                                       last_range->min_length);
      /* Anything but "no more rows in this range" is returned directly. */
      if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    /* Number of ranges not yet consumed. */
    const size_t count= ranges.size() - (cur_range-ranges.begin());
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /* Start reading the next range; rkey_func_flag selects the search mode. */
    result= file->ha_index_read_map(record, last_range->min_key,
                                    last_range->min_keypart_map,
                                    last_range->rkey_func_flag);
    if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;				// Not found, to next range
  }
}
11372 
11373 
11374 /*
11375   Check if current row will be retrieved by this QUICK_RANGE_SELECT
11376 
11377   NOTES
11378     It is assumed that currently a scan is being done on another index
11379     which reads all necessary parts of the index that is scanned by this
11380     quick select.
11381     The implementation does a binary search on sorted array of disjoint
11382     ranges, without taking size of range into account.
11383 
11384     This function is used to filter out clustered PK scan rows in
11385     index_merge quick select.
11386 
11387   RETURN
11388     TRUE  if current row will be retrieved by this quick select
11389     FALSE if not
11390 */
11391 
bool QUICK_RANGE_SELECT::row_in_ranges()
{
  QUICK_RANGE *res;
  /* NOTE(review): assumes ranges is non-empty; size() - 1 would wrap otherwise. */
  size_t min= 0;
  size_t max= ranges.size() - 1;
  size_t mid= (max + min)/2;

  /* Binary search for the first range whose max is >= the current row. */
  while (min != max)
  {
    if (cmp_next(ranges[mid]))
    {
      /* current row value > mid->max */
      min= mid + 1;
    }
    else
      max= mid;
    mid= (min + max) / 2;
  }
  /* min == max: candidate range found; verify the row is inside it. */
  res= ranges[mid];
  return (!cmp_next(res) && !cmp_prev(res));
}
11413 
11414 /*
11415   This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
11416   get_next() interface, but we have to hold a pointer to the original
11417   QUICK_RANGE_SELECT because its data are used all over the place. What
11418   should be done is to factor out the data that is needed into a base
11419   class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
11420   which handle the ranges and implement the get_next() function.  But
11421   for now, this seems to work right at least.
11422  */
11423 
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts_arg,
                                     bool *error)
 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
  used_key_parts (used_key_parts_arg)
{
  /* NOTE(review): 'error' is not written anywhere in this body. */
  QUICK_RANGE *r;
  /*
    Use default MRR implementation for reverse scans. No table engine
    currently can do an MRR scan with output in reverse index order.
  */
  mrr_buf_desc= NULL;
  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
  mrr_buf_size= 0;

  /* Copy the ranges in reversed order, since the scan runs backwards. */
  Quick_ranges::const_iterator pr= ranges.begin();
  Quick_ranges::const_iterator end_range= ranges.end();
  for (; pr != end_range; pr++)
    rev_ranges.push_front(*pr);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  for (r = rev_it++; r; r = rev_it++)
  {
    if ((r->flag & EQ_RANGE) &&
	head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;				// Don't free shared mem
}
11456 
11457 
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
   *   - if it is an EQ_RANGE (which means that max key covers the entire
   *     key) and the query does not use any hidden key fields that are
   *     not considered when the range optimizer sets EQ_RANGE (e.g. the
   *     primary key added by InnoDB), then go directly to the key and
   *     read through it (sorting backwards is same as sorting forwards).
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (last_range)
    {						// Already read through key
      /* Continue the current range: next-same for EQ, else step backwards. */
      result = ((last_range->flag & EQ_RANGE &&
                 used_key_parts <=
                 head->key_info[index].user_defined_key_parts) ?
                file->ha_index_next_same(record, last_range->min_key,
                                         last_range->min_length) :
                file->ha_index_prev(record));
      if (!result)
      {
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(last_range= rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    // Case where we can avoid descending scan, see comment above
    const bool eqrange_all_keyparts= (last_range->flag & EQ_RANGE) &&
      (used_key_parts <= head->key_info[index].user_defined_key_parts);

    /*
      If we have pushed an index condition (ICP) and this quick select
      will use ha_index_prev() to read data, we need to let the
      handler know where to end the scan in order to avoid that the
      ICP implementation continues to read past the range boundary.
    */
    if (file->pushed_idx_cond)
    {
      if (!eqrange_all_keyparts)
      {
        key_range min_range;
        last_range->make_min_endpoint(&min_range);
        if(min_range.length > 0)
          file->set_end_range(&min_range, handler::RANGE_SCAN_DESC);
        else
          /* Range has no min endpoint: scan is unbounded at the low end. */
          file->set_end_range(NULL, handler::RANGE_SCAN_DESC);
      }
      else
      {
        /*
          Will use ha_index_next_same() for reading records. In case we have
          set the end range for an earlier range, this need to be cleared.
        */
        file->set_end_range(NULL, handler::RANGE_SCAN_ASC);
      }
    }

    if (last_range->flag & NO_MAX_RANGE)        // Read last record
    {
      int local_error;
      if ((local_error= file->ha_index_last(record)))
      {
        /*
          HA_ERR_END_OF_FILE is returned both when the table is empty and when
          there are no qualifying records in the range (when using ICP).
          Interpret this return value as "no qualifying rows in the range" to
          avoid loss of records. If the error code truly meant "empty table"
          the next iteration of the loop will exit.
        */
        if (local_error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(local_error);
        last_range= NULL;                       // Go to next range
        continue;
      }

      if (cmp_prev(last_range) == 0)
	DBUG_RETURN(0);
      last_range= 0;                            // No match; go to next range
      continue;
    }

    if (eqrange_all_keyparts)

    {
      /* Full-key EQ range: position directly on the key. */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      HA_READ_KEY_EXACT);
    }
    else
    {
      DBUG_ASSERT(last_range->flag & NEAR_MAX ||
                  (last_range->flag & EQ_RANGE &&
                   used_key_parts >
                   head->key_info[index].user_defined_key_parts) ||
                  range_reads_after_key(last_range));
      /* Position at/after the range's max key and scan backwards from there. */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      ((last_range->flag & NEAR_MAX) ?
                                       HA_READ_BEFORE_KEY :
                                       HA_READ_PREFIX_LAST_OR_PREV));
    }
    if (result)
    {
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
      last_range= 0;                            // Not found, to next range
      continue;
    }
    if (cmp_prev(last_range) == 0)
    {
      if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
	last_range= 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    last_range= 0;                              // To next range
  }
}
11589 
11590 
11591 /**
11592   Create a compatible quick select with the result ordered in an opposite way
11593 
11594   @param used_key_parts_arg  Number of used key parts
11595 
11596   @retval NULL in case of errors (OOM etc)
11597   @retval pointer to a newly created QUICK_SELECT_DESC if success
11598 */
11599 
make_reverse(uint used_key_parts_arg)11600 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
11601 {
11602   bool error= FALSE;
11603   QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg,
11604                                                       &error);
11605   if (new_quick == NULL || error)
11606   {
11607     delete new_quick;
11608     return NULL;
11609   }
11610   return new_quick;
11611 }
11612 
11613 
11614 /*
11615   Compare if found key is over max-value
11616   Returns 0 if key <= range->max_key
11617   TODO: Figure out why can't this function be as simple as cmp_prev().
11618   At least it could use key_cmp() from key.cc, it's almost identical.
11619 */
11620 
cmp_next(QUICK_RANGE * range_arg)11621 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
11622 {
11623   if (range_arg->flag & NO_MAX_RANGE)
11624     return 0;                                   /* key can't be to large */
11625 
11626   KEY_PART *key_part=key_parts;
11627   uint store_length;
11628 
11629   for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
11630        key < end;
11631        key+= store_length, key_part++)
11632   {
11633     int cmp;
11634     store_length= key_part->store_length;
11635     if (key_part->null_bit)
11636     {
11637       if (*key)
11638       {
11639         if (!key_part->field->is_null())
11640           return 1;
11641         continue;
11642       }
11643       else if (key_part->field->is_null())
11644         return 0;
11645       key++;					// Skip null byte
11646       store_length--;
11647     }
11648     if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
11649       return 0;
11650     if (cmp > 0)
11651       return 1;
11652   }
11653   return (range_arg->flag & NEAR_MAX) ? 1 : 0;          // Exact match
11654 }
11655 
11656 
11657 /*
11658   Returns 0 if found key is inside range (found key >= range->min_key).
11659 */
11660 
cmp_prev(QUICK_RANGE * range_arg)11661 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
11662 {
11663   int cmp;
11664   if (range_arg->flag & NO_MIN_RANGE)
11665     return 0;					/* key can't be to small */
11666 
11667   cmp= key_cmp(key_part_info, range_arg->min_key,
11668                range_arg->min_length);
11669   if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
11670     return 0;
11671   return 1;                                     // outside of range
11672 }
11673 
11674 
11675 /*
11676  * TRUE if this range will require using HA_READ_AFTER_KEY
11677    See comment in get_next() about this
11678  */
11679 
range_reads_after_key(QUICK_RANGE * range_arg)11680 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
11681 {
11682   return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
11683 	  !(range_arg->flag & EQ_RANGE) ||
11684 	  head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
11685 }
11686 
11687 
add_info_string(String * str)11688 void QUICK_RANGE_SELECT::add_info_string(String *str)
11689 {
11690   KEY *key_info= head->key_info + index;
11691   str->append(key_info->name);
11692 }
11693 
add_info_string(String * str)11694 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
11695 {
11696   QUICK_RANGE_SELECT *quick;
11697   bool first= TRUE;
11698   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11699   str->append(STRING_WITH_LEN("sort_union("));
11700   while ((quick= it++))
11701   {
11702     if (!first)
11703       str->append(',');
11704     else
11705       first= FALSE;
11706     quick->add_info_string(str);
11707   }
11708   if (pk_quick_select)
11709   {
11710     str->append(',');
11711     pk_quick_select->add_info_string(str);
11712   }
11713   str->append(')');
11714 }
11715 
add_info_string(String * str)11716 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
11717 {
11718   bool first= TRUE;
11719   QUICK_RANGE_SELECT *quick;
11720   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11721   str->append(STRING_WITH_LEN("intersect("));
11722   while ((quick= it++))
11723   {
11724     KEY *key_info= head->key_info + quick->index;
11725     if (!first)
11726       str->append(',');
11727     else
11728       first= FALSE;
11729     str->append(key_info->name);
11730   }
11731   if (cpk_quick)
11732   {
11733     KEY *key_info= head->key_info + cpk_quick->index;
11734     str->append(',');
11735     str->append(key_info->name);
11736   }
11737   str->append(')');
11738 }
11739 
add_info_string(String * str)11740 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
11741 {
11742   bool first= TRUE;
11743   QUICK_SELECT_I *quick;
11744   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11745   str->append(STRING_WITH_LEN("union("));
11746   while ((quick= it++))
11747   {
11748     if (!first)
11749       str->append(',');
11750     else
11751       first= FALSE;
11752     quick->add_info_string(str);
11753   }
11754   str->append(')');
11755 }
11756 
/* EXPLAIN text for loose index scan: "index_for_group_by(<index name>)". */
void QUICK_GROUP_MIN_MAX_SELECT::add_info_string(String *str)
{
  str->append(STRING_WITH_LEN("index_for_group_by("));
  str->append(index_info->name);
  str->append(')');
}
11763 
add_keys_and_lengths(String * key_names,String * used_lengths)11764 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
11765                                               String *used_lengths)
11766 {
11767   char buf[64];
11768   size_t length;
11769   KEY *key_info= head->key_info + index;
11770   key_names->append(key_info->name);
11771   length= longlong2str(max_used_key_length, buf, 10) - buf;
11772   used_lengths->append(buf, length);
11773 }
11774 
add_keys_and_lengths(String * key_names,String * used_lengths)11775 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
11776                                                     String *used_lengths)
11777 {
11778   char buf[64];
11779   size_t length;
11780   bool first= TRUE;
11781   QUICK_RANGE_SELECT *quick;
11782 
11783   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11784   while ((quick= it++))
11785   {
11786     if (first)
11787       first= FALSE;
11788     else
11789     {
11790       key_names->append(',');
11791       used_lengths->append(',');
11792     }
11793 
11794     KEY *key_info= head->key_info + quick->index;
11795     key_names->append(key_info->name);
11796     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11797     used_lengths->append(buf, length);
11798   }
11799   if (pk_quick_select)
11800   {
11801     KEY *key_info= head->key_info + pk_quick_select->index;
11802     key_names->append(',');
11803     key_names->append(key_info->name);
11804     length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
11805     used_lengths->append(',');
11806     used_lengths->append(buf, length);
11807   }
11808 }
11809 
add_keys_and_lengths(String * key_names,String * used_lengths)11810 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
11811                                                       String *used_lengths)
11812 {
11813   char buf[64];
11814   size_t length;
11815   bool first= TRUE;
11816   QUICK_RANGE_SELECT *quick;
11817   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11818   while ((quick= it++))
11819   {
11820     KEY *key_info= head->key_info + quick->index;
11821     if (first)
11822       first= FALSE;
11823     else
11824     {
11825       key_names->append(',');
11826       used_lengths->append(',');
11827     }
11828     key_names->append(key_info->name);
11829     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11830     used_lengths->append(buf, length);
11831   }
11832 
11833   if (cpk_quick)
11834   {
11835     KEY *key_info= head->key_info + cpk_quick->index;
11836     key_names->append(',');
11837     key_names->append(key_info->name);
11838     length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
11839     used_lengths->append(',');
11840     used_lengths->append(buf, length);
11841   }
11842 }
11843 
add_keys_and_lengths(String * key_names,String * used_lengths)11844 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
11845                                                   String *used_lengths)
11846 {
11847   bool first= TRUE;
11848   QUICK_SELECT_I *quick;
11849   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11850   while ((quick= it++))
11851   {
11852     if (first)
11853       first= FALSE;
11854     else
11855     {
11856       used_lengths->append(',');
11857       key_names->append(',');
11858     }
11859     quick->add_keys_and_lengths(key_names, used_lengths);
11860   }
11861 }
11862 
11863 
11864 /*******************************************************************************
11865 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
11866 *******************************************************************************/
11867 
11868 static inline uint get_field_keypart(KEY *index, Field *field);
11869 static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
11870                                              PARAM *param);
11871 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
11872                                     SEL_ARG **cur_range);
11873 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
11874                        KEY_PART_INFO *first_non_group_part,
11875                        KEY_PART_INFO *min_max_arg_part,
11876                        KEY_PART_INFO *last_part, THD *thd,
11877                        uchar *key_infix, uint *key_infix_len,
11878                        KEY_PART_INFO **first_non_infix_part);
11879 static bool
11880 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
11881                                Field::imagetype image_type);
11882 
11883 static bool
11884 min_max_inspect_cond_for_fields(Item *cond, Item_field *min_max_arg_item,
11885                                 bool *min_max_arg_present,
11886                                 bool *non_min_max_arg_present);
11887 
11888 static void
11889 cost_group_min_max(TABLE* table, uint key, uint used_key_parts,
11890                    uint group_key_parts, SEL_TREE *range_tree,
11891                    SEL_ARG *index_tree, ha_rows quick_prefix_records,
11892                    bool have_min, bool have_max,
11893                    Cost_estimate *cost_est, ha_rows *records);
11894 
11895 
11896 /**
11897   Test if this access method is applicable to a GROUP query with MIN/MAX
11898   functions, and if so, construct a new TRP object.
11899 
11900   DESCRIPTION
11901     Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
11902     Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
11903     following conditions:
11904     A) Table T has at least one compound index I of the form:
11905        I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
11906     B) Query conditions:
11907     B0. Q is over a single table T.
11908     B1. The attributes referenced by Q are a subset of the attributes of I.
11909     B2. All attributes QA in Q can be divided into 3 overlapping groups:
11910         - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
11911           referenced by any number of MIN and/or MAX functions if present.
11912         - WA = {W_1, ..., W_p} - from the WHERE clause
11913         - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
             = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
11916         - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
11917           GROUP BY and not referenced by MIN/MAX functions.
11918         with the following properties specified below.
11919     B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
11920         applicable.
11921 
11922     SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions which, if present, is denoted as C.
11924     SA2. The position of the C attribute in the index is after the last A_k.
11925     SA3. The attribute C can be referenced in the WHERE clause only in
11926          predicates of the forms:
11927          - (C {< | <= | > | >= | =} const)
11928          - (const {< | <= | > | >= | =} C)
11929          - (C between const_i and const_j)
11930          - C IS NULL
11931          - C IS NOT NULL
11932          - C != const
11933     SA4. If Q has a GROUP BY clause, there are no other aggregate functions
11934          except MIN and MAX. For queries with DISTINCT, aggregate functions
11935          are allowed.
11936     SA5. The select list in DISTINCT queries should not contain expressions.
11937     SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
11938          for AGG_FUNC(DISTINCT ...) optimization because cursor position is
11939          never stored after a unique key lookup in the clustered index and
         further index_next/prev calls can not be used. So loose index scan
11941          optimization can not be used in this case.
11942     SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
11943          access method is not used.
11944          For above queries MIN/MAX() aggregation has to be done at
11945          nested_loops_join (end_send_group). But with current design MIN/MAX()
11946          is always set as part of loose index scan. Because of this mismatch
11947          MIN() and MAX() values will be set incorrectly. For such queries to
11948          work we need a new interface for loose index scan. This new interface
11949          should only fetch records with min and max values and let
11950          end_send_group to do aggregation. Until then do not use
11951          loose_index_scan.
11952     GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
11953          G_i = A_j => i = j.
11954     GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
11955          forms a prefix of I. This permutation is used as the GROUP clause
11956          when the DISTINCT query is converted to a GROUP query.
11957     GA3. The attributes in GA may participate in arbitrary predicates, divided
11958          into two groups:
11959          - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
11960            attributes of a prefix of GA
11961          - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
11962            of GA. Since P is applied to only GROUP attributes it filters some
11963            groups, and thus can be applied after the grouping.
11964     GA4. There are no expressions among G_i, just direct column references.
11965     NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
11966          and the MIN/MAX attribute C, then NGA must consist of exactly the
11967          index attributes that constitute the gap. As a result there is a
11968          permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
11969          in the index.
11970     NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
11971          equality conditions for all NG_i of the form (NG_i = const) or
11972          (const = NG_i), such that each NG_i is referenced in exactly one
11973          conjunct. Informally, the predicates provide constants to fill the
11974          gap in the index.
11975     NGA3.If BA <> {}, there can only be one range. TODO: This is a code
11976          limitation and is not strictly needed. See BUG#15947433
11977     WA1. There are no other attributes in the WHERE clause except the ones
11978          referenced in predicates RNG, PA, PC, EQ defined above. Therefore
11979          WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
11980          above tests. By transitivity then it also follows that each WA_i
11981          participates in the index I (if this was already tested for GA, NGA
11982          and C).
11983     WA2. If there is a predicate on C, then it must be in conjunction
11984          to all predicates on all earlier keyparts in I.
11985 
11986     C) Overall query form:
11987        SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
11988          FROM T
11989         WHERE [RNG(A_1,...,A_p ; where p <= k)]
11990          [AND EQ(B_1,...,B_m)]
11991          [AND PC(C)]
11992          [AND PA(A_i1,...,A_iq)]
11993        GROUP BY A_1,...,A_k
11994        [HAVING PH(A_1, ..., B_1,..., C)]
11995     where EXPR(...) is an arbitrary expression over some or all SELECT fields,
11996     or:
11997        SELECT DISTINCT A_i1,...,A_ik
11998          FROM T
11999         WHERE [RNG(A_1,...,A_p ; where p <= k)]
12000          [AND PA(A_i1,...,A_iq)];
12001 
12002   NOTES
12003     If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
12005     object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
12006     If (mem_root == NULL), then the function only tests whether the current
12007     query satisfies the conditions above, and, if so, sets
12008     is_applicable = TRUE.
12009 
12010     Queries with DISTINCT for which index access can be used are transformed
12011     into equivalent group-by queries of the form:
12012 
12013     SELECT A_1,...,A_k FROM T
12014      WHERE [RNG(A_1,...,A_p ; where p <= k)]
12015       [AND PA(A_i1,...,A_iq)]
12016     GROUP BY A_1,...,A_k;
12017 
12018     The group-by list is a permutation of the select attributes, according
12019     to their order in the index.
12020 
12021   TODO
12022   - What happens if the query groups by the MIN/MAX field, and there is no
12023     other field as in: "select min(a) from t1 group by a" ?
12024   - We assume that the general correctness of the GROUP-BY query was checked
12025     before this point. Is this correct, or do we have to check it completely?
12026   - Lift the limitation in condition (B3), that is, make this access method
12027     applicable to ROLLUP queries.
12028 
12029  @param  param     Parameter from test_quick_select
12030  @param  sel_tree  Range tree generated by get_mm_tree
12031  @param  cost_est  Best cost so far (=table/index scan time)
12032  @return table read plan
12033    @retval NULL  Loose index scan not applicable or mem_root == NULL
12034    @retval !NULL Loose index scan table read plan
12035 */
12036 
12037 static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM * param,SEL_TREE * tree,const Cost_estimate * cost_est)12038 get_best_group_min_max(PARAM *param, SEL_TREE *tree, const Cost_estimate *cost_est)
12039 {
12040   THD *thd= param->thd;
12041   JOIN *join= thd->lex->current_select()->join;
12042   TABLE *table= param->table;
12043   bool have_min= FALSE;              /* TRUE if there is a MIN function. */
12044   bool have_max= FALSE;              /* TRUE if there is a MAX function. */
12045   Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
12046   KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
12047   uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
12048   KEY *index_info= NULL;    /* The index chosen for data access. */
12049   uint index= 0;            /* The id of the chosen index. */
12050   uint group_key_parts= 0;  // Number of index key parts in the group prefix.
12051   uint used_key_parts= 0;   /* Number of index key parts used for access. */
12052   uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
12053   uint key_infix_len= 0;          /* Length of key_infix. */
12054   TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
12055   uint key_part_nr;
12056   ORDER *tmp_group;
12057   Item *item;
12058   Item_field *item_field;
12059   bool is_agg_distinct;
12060   List<Item_field> agg_distinct_flds;
12061   /* Cost-related variables for the best index so far. */
12062   Cost_estimate best_read_cost;
12063   ha_rows best_records= 0;
12064   SEL_ARG *best_index_tree= NULL;
12065   ha_rows best_quick_prefix_records= 0;
12066   uint best_param_idx= 0;
12067   List_iterator<Item> select_items_it;
12068   Opt_trace_context * const trace= &param->thd->opt_trace;
12069 
12070   DBUG_ENTER("get_best_group_min_max");
12071 
12072   Opt_trace_object trace_group(trace, "group_index_range",
12073                                Opt_trace_context::RANGE_OPTIMIZER);
12074   const char* cause= NULL;
12075   best_read_cost.set_max_cost();
12076 
12077   /* Perform few 'cheap' tests whether this access method is applicable. */
12078   if (!join)
12079     cause= "no_join";
12080   else if (join->primary_tables != 1)  /* Query must reference one table. */
12081     cause= "not_single_table";
12082   else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
12083     cause= "rollup";
12084   else if (table->s->keys == 0)        /* There are no indexes to use. */
12085     cause= "no_index";
12086   else if (param->order_direction == ORDER::ORDER_DESC)
12087     cause= "cannot_do_reverse_ordering";
12088   if (cause != NULL)
12089   {
12090     trace_group.add("chosen", false).add_alnum("cause", cause);
12091     DBUG_RETURN(NULL);
12092   }
12093 
12094   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
12095   is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
12096 
12097   if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
12098       (!join->select_distinct) &&
12099       !is_agg_distinct)
12100   {
12101     trace_group.add("chosen", false).
12102       add_alnum("cause", "not_group_by_or_distinct");
12103     DBUG_RETURN(NULL);
12104   }
12105   /* Analyze the query in more detail. */
12106 
12107   if (join->sum_funcs[0])
12108   {
12109     Item_sum *min_max_item;
12110     Item_sum **func_ptr= join->sum_funcs;
12111     while ((min_max_item= *(func_ptr++)))
12112     {
12113       if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
12114         have_min= TRUE;
12115       else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
12116         have_max= TRUE;
12117       else if (is_agg_distinct &&
12118                (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
12119                 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
12120                 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
12121         continue;
12122       else
12123       {
12124         trace_group.add("chosen", false).
12125           add_alnum("cause", "not_applicable_aggregate_function");
12126         DBUG_RETURN(NULL);
12127       }
12128 
12129       /* The argument of MIN/MAX. */
12130       Item *expr= min_max_item->get_arg(0)->real_item();
12131       if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
12132       {
12133         if (! min_max_arg_item)
12134           min_max_arg_item= (Item_field*) expr;
12135         else if (! min_max_arg_item->eq(expr, 1))
12136           DBUG_RETURN(NULL);
12137       }
12138       else
12139         DBUG_RETURN(NULL);
12140     }
12141   }
12142 
12143   /**
12144     Test (Part of WA2): Skip loose index scan on disjunctive WHERE clause which
12145     results in null tree or merge tree.
12146   */
12147   if (tree && !tree->merges.is_empty())
12148   {
12149     /**
12150       The tree structure contains multiple disjoint trees. This happens when
12151       the WHERE clause can't be represented in a single range tree due to the
12152       disjunctive nature of it but there exists indexes to perform index
12153       merge scan.
12154     */
12155     trace_group.add("chosen", false).
12156       add_alnum("cause", "disjuntive_predicate_present");
12157     DBUG_RETURN(NULL);
12158   }
12159   else if (!tree && join->where_cond && min_max_arg_item)
12160   {
12161     /**
12162       Skip loose index scan if min_max attribute is present along with
      at least one other attribute in the WHERE clause when the tree is null.
12164       There is no range tree if WHERE condition can't be represented in a
12165       single range tree and index merge is not possible.
12166     */
12167     bool min_max_arg_present= false;
12168     bool non_min_max_arg_present= false;
12169     if (min_max_inspect_cond_for_fields(join->where_cond,
12170                                         min_max_arg_item,
12171                                         &min_max_arg_present,
12172                                         &non_min_max_arg_present))
12173     {
12174       trace_group.add("chosen", false).
12175         add_alnum("cause", "minmax_keypart_in_disjunctive_query");
12176       DBUG_RETURN(NULL);
12177     }
12178   }
12179 
12180   /* Check (SA7). */
12181   if (is_agg_distinct && (have_max || have_min))
12182   {
12183     trace_group.add("chosen", false).
12184       add_alnum("cause", "have_both_agg_distinct_and_min_max");
12185     DBUG_RETURN(NULL);
12186   }
12187 
12188   select_items_it= List_iterator<Item>(join->fields_list);
12189   /* Check (SA5). */
12190   if (join->select_distinct)
12191   {
12192     trace_group.add("distinct_query", true);
12193     while ((item= select_items_it++))
12194     {
12195       if (item->real_item()->type() != Item::FIELD_ITEM)
12196         DBUG_RETURN(NULL);
12197     }
12198   }
12199 
12200   /* Check (GA4) - that there are no expressions among the group attributes. */
12201   for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
12202   {
12203     if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
12204     {
12205       trace_group.add("chosen", false).
12206         add_alnum("cause", "group_field_is_expression");
12207       DBUG_RETURN(NULL);
12208     }
12209   }
12210 
12211   /*
12212     Check that table has at least one compound index such that the conditions
12213     (GA1,GA2) are all TRUE. If there is more than one such index, select the
12214     first one. Here we set the variables: group_prefix_len and index_info.
12215   */
12216 
12217   const uint pk= param->table->s->primary_key;
12218   SEL_ARG *cur_index_tree= NULL;
12219   ha_rows cur_quick_prefix_records= 0;
12220   Opt_trace_array trace_indexes(trace, "potential_group_range_indexes");
12221   // We go through allowed indexes
12222   for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx)
12223   {
12224     const uint cur_index= param->real_keynr[cur_param_idx];
12225     KEY *const cur_index_info= &table->key_info[cur_index];
12226     Opt_trace_object trace_idx(trace);
12227     trace_idx.add_utf8("index", cur_index_info->name);
12228     KEY_PART_INFO *cur_part;
12229     KEY_PART_INFO *end_part; /* Last part for loops. */
12230     /* Last index part. */
12231     KEY_PART_INFO *last_part;
12232     KEY_PART_INFO *first_non_group_part;
12233     KEY_PART_INFO *first_non_infix_part;
12234     uint key_infix_parts;
12235     uint cur_group_key_parts= 0;
12236     uint cur_group_prefix_len= 0;
12237     Cost_estimate cur_read_cost;
12238     ha_rows cur_records;
12239     key_map used_key_parts_map;
12240     uint max_key_part= 0;
12241     uint cur_key_infix_len= 0;
12242     uchar cur_key_infix[MAX_KEY_LENGTH];
12243     uint cur_used_key_parts;
12244 
12245     /* Check (B1) - if current index is covering. */
12246     if (!table->covering_keys.is_set(cur_index))
12247     {
12248       cause= "not_covering";
12249       goto next_index;
12250     }
12251 
12252     /*
12253       If the current storage manager is such that it appends the primary key to
12254       each index, then the above condition is insufficient to check if the
12255       index is covering. In such cases it may happen that some fields are
12256       covered by the PK index, but not by the current index. Since we can't
12257       use the concatenation of both indexes for index lookup, such an index
12258       does not qualify as covering in our case. If this is the case, below
12259       we check that all query fields are indeed covered by 'cur_index'.
12260     */
12261     if (pk < MAX_KEY && cur_index != pk &&
12262         (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
12263     {
12264       /* For each table field */
12265       for (uint i= 0; i < table->s->fields; i++)
12266       {
12267         Field *cur_field= table->field[i];
12268         /*
12269           If the field is used in the current query ensure that it's
12270           part of 'cur_index'
12271         */
12272         if (bitmap_is_set(table->read_set, cur_field->field_index) &&
12273             !cur_field->is_part_of_actual_key(thd, cur_index, cur_index_info))
12274         {
12275           cause= "not_covering";
12276           goto next_index;                  // Field was not part of key
12277         }
12278       }
12279     }
12280     trace_idx.add("covering", true);
12281 
12282     /*
12283       Check (GA1) for GROUP BY queries.
12284     */
12285     if (join->group_list)
12286     {
12287       cur_part= cur_index_info->key_part;
12288       end_part= cur_part + actual_key_parts(cur_index_info);
12289       /* Iterate in parallel over the GROUP list and the index parts. */
12290       for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
12291            tmp_group= tmp_group->next, cur_part++)
12292       {
12293         /*
12294           TODO:
12295           tmp_group::item is an array of Item, is it OK to consider only the
12296           first Item? If so, then why? What is the array for?
12297         */
12298         /* Above we already checked that all group items are fields. */
12299         DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
12300         Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
12301         if (group_field->field->eq(cur_part->field))
12302         {
12303           cur_group_prefix_len+= cur_part->store_length;
12304           ++cur_group_key_parts;
12305           max_key_part= cur_part - cur_index_info->key_part + 1;
12306           used_key_parts_map.set_bit(max_key_part);
12307         }
12308         else
12309         {
12310           cause= "group_attribute_not_prefix_in_index";
12311           goto next_index;
12312         }
12313       }
12314     }
12315 
12316     /*
12317       Check (GA2) if this is a DISTINCT query.
12318       If GA2, then Store a new ORDER object in group_fields_array at the
12319       position of the key part of item_field->field. Thus we get the ORDER
12320       objects for each field ordered as the corresponding key parts.
12321       Later group_fields_array of ORDER objects is used to convert the query
12322       to a GROUP query.
12323     */
12324     if ((!join->group_list && join->select_distinct) ||
12325         is_agg_distinct)
12326     {
12327       if (!is_agg_distinct)
12328       {
12329         select_items_it.rewind();
12330       }
12331 
12332       List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
12333       while (NULL !=
12334              (item= (is_agg_distinct ?
12335                      (Item *) agg_distinct_flds_it++ : select_items_it++)))
12336       {
12337         /* (SA5) already checked above. */
12338         item_field= (Item_field*) item->real_item();
12339         DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
12340 
12341         /* not doing loose index scan for derived tables */
12342         if (!item_field->field)
12343         {
12344           cause= "derived_table";
12345           goto next_index;
12346         }
12347 
12348         /* Find the order of the key part in the index. */
12349         key_part_nr= get_field_keypart(cur_index_info, item_field->field);
12350         /*
12351           Check if this attribute was already present in the select list.
          If it was present, then its corresponding key part was already used.
12353         */
12354         if (used_key_parts_map.is_set(key_part_nr))
12355           continue;
12356         if (key_part_nr < 1 ||
12357             (!is_agg_distinct && key_part_nr > join->fields_list.elements))
12358         {
12359           cause= "select_attribute_not_prefix_in_index";
12360           goto next_index;
12361         }
12362         cur_part= cur_index_info->key_part + key_part_nr - 1;
12363         cur_group_prefix_len+= cur_part->store_length;
12364         used_key_parts_map.set_bit(key_part_nr);
12365         ++cur_group_key_parts;
12366         max_key_part= max(max_key_part,key_part_nr);
12367       }
12368       /*
12369         Check that used key parts forms a prefix of the index.
12370         To check this we compare bits in all_parts and cur_parts.
12371         all_parts have all bits set from 0 to (max_key_part-1).
12372         cur_parts have bits set for only used keyparts.
12373       */
12374       ulonglong all_parts, cur_parts;
12375       all_parts= (1ULL << max_key_part) - 1;
12376       cur_parts= used_key_parts_map.to_ulonglong() >> 1;
12377       if (all_parts != cur_parts)
12378         goto next_index;
12379     }
12380 
12381     /* Check (SA2). */
12382     if (min_max_arg_item)
12383     {
12384       key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
12385       if (key_part_nr <= cur_group_key_parts)
12386       {
12387         cause= "aggregate_column_not_suffix_in_idx";
12388         goto next_index;
12389       }
12390       min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
12391     }
12392 
12393     /* Check (SA6) if clustered key is used. */
12394     if (is_agg_distinct && cur_index == table->s->primary_key &&
12395         table->file->primary_key_is_clustered())
12396     {
12397       cause= "primary_key_is_clustered";
12398       goto next_index;
12399     }
12400 
12401     /*
12402       Check (NGA1, NGA2) and extract a sequence of constants to be used as part
12403       of all search keys.
12404     */
12405 
12406     /*
12407       If there is MIN/MAX, each keypart between the last group part and the
12408       MIN/MAX part must participate in one equality with constants, and all
12409       keyparts after the MIN/MAX part must not be referenced in the query.
12410 
12411       If there is no MIN/MAX, the keyparts after the last group part can be
12412       referenced only in equalities with constants, and the referenced keyparts
12413       must form a sequence without any gaps that starts immediately after the
12414       last group keypart.
12415     */
12416     last_part= cur_index_info->key_part + actual_key_parts(cur_index_info);
12417     first_non_group_part=
12418       (cur_group_key_parts < actual_key_parts(cur_index_info)) ?
12419       cur_index_info->key_part + cur_group_key_parts :
12420       NULL;
12421     first_non_infix_part= min_max_arg_part ?
12422       (min_max_arg_part < last_part) ?
12423       min_max_arg_part :
12424       NULL :
12425       NULL;
12426     if (first_non_group_part &&
12427         (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
12428     {
12429       if (tree)
12430       {
12431         SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param);
12432         if (!get_constant_key_infix(cur_index_info, index_range_tree,
12433                                     first_non_group_part, min_max_arg_part,
12434                                     last_part, thd, cur_key_infix,
12435                                     &cur_key_infix_len,
12436                                     &first_non_infix_part))
12437         {
12438           cause= "nonconst_equality_gap_attribute";
12439           goto next_index;
12440         }
12441       }
12442       else if (min_max_arg_part &&
12443                (min_max_arg_part - first_non_group_part > 0))
12444       {
12445         /*
12446           There is a gap but no range tree, thus no predicates at all for the
12447           non-group keyparts.
12448         */
12449         cause= "no_nongroup_keypart_predicate";
12450         goto next_index;
12451       }
12452       else if (first_non_group_part && join->where_cond)
12453       {
12454         /*
12455           If there is no MIN/MAX function in the query, but some index
12456           key part is referenced in the WHERE clause, then this index
12457           cannot be used because the WHERE condition over the keypart's
12458           field cannot be 'pushed' to the index (because there is no
12459           range 'tree'), and the WHERE clause must be evaluated before
12460           GROUP BY/DISTINCT.
12461         */
12462         /*
12463           Store the first and last keyparts that need to be analyzed
12464           into one array that can be passed as parameter.
12465         */
12466         KEY_PART_INFO *key_part_range[2];
12467         key_part_range[0]= first_non_group_part;
12468         key_part_range[1]= last_part;
12469 
12470         /* Check if cur_part is referenced in the WHERE clause. */
12471         if (join->where_cond->walk(&Item::find_item_in_field_list_processor,
12472                                    Item::WALK_SUBQUERY_POSTFIX,
12473                                    (uchar*) key_part_range))
12474         {
12475           cause= "keypart_reference_from_where_clause";
12476           goto next_index;
12477         }
12478       }
12479     }
12480 
12481     /*
12482       Test (WA1) partially - that no other keypart after the last infix part is
12483       referenced in the query.
12484     */
12485     if (first_non_infix_part)
12486     {
12487       cur_part= first_non_infix_part +
12488         (min_max_arg_part && (min_max_arg_part < last_part));
12489       for (; cur_part != last_part; cur_part++)
12490       {
12491         if (bitmap_is_set(table->read_set, cur_part->field->field_index))
12492         {
12493           cause= "keypart_after_infix_in_query";
12494           goto next_index;
12495         }
12496       }
12497     }
12498 
12499     /**
12500       Test Part of WA2:If there are conditions on a column C participating in
12501       MIN/MAX, those conditions must be conjunctions to all earlier
12502       keyparts. Otherwise, Loose Index Scan cannot be used.
12503     */
12504     if (tree && min_max_arg_item)
12505     {
12506       SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param);
12507       SEL_ARG *cur_range= NULL;
12508       if (get_sel_arg_for_keypart(min_max_arg_part->field,
12509                                   index_range_tree, &cur_range) ||
12510           (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
12511       {
12512         cause= "minmax_keypart_in_disjunctive_query";
12513         goto next_index;
12514       }
12515     }
12516 
12517     /* If we got to this point, cur_index_info passes the test. */
12518     key_infix_parts= cur_key_infix_len ? (uint)
12519       (first_non_infix_part - first_non_group_part) : 0;
12520     cur_used_key_parts= cur_group_key_parts + key_infix_parts;
12521 
12522     /* Compute the cost of using this index. */
12523     if (tree)
12524     {
12525       /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
12526       cur_index_tree= get_index_range_tree(cur_index, tree, param);
12527       /* Check if this range tree can be used for prefix retrieval. */
12528       Cost_estimate dummy_cost;
12529       uint mrr_flags= HA_MRR_SORTED;
12530       uint mrr_bufsize=0;
12531       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
12532                                                    FALSE /*don't care*/,
12533                                                    cur_index_tree, TRUE,
12534                                                    &mrr_flags, &mrr_bufsize,
12535                                                    &dummy_cost);
12536 #ifdef OPTIMIZER_TRACE
12537       if (unlikely(cur_index_tree && trace->is_started()))
12538       {
12539         trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics);
12540         Opt_trace_array trace_range(trace, "ranges");
12541 
12542         const KEY_PART_INFO *key_part= cur_index_info->key_part;
12543 
12544         String range_info;
12545         range_info.set_charset(system_charset_info);
12546         append_range_all_keyparts(&trace_range, NULL, &range_info,
12547                                   cur_index_tree, key_part, false);
12548       }
12549 #endif
12550     }
12551     cost_group_min_max(table, cur_index, cur_used_key_parts,
12552                        cur_group_key_parts, tree, cur_index_tree,
12553                        cur_quick_prefix_records, have_min, have_max,
12554                        &cur_read_cost, &cur_records);
12555     /*
12556       If cur_read_cost is lower than best_read_cost use cur_index.
12557       Do not compare doubles directly because they may have different
12558       representations (64 vs. 80 bits).
12559     */
12560     trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
12561     {
12562       Cost_estimate min_diff_cost= cur_read_cost;
12563       min_diff_cost.multiply(DBL_EPSILON);
12564       if (cur_read_cost < (best_read_cost - min_diff_cost))
12565       {
12566         index_info= cur_index_info;
12567         index= cur_index;
12568         best_read_cost= cur_read_cost;
12569         best_records= cur_records;
12570         best_index_tree= cur_index_tree;
12571         best_quick_prefix_records= cur_quick_prefix_records;
12572         best_param_idx= cur_param_idx;
12573         group_key_parts= cur_group_key_parts;
12574         group_prefix_len= cur_group_prefix_len;
12575         key_infix_len= cur_key_infix_len;
12576         if (key_infix_len)
12577           memcpy (key_infix, cur_key_infix, sizeof (key_infix));
12578         used_key_parts= cur_used_key_parts;
12579       }
12580     }
12581 
12582   next_index:
12583     if (cause)
12584     {
12585       trace_idx.add("usable", false).add_alnum("cause", cause);
12586       cause= NULL;
12587     }
12588   }
12589   trace_indexes.end();
12590 
12591   if (!index_info) /* No usable index found. */
12592     DBUG_RETURN(NULL);
12593 
12594   /* Check (SA3) for the where clause. */
12595   if (join->where_cond && min_max_arg_item &&
12596       !check_group_min_max_predicates(join->where_cond, min_max_arg_item,
12597                                       (index_info->flags & HA_SPATIAL) ?
12598                                       Field::itMBR : Field::itRAW))
12599   {
12600     trace_group.add("usable", false).
12601       add_alnum("cause", "unsupported_predicate_on_agg_attribute");
12602     DBUG_RETURN(NULL);
12603   }
12604 
12605   /* The query passes all tests, so construct a new TRP object. */
12606   read_plan= new (param->mem_root)
12607                  TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
12608                                    min_max_arg_part,
12609                                    group_prefix_len, used_key_parts,
12610                                    group_key_parts, index_info, index,
12611                                    key_infix_len,
12612                                    (key_infix_len > 0) ? key_infix : NULL,
12613                                    tree, best_index_tree, best_param_idx,
12614                                    best_quick_prefix_records);
12615   if (read_plan)
12616   {
12617     if (tree && read_plan->quick_prefix_records == 0)
12618       DBUG_RETURN(NULL);
12619 
12620     read_plan->cost_est= best_read_cost;
12621     read_plan->records=   best_records;
12622     if (*cost_est < best_read_cost &&
12623         is_agg_distinct)
12624     {
12625       trace_group.add("index_scan", true);
12626       read_plan->cost_est.reset();
12627       read_plan->use_index_scan();
12628     }
12629 
12630     DBUG_PRINT("info",
12631                ("Returning group min/max plan: cost: %g, records: %lu",
12632                 read_plan->cost_est.total_cost(), (ulong) read_plan->records));
12633   }
12634 
12635   DBUG_RETURN(read_plan);
12636 }
12637 
12638 
12639 /*
12640   Check that the MIN/MAX attribute participates only in range predicates
12641   with constants.
12642 
12643   SYNOPSIS
12644     check_group_min_max_predicates()
12645     cond              tree (or subtree) describing all or part of the WHERE
12646                       clause being analyzed
12647     min_max_arg_item  the field referenced by the MIN/MAX function(s)
12648     min_max_arg_part  the keypart of the MIN/MAX argument if any
12649 
12650   DESCRIPTION
12651     The function walks recursively over the cond tree representing a WHERE
12652     clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
12653     aggregate function, it is referenced only by one of the following
12654     predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.
12655 
12656   RETURN
12657     TRUE  if cond passes the test
12658     FALSE o/w
12659 */
12660 
static bool
check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type)
{
  DBUG_ENTER("check_group_min_max_predicates");
  DBUG_ASSERT(cond && min_max_arg_item);

  cond= cond->real_item();
  Item::Type cond_type= cond->type();
  if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
  {
    DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *and_or_arg;
    /* (SA3) must hold recursively for every argument of the AND/OR. */
    while ((and_or_arg= li++))
    {
      if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
                                         image_type))
        DBUG_RETURN(FALSE);
    }
    DBUG_RETURN(TRUE);
  }

  /*
    TODO:
    This is a very crude fix to handle sub-selects in the WHERE clause
    (Item_subselect objects). With the test below we rule out from the
    optimization all queries with subselects in the WHERE clause. What has to
    be done, is that here we should analyze whether the subselect references
    the MIN/MAX argument field, and disallow the optimization only if this is
    so.
    Need to handle subselect in min_max_inspect_cond_for_fields() once this
    is fixed.
  */
  if (cond_type == Item::SUBSELECT_ITEM)
    DBUG_RETURN(FALSE);

  /*
    Condition of the form 'field' is equivalent to 'field <> 0' and thus
    satisfies the SA3 condition.
  */
  if (cond_type == Item::FIELD_ITEM)
  {
    DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
    DBUG_RETURN(TRUE);
  }

  /*
    At this point, we have weeded out most conditions other than
    function items. However, there are cases like the following:

      select 1 in (select max(c) from t1 where max(1) group by a)

    Here the condition "where max(1)" is an Item_sum_max, not an
    Item_func. In this particular case, the where clause should
    be equivalent to "where max(1) <> 0". A where clause
    phrased that way does not satisfy the SA3 condition of
    get_best_group_min_max(). The "where max(1) = true" clause
    causes this method to reject the access method
    (i.e., to return FALSE).

    It's been suggested that it may be possible to use the access method
    for a sub-family of cases when we're aggregating constants or
    outer references. For the moment, we bail out and we reject
    the access method for the query.

    It's hard to prove that there are no other cases where the
    condition is not an Item_func. So, for the moment, don't apply
    the optimization if the condition is not a function item.
  */
  if (cond_type == Item::SUM_FUNC_ITEM)
  {
    DBUG_RETURN(FALSE);
  }

  /*
   If this is a debug server, then we want to know about
   additional oddball cases which might benefit from this
   optimization.
  */
  DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
  if (cond_type != Item::FUNC_ITEM)
  {
    DBUG_RETURN(FALSE);
  }

  /* Test if cond references only group-by or non-group fields. */
  Item_func *pred= (Item_func*) cond;
  Item *cur_arg;
  DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
  for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
  {
    Item **arguments= pred->arguments();
    cur_arg= arguments[arg_idx]->real_item();
    DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
    if (cur_arg->type() == Item::FIELD_ITEM)
    {
      if (min_max_arg_item->eq(cur_arg, 1))
      {
       /*
         If pred references the MIN/MAX argument, check whether pred is a range
         condition that compares the MIN/MAX argument with a constant.
       */
        Item_func::Functype pred_type= pred->functype();
        /* This is the exact set of comparison predicates allowed by (SA3). */
        if (pred_type != Item_func::EQUAL_FUNC     &&
            pred_type != Item_func::LT_FUNC        &&
            pred_type != Item_func::LE_FUNC        &&
            pred_type != Item_func::GT_FUNC        &&
            pred_type != Item_func::GE_FUNC        &&
            pred_type != Item_func::BETWEEN        &&
            pred_type != Item_func::ISNULL_FUNC    &&
            pred_type != Item_func::ISNOTNULL_FUNC &&
            pred_type != Item_func::EQ_FUNC        &&
            pred_type != Item_func::NE_FUNC)
          DBUG_RETURN(FALSE);

        /* Check that pred compares min_max_arg_item with a constant. */
        Item *args[3];
        memset(args, 0, 3 * sizeof(Item*));
        bool inv;
        /* Test if this is a comparison of a field and a constant. */
        if (!simple_pred(pred, args, &inv))
          DBUG_RETURN(FALSE);

        /* Check for compatible string comparisons - similar to get_mm_leaf. */
        if (args[0] && args[1] && !args[2] && // this is a binary function
            min_max_arg_item->result_type() == STRING_RESULT &&
            /*
              Don't use an index when comparing strings of different collations.
            */
            ((args[1]->result_type() == STRING_RESULT &&
              image_type == Field::itRAW &&
              min_max_arg_item->field->charset() != pred->compare_collation())
             ||
             /*
               We can't always use indexes when comparing a string index to a
               number.
             */
             (args[1]->result_type() != STRING_RESULT &&
              min_max_arg_item->field->cmp_type() != args[1]->result_type())))
          DBUG_RETURN(FALSE);
      }
    }
    else if (cur_arg->type() == Item::FUNC_ITEM)
    {
      /* Nested function: check (SA3) recursively for its arguments. */
      if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
                                         image_type))
        DBUG_RETURN(FALSE);
    }
    else if (cur_arg->const_item())
    {
      /*
        For predicates of the form "const OP expr" we also have to check 'expr'
        to make a decision.
      */
      continue;
    }
    else
      DBUG_RETURN(FALSE);
  }

  DBUG_RETURN(TRUE);
}
12824 
12825 /**
12826   Utility function used by min_max_inspect_cond_for_fields() for comparing
12827   FILED item with given MIN/MAX item and setting appropriate out paramater.
12828 
12829 @param         item_field         Item field for comparison.
12830 @param         min_max_arg_item   The field referenced by the MIN/MAX
12831                                   function(s).
12832 @param [out]   min_max_arg_present    This out parameter is set to true if
12833                                       MIN/MAX argument is present in cond.
12834 @param [out]   non_min_max_arg_present This out parameter is set to true if
12835                                        any field item other than MIN/MAX
12836                                        argument is present in cond.
12837 */
util_min_max_inspect_item(Item * item_field,Item_field * min_max_arg_item,bool * min_max_arg_present,bool * non_min_max_arg_present)12838 static inline void util_min_max_inspect_item(Item *item_field,
12839                                              Item_field *min_max_arg_item,
12840                                              bool *min_max_arg_present,
12841                                              bool *non_min_max_arg_present)
12842 {
12843   if (item_field->type() == Item::FIELD_ITEM)
12844   {
12845     if(min_max_arg_item->eq(item_field, 1))
12846       *min_max_arg_present= true;
12847     else
12848       *non_min_max_arg_present= true;
12849   }
12850 }
12851 
12852 /**
  This function detects the presence of a MIN/MAX field along with at least
12854   one non MIN/MAX field participation in the given condition. Subqueries
12855   inspection is skipped as of now.
12856 
12857   @param         cond   tree (or subtree) describing all or part of the WHERE
12858                         clause being analyzed.
12859   @param         min_max_arg_item   The field referenced by the MIN/MAX
12860                                     function(s).
12861   @param [out]   min_max_arg_present    This out parameter is set to true if
12862                                         MIN/MAX argument is present in cond.
12863   @param [out]   non_min_max_arg_present This out parameter is set to true if
12864                                          any field item other than MIN/MAX
12865                                          argument is present in cond.
12866 
12867   @return  TRUE if both MIN/MAX field and non MIN/MAX field is present in cond.
12868            FALSE o/w.
12869 
12870   @todo: When the hack present in check_group_min_max_predicate() is removed,
12871          subqueries needs to be inspected.
12872 */
12873 
12874 static bool
min_max_inspect_cond_for_fields(Item * cond,Item_field * min_max_arg_item,bool * min_max_arg_present,bool * non_min_max_arg_present)12875 min_max_inspect_cond_for_fields(Item *cond, Item_field *min_max_arg_item,
12876                                 bool *min_max_arg_present,
12877                                 bool *non_min_max_arg_present)
12878 {
12879   DBUG_ENTER("inspect_cond_for_fields");
12880   DBUG_ASSERT(cond && min_max_arg_item);
12881 
12882   cond= cond->real_item();
12883   Item::Type cond_type= cond->type();
12884 
12885   switch (cond_type)  {
12886     case Item::COND_ITEM:  {
12887       DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
12888       List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
12889       Item *and_or_arg;
12890       while ((and_or_arg= li++))
12891       {
12892         min_max_inspect_cond_for_fields(and_or_arg, min_max_arg_item,
12893                                             min_max_arg_present,
12894                                             non_min_max_arg_present);
12895         if (*min_max_arg_present && *non_min_max_arg_present)
12896           DBUG_RETURN(true);
12897       }
12898 
12899       DBUG_RETURN(false);
12900     }
12901     case Item::FUNC_ITEM:  {
12902       /* Test if cond references both group-by and non-group fields. */
12903       Item_func *pred= (Item_func*) cond;
12904       Item *cur_arg;
12905       DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
12906       for (uint arg_idx= 0; arg_idx < pred->argument_count(); arg_idx++)
12907       {
12908         Item **arguments= pred->arguments();
12909         cur_arg= arguments[arg_idx]->real_item();
12910         DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
12911 
12912         if (cur_arg->type() == Item::FUNC_ITEM)
12913         {
12914           min_max_inspect_cond_for_fields(cur_arg, min_max_arg_item,
12915                                               min_max_arg_present,
12916                                               non_min_max_arg_present);
12917         }
12918         else
12919         {
12920           util_min_max_inspect_item(cur_arg,
12921                                     min_max_arg_item,
12922                                     min_max_arg_present,
12923                                     non_min_max_arg_present);
12924         }
12925 
12926         if (*min_max_arg_present && *non_min_max_arg_present)
12927           DBUG_RETURN(true);
12928       }
12929 
12930       if (pred->functype() == Item_func::MULT_EQUAL_FUNC)
12931       {
12932         /*
12933           Analyze participating fields in a multiequal condition.
12934         */
12935         Item_equal_iterator it(*(Item_equal*)cond);
12936 
12937         Item *item_field;
12938         while ((item_field= it++))
12939         {
12940           util_min_max_inspect_item(item_field,
12941                                     min_max_arg_item,
12942                                     min_max_arg_present,
12943                                     non_min_max_arg_present);
12944 
12945           if (*min_max_arg_present && *non_min_max_arg_present)
12946             DBUG_RETURN(true);
12947         }
12948       }
12949 
12950       break;
12951     }
12952     case Item::FIELD_ITEM:  {
12953       util_min_max_inspect_item(cond,
12954                                 min_max_arg_item,
12955                                 min_max_arg_present,
12956                                 non_min_max_arg_present);
12957       DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
12958       DBUG_RETURN(false);
12959     }
12960     default:
12961       break;
12962   }
12963 
12964   DBUG_RETURN(false);
12965 }
12966 
12967 /*
12968   Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
12969   any. 'tree' must be a unique conjunction to ALL predicates in earlier
12970   keyparts of 'keypart_tree'.
12971 
12972   E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
12973   covers 'field', all these conditions satisfies the requirement:
12974 
12975    1. "(kp1=2 OR kp1=3) AND kp2=10"    => returns "kp2=10"
12976    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)"  => returns "kp2=10"
12977    3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12978                                        => returns "kp2=10  OR kp2=11"
12979 
12980    whereas these do not
12981    1. "(kp1=2 AND kp2=10) OR kp1=3"
12982    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
12983    3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12984 
12985    This function effectively tests requirement WA2. In combination with
12986    a test that the returned tree has no more than one range it is also
12987    a test of NGA3.
12988 
12989   @param[in]   field          The field we want the SEL_ARG tree for
12990   @param[in]   keypart_tree   Root node of the SEL_ARG* tree for the index
12991   @param[out]  cur_range      The SEL_ARG tree, if any, for the keypart
12992                               covering field 'keypart_field'
12993   @retval true   'keypart_tree' contained a predicate for 'field' that
12994                   is not conjunction to all predicates on earlier keyparts
12995   @retval false  otherwise
12996 */
12997 
12998 static bool
get_sel_arg_for_keypart(Field * field,SEL_ARG * keypart_tree,SEL_ARG ** cur_range)12999 get_sel_arg_for_keypart(Field *field,
13000                         SEL_ARG *keypart_tree,
13001                         SEL_ARG **cur_range)
13002 {
13003   if (keypart_tree == NULL)
13004     return false;
13005   if (keypart_tree->type != SEL_ARG::KEY_RANGE)
13006   {
13007     /*
13008       A range predicate not usable by Loose Index Scan is found.
13009       Predicates for keypart 'keypart_tree->part' and later keyparts
13010       cannot be used.
13011     */
13012     *cur_range= keypart_tree;
13013     return false;
13014   }
13015   if (keypart_tree->field->eq(field))
13016   {
13017     *cur_range= keypart_tree;
13018     return false;
13019   }
13020 
13021   SEL_ARG *tree_first_range= NULL;
13022   SEL_ARG *first_kp=  keypart_tree->first();
13023 
13024   for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
13025   {
13026     SEL_ARG *curr_tree= NULL;
13027     if (cur_kp->next_key_part)
13028     {
13029       if (get_sel_arg_for_keypart(field,
13030                                   cur_kp->next_key_part,
13031                                   &curr_tree))
13032         return true;
13033     }
13034     /**
13035       Check if the SEL_ARG tree for 'field' is identical for all ranges in
13036       'keypart_tree
13037      */
13038     if (cur_kp == first_kp)
13039       tree_first_range= curr_tree;
13040     else if (!all_same(tree_first_range, curr_tree))
13041       return true;
13042   }
13043   *cur_range= tree_first_range;
13044   return false;
13045 }
13046 
13047 /*
13048   Extract a sequence of constants from a conjunction of equality predicates.
13049 
13050   SYNOPSIS
13051     get_constant_key_infix()
13052     index_info             [in]  Descriptor of the chosen index.
13053     index_range_tree       [in]  Range tree for the chosen index
13054     first_non_group_part   [in]  First index part after group attribute parts
13055     min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
13056     last_part              [in]  Last keypart of the index
13057     thd                    [in]  Current thread
13058     key_infix              [out] Infix of constants to be used for index lookup
13059     key_infix_len          [out] Lenghth of the infix
13060     first_non_infix_part   [out] The first keypart after the infix (if any)
13061 
13062   DESCRIPTION
13063     Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
13064     for each keypart field NGF_i not in GROUP-BY, check that there is a
13065     constant equality predicate among conds with the form (NGF_i = const_ci) or
13066     (const_ci = NGF_i).
13067     Thus all the NGF_i attributes must fill the 'gap' between the last group-by
13068     attribute and the MIN/MAX attribute in the index (if present).  Also ensure
13069     that there is only a single range on NGF_i (NGA3). If these
13070     conditions hold, copy each constant from its corresponding predicate into
13071     key_infix, in the order its NG_i attribute appears in the index, and update
13072     key_infix_len with the total length of the key parts in key_infix.
13073 
13074   RETURN
13075     TRUE  if the index passes the test
13076     FALSE o/w
13077 */
13078 static bool
get_constant_key_infix(KEY * index_info,SEL_ARG * index_range_tree,KEY_PART_INFO * first_non_group_part,KEY_PART_INFO * min_max_arg_part,KEY_PART_INFO * last_part,THD * thd,uchar * key_infix,uint * key_infix_len,KEY_PART_INFO ** first_non_infix_part)13079 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
13080                        KEY_PART_INFO *first_non_group_part,
13081                        KEY_PART_INFO *min_max_arg_part,
13082                        KEY_PART_INFO *last_part, THD *thd,
13083                        uchar *key_infix, uint *key_infix_len,
13084                        KEY_PART_INFO **first_non_infix_part)
13085 {
13086   SEL_ARG       *cur_range;
13087   KEY_PART_INFO *cur_part;
13088   /* End part for the first loop below. */
13089   KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
13090 
13091   *key_infix_len= 0;
13092   uchar *key_ptr= key_infix;
13093   for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
13094   {
13095     cur_range= NULL;
13096     /*
13097       Check NGA3:
13098       1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
13099          checks for a unique conjunction of this tree with all the predicates
13100          on the earlier keyparts in the index.
13101       2. Check for multiple ranges on the found keypart tree.
13102 
13103       We assume that index_range_tree points to the leftmost keypart in
13104       the index.
13105     */
13106     if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
13107                                 &cur_range))
13108       return false;
13109 
13110     if (cur_range && cur_range->elements > 1)
13111       return false;
13112 
13113     if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
13114     {
13115       if (min_max_arg_part)
13116         return false; /* The current keypart has no range predicates at all. */
13117       else
13118       {
13119         *first_non_infix_part= cur_part;
13120         return true;
13121       }
13122     }
13123 
13124     if ((cur_range->min_flag & NO_MIN_RANGE) ||
13125         (cur_range->max_flag & NO_MAX_RANGE) ||
13126         (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
13127       return false;
13128 
13129     uint field_length= cur_part->store_length;
13130     if (cur_range->maybe_null &&
13131          cur_range->min_value[0] && cur_range->max_value[0])
13132     {
13133       /*
13134         cur_range specifies 'IS NULL'. In this case the argument points
13135         to a "null value" (a copy of is_null_string) that we do not
13136         memcmp(), or memcpy to a field.
13137       */
13138       DBUG_ASSERT (field_length > 0);
13139       *key_ptr= 1;
13140       key_ptr+= field_length;
13141       *key_infix_len+= field_length;
13142     }
13143     else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
13144     { /* cur_range specifies an equality condition. */
13145       memcpy(key_ptr, cur_range->min_value, field_length);
13146       key_ptr+= field_length;
13147       *key_infix_len+= field_length;
13148     }
13149     else
13150       return false;
13151   }
13152 
13153   if (!min_max_arg_part && (cur_part == last_part))
13154     *first_non_infix_part= last_part;
13155 
13156   return TRUE;
13157 }
13158 
13159 
13160 /*
13161   Find the key part referenced by a field.
13162 
13163   SYNOPSIS
13164     get_field_keypart()
13165     index  descriptor of an index
13166     field  field that possibly references some key part in index
13167 
13168   NOTES
13169     The return value can be used to get a KEY_PART_INFO pointer by
13170     part= index->key_part + get_field_keypart(...) - 1;
13171 
13172   RETURN
13173     Positive number which is the consecutive number of the key part, or
13174     0 if field does not reference any index field.
13175 */
13176 
13177 static inline uint
get_field_keypart(KEY * index,Field * field)13178 get_field_keypart(KEY *index, Field *field)
13179 {
13180   KEY_PART_INFO *part, *end;
13181 
13182   for (part= index->key_part, end= part + actual_key_parts(index) ;
13183        part < end; part++)
13184   {
13185     if (field->eq(part->field))
13186       return part - index->key_part + 1;
13187   }
13188   return 0;
13189 }
13190 
13191 
13192 /*
13193   Find the SEL_ARG sub-tree that corresponds to the chosen index.
13194 
13195   SYNOPSIS
13196     get_index_range_tree()
13197     index     [in]  The ID of the index being looked for
13198     range_tree[in]  Tree of ranges being searched
13199     param     [in]  PARAM from test_quick_select
13200 
13201   DESCRIPTION
13202 
13203     A SEL_TREE contains range trees for all usable indexes. This procedure
13204     finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
13205     ordered in the same way as the members of PARAM::key, thus we first find
13206     the corresponding index in the array PARAM::key. This index is returned
13207     through the variable param_idx, to be used later as argument of
13208     check_quick_select().
13209 
13210   RETURN
13211     Pointer to the SEL_ARG subtree that corresponds to index.
13212 */
13213 
get_index_range_tree(uint index,SEL_TREE * range_tree,PARAM * param)13214 SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param)
13215 {
13216   uint idx= 0; /* Index nr in param->key_parts */
13217   while (idx < param->keys)
13218   {
13219     if (index == param->real_keynr[idx])
13220       break;
13221     idx++;
13222   }
13223   return(range_tree->keys[idx]);
13224 }
13225 
13226 
13227 /*
13228   Compute the cost of a quick_group_min_max_select for a particular index.
13229 
13230   SYNOPSIS
13231     cost_group_min_max()
13232     table                [in] The table being accessed
13233     key                  [in] The index used to access the table
13234     used_key_parts       [in] Number of key parts used to access the index
13235     group_key_parts      [in] Number of index key parts in the group prefix
13236     range_tree           [in] Tree of ranges for all indexes
13237     index_tree           [in] The range tree for the current index
13238     quick_prefix_records [in] Number of records retrieved by the internally
13239 			      used quick range select if any
13240     have_min             [in] True if there is a MIN function
13241     have_max             [in] True if there is a MAX function
13242     cost_est            [out] The cost to retrieve rows via this quick select
13243     records             [out] The number of rows retrieved
13244 
13245   DESCRIPTION
13246     This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
13247     the number of rows returned.
13248 
13249   NOTES
13250     The cost computation distinguishes several cases:
13251     1) No equality predicates over non-group attributes (thus no key_infix).
13252        If groups are bigger than blocks on the average, then we assume that it
13253        is very unlikely that block ends are aligned with group ends, thus even
13254        if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
13255        keys, except for the first MIN and the last MAX keys, will be in the
13256        same block.  If groups are smaller than blocks, then we are going to
13257        read all blocks.
13258     2) There are equality predicates over non-group attributes.
13259        In this case the group prefix is extended by additional constants, and
13260        as a result the min/max values are inside sub-groups of the original
13261        groups. The number of blocks that will be read depends on whether the
13262        ends of these sub-groups will be contained in the same or in different
13263        blocks. We compute the probability for the two ends of a subgroup to be
13264        in two different blocks as the ratio of:
13265        - the number of positions of the left-end of a subgroup inside a group,
13266          such that the right end of the subgroup is past the end of the buffer
13267          containing the left-end, and
13268        - the total number of possible positions for the left-end of the
13269          subgroup, which is the number of keys in the containing group.
13270        We assume it is very unlikely that two ends of subsequent subgroups are
13271        in the same block.
13272     3) The are range predicates over the group attributes.
13273        Then some groups may be filtered by the range predicates. We use the
13274        selectivity of the range predicates to decide how many groups will be
13275        filtered.
13276 
13277   TODO
13278      - Take into account the optional range predicates over the MIN/MAX
13279        argument.
13280      - Check if we have a PK index and we use all cols - then each key is a
13281        group, and it will be better to use an index scan.
13282 
13283   RETURN
13284     None
13285 */
13286 
cost_group_min_max(TABLE * table,uint key,uint used_key_parts,uint group_key_parts,SEL_TREE * range_tree,SEL_ARG * index_tree,ha_rows quick_prefix_records,bool have_min,bool have_max,Cost_estimate * cost_est,ha_rows * records)13287 void cost_group_min_max(TABLE* table, uint key, uint used_key_parts,
13288                         uint group_key_parts, SEL_TREE *range_tree,
13289                         SEL_ARG *index_tree, ha_rows quick_prefix_records,
13290                         bool have_min, bool have_max,
13291                         Cost_estimate *cost_est, ha_rows *records)
13292 {
13293   ha_rows table_records;
13294   uint num_groups;
13295   uint num_blocks;
13296   uint keys_per_block;
13297   rec_per_key_t keys_per_group;
13298   double p_overlap; /* Probability that a sub-group overlaps two blocks. */
13299   double quick_prefix_selectivity;
13300   double io_blocks;       // Number of blocks to read from table
13301   DBUG_ENTER("cost_group_min_max");
13302   DBUG_ASSERT(cost_est->is_zero());
13303 
13304   const KEY *const index_info= &table->key_info[key];
13305   table_records= table->file->stats.records;
13306   keys_per_block= (table->file->stats.block_size / 2 /
13307                    (index_info->key_length + table->file->ref_length)
13308                         + 1);
13309   num_blocks= (uint)(table_records / keys_per_block) + 1;
13310 
13311   /* Compute the number of keys in a group. */
13312   if (index_info->has_records_per_key(group_key_parts - 1))
13313     // Use index statistics
13314     keys_per_group= index_info->records_per_key(group_key_parts - 1);
13315   else
13316     /* If there is no statistics try to guess */
13317     keys_per_group= guess_rec_per_key(table, index_info, group_key_parts);
13318 
13319   num_groups= (uint)(table_records / keys_per_group) + 1;
13320 
13321   /* Apply the selectivity of the quick select for group prefixes. */
13322   if (range_tree && (quick_prefix_records != HA_POS_ERROR))
13323   {
13324     quick_prefix_selectivity= (double) quick_prefix_records /
13325                               (double) table_records;
13326     num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
13327     set_if_bigger(num_groups, 1);
13328   }
13329 
13330   if (used_key_parts > group_key_parts)
13331   {
13332     // Average number of keys in sub-groups formed by a key infix
13333     rec_per_key_t keys_per_subgroup;
13334     if (index_info->has_records_per_key(used_key_parts - 1))
13335       // Use index statistics
13336       keys_per_subgroup= index_info->records_per_key(used_key_parts - 1);
13337     else
13338     {
13339       // If no index statistics then we use a guessed records per key value.
13340       keys_per_subgroup= guess_rec_per_key(table, index_info, used_key_parts);
13341       set_if_smaller(keys_per_subgroup, keys_per_group);
13342     }
13343 
13344     /*
13345       Compute the probability that two ends of a subgroup are inside
13346       different blocks.
13347     */
13348     if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
13349       p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
13350     else
13351     {
13352       double blocks_per_group= (double) num_blocks / (double) num_groups;
13353       p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
13354       p_overlap= min(p_overlap, 1.0);
13355     }
13356     io_blocks= min<double>(num_groups * (1 + p_overlap), num_blocks);
13357   }
13358   else
13359     io_blocks= (keys_per_group > keys_per_block) ?
13360                (have_min && have_max) ? (double) (num_groups + 1) :
13361                                         (double) num_groups :
13362                (double) num_blocks;
13363 
13364   /*
13365     Estimate IO cost.
13366   */
13367   const Cost_model_table *const cost_model= table->cost_model();
13368   cost_est->add_io(cost_model->page_read_cost_index(key, io_blocks));
13369 
13370   /*
13371     CPU cost must be comparable to that of an index scan as computed
13372     in test_quick_select(). When the groups are small,
13373     e.g. for a unique index, using index scan will be cheaper since it
13374     reads the next record without having to re-position to it on every
13375     group. To make the CPU cost reflect this, we estimate the CPU cost
13376     as the sum of:
13377     1. Cost for evaluating the condition (similarly as for index scan).
13378     2. Cost for navigating the index structure (assuming a b-tree).
13379        Note: We only add the cost for one comparision per block. For a
13380              b-tree the number of comparisons will be larger.
13381        TODO: This cost should be provided by the storage engine.
13382   */
13383   const double tree_height= table_records == 0 ?
13384                             1.0 :
13385                             ceil(log(double(table_records)) /
13386                                  log(double(keys_per_block)));
13387   const double tree_traversal_cost= cost_model->key_compare_cost(tree_height);
13388 
13389   const double cpu_cost= num_groups * (tree_traversal_cost +
13390                                        cost_model->row_evaluate_cost(1.0));
13391   cost_est->add_cpu(cpu_cost);
13392   *records= num_groups;
13393 
13394   DBUG_PRINT("info",
13395              ("table rows: %lu  keys/block: %u  keys/group: %.1f  result rows: %lu  blocks: %u",
13396               (ulong)table_records, keys_per_block, keys_per_group,
13397               (ulong) *records, num_blocks));
13398   DBUG_VOID_RETURN;
13399 }
13400 
13401 
13402 /*
13403   Construct a new quick select object for queries with group by with min/max.
13404 
13405   SYNOPSIS
13406     TRP_GROUP_MIN_MAX::make_quick()
13407     param              Parameter from test_quick_select
13408     retrieve_full_rows ignored
13409     parent_alloc       Memory pool to use, if any.
13410 
13411   NOTES
13412     Make_quick ignores the retrieve_full_rows parameter because
13413     QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
13414     The other parameter are ignored as well because all necessary
13415     data to create the QUICK object is computed at this TRP creation
13416     time.
13417 
13418   RETURN
13419     New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
13420     NULL otherwise.
13421 */
13422 
13423 QUICK_SELECT_I *
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)13424 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
13425                               MEM_ROOT *parent_alloc)
13426 {
13427   QUICK_GROUP_MIN_MAX_SELECT *quick;
13428   DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
13429 
13430   quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
13431                                         param->thd->lex->current_select()->join,
13432                                         have_min, have_max,
13433                                         have_agg_distinct, min_max_arg_part,
13434                                         group_prefix_len, group_key_parts,
13435                                         used_key_parts, index_info, index,
13436                                         &cost_est, records, key_infix_len,
13437                                         key_infix, parent_alloc, is_index_scan);
13438   if (!quick)
13439     DBUG_RETURN(NULL);
13440 
13441   if (quick->init())
13442   {
13443     delete quick;
13444     DBUG_RETURN(NULL);
13445   }
13446 
13447   if (range_tree)
13448   {
13449     DBUG_ASSERT(quick_prefix_records > 0);
13450     if (quick_prefix_records == HA_POS_ERROR)
13451       quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
13452     else
13453     {
13454       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
13455       quick->quick_prefix_select= get_quick_select(param, param_idx,
13456                                                    index_tree,
13457                                                    HA_MRR_SORTED,
13458                                                    0,
13459                                                    &quick->alloc);
13460       if (!quick->quick_prefix_select)
13461       {
13462         delete quick;
13463         DBUG_RETURN(NULL);
13464       }
13465     }
13466     /*
13467       Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
13468       attribute, and create an array of QUICK_RANGES to be used by the
13469       new quick select.
13470     */
13471     if (min_max_arg_part)
13472     {
13473       SEL_ARG *min_max_range= index_tree;
13474       while (min_max_range) /* Find the tree for the MIN/MAX key part. */
13475       {
13476         if (min_max_range->field->eq(min_max_arg_part->field))
13477           break;
13478         min_max_range= min_max_range->next_key_part;
13479       }
13480       /* Scroll to the leftmost interval for the MIN/MAX argument. */
13481       while (min_max_range && min_max_range->prev)
13482         min_max_range= min_max_range->prev;
13483       /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
13484       while (min_max_range)
13485       {
13486         if (quick->add_range(min_max_range))
13487         {
13488           delete quick;
13489           quick= NULL;
13490           DBUG_RETURN(NULL);
13491         }
13492         min_max_range= min_max_range->next;
13493       }
13494     }
13495   }
13496   else
13497     quick->quick_prefix_select= NULL;
13498 
13499   quick->update_key_stat();
13500   quick->adjust_prefix_ranges();
13501 
13502   DBUG_RETURN(quick);
13503 }
13504 
13505 
13506 /*
13507   Construct new quick select for group queries with min/max.
13508 
13509   SYNOPSIS
13510     QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
13511     table             The table being accessed
13512     join              Descriptor of the current query
13513     have_min          TRUE if the query selects a MIN function
13514     have_max          TRUE if the query selects a MAX function
13515     min_max_arg_part  The only argument field of all MIN/MAX functions
13516     group_prefix_len  Length of all key parts in the group prefix
13517     prefix_key_parts  All key parts in the group prefix
13518     index_info        The index chosen for data access
13519     use_index         The id of index_info
13520     read_cost         Cost of this access method
13521     records           Number of records returned
13522     key_infix_len     Length of the key infix appended to the group prefix
13523     key_infix         Infix of constants from equality predicates
13524     parent_alloc      Memory pool for this and quick_prefix_select data
13525     is_index_scan     get the next different key not by jumping on it via
13526                       index read, but by scanning until the end of the
13527                       rows with equal key value.
13528 
13529   RETURN
13530     None
13531 */
13532 
13533 QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE * table,JOIN * join_arg,bool have_min_arg,bool have_max_arg,bool have_agg_distinct_arg,KEY_PART_INFO * min_max_arg_part_arg,uint group_prefix_len_arg,uint group_key_parts_arg,uint used_key_parts_arg,KEY * index_info_arg,uint use_index,const Cost_estimate * read_cost_arg,ha_rows records_arg,uint key_infix_len_arg,uchar * key_infix_arg,MEM_ROOT * parent_alloc,bool is_index_scan_arg)13534 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
13535                            bool have_max_arg, bool have_agg_distinct_arg,
13536                            KEY_PART_INFO *min_max_arg_part_arg,
13537                            uint group_prefix_len_arg, uint group_key_parts_arg,
13538                            uint used_key_parts_arg, KEY *index_info_arg,
13539                            uint use_index, const Cost_estimate *read_cost_arg,
13540                            ha_rows records_arg, uint key_infix_len_arg,
13541                            uchar *key_infix_arg, MEM_ROOT *parent_alloc,
13542                            bool is_index_scan_arg)
13543   :join(join_arg), index_info(index_info_arg),
13544    group_prefix_len(group_prefix_len_arg),
13545    group_key_parts(group_key_parts_arg), have_min(have_min_arg),
13546    have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
13547    seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
13548    key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
13549    min_max_ranges(PSI_INSTRUMENT_ME),
13550    min_functions_it(NULL), max_functions_it(NULL),
13551    is_index_scan(is_index_scan_arg)
13552 {
13553   head=       table;
13554   index=      use_index;
13555   record=     head->record[0];
13556   tmp_record= head->record[1];
13557   cost_est= *read_cost_arg;
13558   records= records_arg;
13559   used_key_parts= used_key_parts_arg;
13560   real_key_parts= used_key_parts_arg;
13561   real_prefix_len= group_prefix_len + key_infix_len;
13562   group_prefix= NULL;
13563   min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
13564 
13565   /*
13566     We can't have parent_alloc set as the init function can't handle this case
13567     yet.
13568   */
13569   DBUG_ASSERT(!parent_alloc);
13570   if (!parent_alloc)
13571   {
13572     init_sql_alloc(key_memory_quick_group_min_max_select_root,
13573                    &alloc, join->thd->variables.range_alloc_block_size, 0);
13574     join->thd->mem_root= &alloc;
13575   }
13576   else
13577     memset(&alloc, 0, sizeof(MEM_ROOT));  // ensure that it's not used
13578 }
13579 
13580 
13581 /*
13582   Do post-constructor initialization.
13583 
13584   SYNOPSIS
13585     QUICK_GROUP_MIN_MAX_SELECT::init()
13586 
13587   DESCRIPTION
13588     The method performs initialization that cannot be done in the constructor
13589     such as memory allocations that may fail. It allocates memory for the
13590     group prefix and inifix buffers, and for the lists of MIN/MAX item to be
13591     updated during execution.
13592 
13593   RETURN
13594     0      OK
13595     other  Error code
13596 */
13597 
init()13598 int QUICK_GROUP_MIN_MAX_SELECT::init()
13599 {
13600   if (group_prefix) /* Already initialized. */
13601     return 0;
13602 
13603   if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len)))
13604       return 1;
13605   /*
13606     We may use group_prefix to store keys with all select fields, so allocate
13607     enough space for it.
13608   */
13609   if (!(group_prefix= (uchar*) alloc_root(&alloc,
13610                                          real_prefix_len + min_max_arg_len)))
13611     return 1;
13612 
13613   if (key_infix_len > 0)
13614   {
13615     /*
13616       The memory location pointed to by key_infix will be deleted soon, so
13617       allocate a new buffer and copy the key_infix into it.
13618     */
13619     uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
13620     if (!tmp_key_infix)
13621       return 1;
13622     memcpy(tmp_key_infix, this->key_infix, key_infix_len);
13623     this->key_infix= tmp_key_infix;
13624   }
13625 
13626   if (min_max_arg_part)
13627   {
13628     if (have_min)
13629     {
13630       if (!(min_functions= new List<Item_sum>))
13631         return 1;
13632     }
13633     else
13634       min_functions= NULL;
13635     if (have_max)
13636     {
13637       if (!(max_functions= new List<Item_sum>))
13638         return 1;
13639     }
13640     else
13641       max_functions= NULL;
13642 
13643     Item_sum *min_max_item;
13644     Item_sum **func_ptr= join->sum_funcs;
13645     while ((min_max_item= *(func_ptr++)))
13646     {
13647       if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
13648         min_functions->push_back(min_max_item);
13649       else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
13650         max_functions->push_back(min_max_item);
13651     }
13652 
13653     if (have_min)
13654     {
13655       if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
13656         return 1;
13657     }
13658 
13659     if (have_max)
13660     {
13661       if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
13662         return 1;
13663     }
13664   }
13665 
13666   return 0;
13667 }
13668 
13669 
~QUICK_GROUP_MIN_MAX_SELECT()13670 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
13671 {
13672   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
13673   if (head->file->inited)
13674     /*
13675       We may have used this object for index access during
13676       create_sort_index() and then switched to rnd access for the rest
13677       of execution. Since we don't do cleanup until now, we must call
13678       ha_*_end() for whatever is the current access method.
13679     */
13680     head->file->ha_index_or_rnd_end();
13681 
13682   free_root(&alloc,MYF(0));
13683   delete min_functions_it;
13684   delete max_functions_it;
13685   delete quick_prefix_select;
13686   DBUG_VOID_RETURN;
13687 }
13688 
13689 
13690 /*
13691   Eventually create and add a new quick range object.
13692 
13693   SYNOPSIS
13694     QUICK_GROUP_MIN_MAX_SELECT::add_range()
13695     sel_range  Range object from which a
13696 
13697   NOTES
13698     Construct a new QUICK_RANGE object from a SEL_ARG object, and
13699     add it to the array min_max_ranges. If sel_arg is an infinite
13700     range, e.g. (x < 5 or x > 4), then skip it and do not construct
13701     a quick range.
13702 
13703   RETURN
13704     FALSE on success
13705     TRUE  otherwise
13706 */
13707 
add_range(SEL_ARG * sel_range)13708 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
13709 {
13710   QUICK_RANGE *range;
13711   uint range_flag= sel_range->min_flag | sel_range->max_flag;
13712 
13713   /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
13714   if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
13715     return FALSE;
13716 
13717   if (!(sel_range->min_flag & NO_MIN_RANGE) &&
13718       !(sel_range->max_flag & NO_MAX_RANGE))
13719   {
13720     if (sel_range->maybe_null &&
13721         sel_range->min_value[0] && sel_range->max_value[0])
13722       range_flag|= NULL_RANGE; /* IS NULL condition */
13723     /*
13724       Do not perform comparison if one of the argiment is NULL value.
13725     */
13726     else if (!sel_range->min_value[0] &&
13727              !sel_range->max_value[0] &&
13728              memcmp(sel_range->min_value, sel_range->max_value,
13729                     min_max_arg_len) == 0)
13730       range_flag|= EQ_RANGE;  /* equality condition */
13731   }
13732   range= new QUICK_RANGE(sel_range->min_value, min_max_arg_len,
13733                          make_keypart_map(sel_range->part),
13734                          sel_range->max_value, min_max_arg_len,
13735                          make_keypart_map(sel_range->part),
13736                          range_flag, HA_READ_INVALID);
13737   if (!range)
13738     return TRUE;
13739   if (min_max_ranges.push_back(range))
13740     return TRUE;
13741   return FALSE;
13742 }
13743 
13744 
13745 /*
13746   Opens the ranges if there are more conditions in quick_prefix_select than
13747   the ones used for jumping through the prefixes.
13748 
13749   SYNOPSIS
13750     QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
13751 
13752   NOTES
13753     quick_prefix_select is made over the conditions on the whole key.
13754     It defines a number of ranges of length x.
13755     However when jumping through the prefixes we use only the the first
13756     few most significant keyparts in the range key. However if there
13757     are more keyparts to follow the ones we are using we must make the
13758     condition on the key inclusive (because x < "ab" means
13759     x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b').
13760     To achive the above we must turn off the NEAR_MIN/NEAR_MAX
13761 */
adjust_prefix_ranges()13762 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
13763 {
13764   if (quick_prefix_select &&
13765       group_prefix_len < quick_prefix_select->max_used_key_length)
13766   {
13767     for (size_t ix= 0; ix < quick_prefix_select->ranges.size(); ++ix)
13768     {
13769       QUICK_RANGE *range= quick_prefix_select->ranges[ix];
13770       range->flag&= ~(NEAR_MIN | NEAR_MAX);
13771     }
13772   }
13773 }
13774 
13775 
13776 /*
13777   Determine the total number and length of the keys that will be used for
13778   index lookup.
13779 
13780   SYNOPSIS
13781     QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13782 
13783   DESCRIPTION
13784     The total length of the keys used for index lookup depends on whether
13785     there are any predicates referencing the min/max argument, and/or if
13786     the min/max argument field can be NULL.
13787     This function does an optimistic analysis whether the search key might
13788     be extended by a constant for the min/max keypart. It is 'optimistic'
13789     because during actual execution it may happen that a particular range
13790     is skipped, and then a shorter key will be used. However this is data
13791     dependent and can't be easily estimated here.
13792 
13793   RETURN
13794     None
13795 */
13796 
update_key_stat()13797 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
13798 {
13799   max_used_key_length= real_prefix_len;
13800   if (min_max_ranges.size() > 0)
13801   {
13802     if (have_min)
13803     { /* Check if the right-most range has a lower boundary. */
13804       QUICK_RANGE *rightmost_range= min_max_ranges[min_max_ranges.size() - 1];
13805       if (!(rightmost_range->flag & NO_MIN_RANGE))
13806       {
13807         max_used_key_length+= min_max_arg_len;
13808         used_key_parts++;
13809         return;
13810       }
13811     }
13812     if (have_max)
13813     { /* Check if the left-most range has an upper boundary. */
13814       QUICK_RANGE *leftmost_range= min_max_ranges[0];
13815       if (!(leftmost_range->flag & NO_MAX_RANGE))
13816       {
13817         max_used_key_length+= min_max_arg_len;
13818         used_key_parts++;
13819         return;
13820       }
13821     }
13822   }
13823   else if (have_min && min_max_arg_part &&
13824            min_max_arg_part->field->real_maybe_null())
13825   {
13826     /*
13827       If a MIN/MAX argument value is NULL, we can quickly determine
13828       that we're in the beginning of the next group, because NULLs
13829       are always < any other value. This allows us to quickly
13830       determine the end of the current group and jump to the next
13831       group (see next_min()) and thus effectively increases the
13832       usable key length.
13833     */
13834     max_used_key_length+= min_max_arg_len;
13835     used_key_parts++;
13836   }
13837 }
13838 
13839 
13840 /*
13841   Initialize a quick group min/max select for key retrieval.
13842 
13843   SYNOPSIS
13844     QUICK_GROUP_MIN_MAX_SELECT::reset()
13845 
13846   DESCRIPTION
13847     Initialize the index chosen for access and find and store the prefix
13848     of the last group. The method is expensive since it performs disk access.
13849 
13850   RETURN
13851     0      OK
13852     other  Error code
13853 */
13854 
reset(void)13855 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
13856 {
13857   int result;
13858   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
13859 
13860   seen_first_key= false;
13861   head->set_keyread(TRUE); /* We need only the key attributes */
13862   /*
13863     Request ordered index access as usage of ::index_last(),
13864     ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
13865   */
13866   if ((result= head->file->ha_index_init(index, true)))
13867   {
13868     head->file->print_error(result, MYF(0));
13869     DBUG_RETURN(result);
13870   }
13871   if (quick_prefix_select && quick_prefix_select->reset())
13872     DBUG_RETURN(1);
13873 
13874   result= head->file->ha_index_last(record);
13875   if (result != 0)
13876   {
13877     if (result == HA_ERR_END_OF_FILE)
13878       DBUG_RETURN(0);
13879     else
13880       DBUG_RETURN(result);
13881   }
13882 
13883   /* Save the prefix of the last group. */
13884   key_copy(last_prefix, record, index_info, group_prefix_len);
13885 
13886   DBUG_RETURN(0);
13887 }
13888 
13889 
13890 
13891 /*
13892   Get the next key containing the MIN and/or MAX key for the next group.
13893 
13894   SYNOPSIS
13895     QUICK_GROUP_MIN_MAX_SELECT::get_next()
13896 
13897   DESCRIPTION
13898     The method finds the next subsequent group of records that satisfies the
13899     query conditions and finds the keys that contain the MIN/MAX values for
13900     the key part referenced by the MIN/MAX function(s). Once a group and its
13901     MIN/MAX values are found, store these values in the Item_sum objects for
13902     the MIN/MAX functions. The rest of the values in the result row are stored
13903     in the Item_field::result_field of each select field. If the query does
13904     not contain MIN and/or MAX functions, then the function only finds the
13905     group prefix, which is a query answer itself.
13906 
13907   NOTES
13908     If both MIN and MAX are computed, then we use the fact that if there is
13909     no MIN key, there can't be a MAX key as well, so we can skip looking
13910     for a MAX key in this case.
13911 
13912   RETURN
13913     0                  on success
13914     HA_ERR_END_OF_FILE if returned all keys
13915     other              if some error occurred
13916 */
13917 
int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
  int result;
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    /* Position on the first key of the next group (possibly with infix). */
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      /* 0 means the current prefix equals the saved last prefix. */
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      /* A missing key means "try the next group"; other errors end the scan. */
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue;
      break;
    }

    if (have_min)
    {
      min_res= next_min();
      /* MIN and MAX must be harvested from record separately (same buffer). */
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= head->file->ha_index_read_map(record, group_prefix,
                                            make_prev_keypart_map(real_key_parts),
                                            HA_READ_KEY_EXACT);

    /* The status of the current group comes from MIN first, then MAX. */
    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  /* Normalize "no qualifying group" to end-of-file for the caller. */
  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}
13987 
13988 
13989 /*
13990   Retrieve the minimal key in the next group.
13991 
13992   SYNOPSIS
13993     QUICK_GROUP_MIN_MAX_SELECT::next_min()
13994 
13995   DESCRIPTION
13996     Find the minimal key within this group such that the key satisfies the query
13997     conditions and NULL semantics. The found key is loaded into this->record.
13998 
13999   IMPLEMENTATION
14000     Depending on the values of min_max_ranges.elements, key_infix_len, and
14001     whether there is a  NULL in the MIN field, this function may directly
14002     return without any data access. In this case we use the key loaded into
14003     this->record by the call to this->next_prefix() just before this call.
14004 
14005   RETURN
14006     0                    on success
14007     HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
14008     HA_ERR_END_OF_FILE   - "" -
14009     other                if some error occurred
14010 */
14011 
int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.size() > 0)
  {
    /* Range conditions on the MIN/MAX argument: scan the ranges. */
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result= head->file->ha_index_read_map(record, group_prefix,
                                                 make_prev_keypart_map(real_key_parts),
                                                 HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar key_buf[MAX_KEY_LENGTH];

      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(key_buf, record, index_info, max_used_key_length);
      result= head->file->ha_index_read_map(record, key_buf,
                                            make_keypart_map(real_key_parts),
                                            HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        /* Different prefix => next group; restore the saved NULL key. */
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, key_buf, index_info, 0);
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}
14076 
14077 
14078 /*
14079   Retrieve the maximal key in the next group.
14080 
14081   SYNOPSIS
14082     QUICK_GROUP_MIN_MAX_SELECT::next_max()
14083 
14084   DESCRIPTION
14085     Lookup the maximal key of the group, and store it into this->record.
14086 
14087   RETURN
14088     0                    on success
14089     HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
14090     HA_ERR_END_OF_FILE	 - "" -
14091     other                if some error occurred
14092 */
14093 
next_max()14094 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
14095 {
14096   int result;
14097 
14098   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
14099 
14100   /* Get the last key in the (possibly extended) group. */
14101   if (min_max_ranges.size() > 0)
14102     result= next_max_in_range();
14103   else
14104     result= head->file->ha_index_read_map(record, group_prefix,
14105                                           make_prev_keypart_map(real_key_parts),
14106                                           HA_READ_PREFIX_LAST);
14107   DBUG_RETURN(result);
14108 }
14109 
14110 
14111 /**
  Find the next different key value by skipping all the rows with the same key
  value.

  Implements a specialized loose index access method for queries
  containing aggregate functions with distinct of the form:
    SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
  This method replaces the index scan + Unique class
  (distinct selection) for loose index scan that visits all the rows of a
  covering index instead of jumping to the beginning of each group.
14121   TODO: Placeholder function. To be replaced by a handler API call
14122 
14123   @param is_index_scan     hint to use index scan instead of random index read
14124                            to find the next different value.
14125   @param file              table handler
14126   @param key_part          group key to compare
14127   @param record            row data
14128   @param group_prefix      current key prefix data
14129   @param group_prefix_len  length of the current key prefix data
14130   @param group_key_parts   number of the current key prefix columns
14131   @return status
14132     @retval  0  success
14133     @retval !0  failure
14134 */
14135 
index_next_different(bool is_index_scan,handler * file,KEY_PART_INFO * key_part,uchar * record,const uchar * group_prefix,uint group_prefix_len,uint group_key_parts)14136 static int index_next_different (bool is_index_scan, handler *file,
14137                                 KEY_PART_INFO *key_part, uchar * record,
14138                                 const uchar * group_prefix,
14139                                 uint group_prefix_len,
14140                                 uint group_key_parts)
14141 {
14142   if (is_index_scan)
14143   {
14144     int result= 0;
14145 
14146     while (!key_cmp (key_part, group_prefix, group_prefix_len))
14147     {
14148       result= file->ha_index_next(record);
14149       if (result)
14150         return(result);
14151     }
14152     return result;
14153   }
14154   else
14155     return file->ha_index_read_map(record, group_prefix,
14156                                    make_prev_keypart_map(group_key_parts),
14157                                    HA_READ_AFTER_KEY);
14158 }
14159 
14160 
14161 /*
14162   Determine the prefix of the next group.
14163 
14164   SYNOPSIS
14165     QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
14166 
14167   DESCRIPTION
14168     Determine the prefix of the next group that satisfies the query conditions.
14169     If there is a range condition referencing the group attributes, use a
14170     QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
14171     condition. If there is a key infix of constants, append this infix
14172     immediately after the group attributes. The possibly extended prefix is
14173     stored in this->group_prefix. The first key of the found group is stored in
14174     this->record, on which relies this->next_min().
14175 
14176   RETURN
14177     0                    on success
14178     HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
14179     HA_ERR_END_OF_FILE   if there are no more keys
14180     other                if some error occurred
14181 */
next_prefix()14182 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
14183 {
14184   int result;
14185   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
14186 
14187   if (quick_prefix_select)
14188   {
14189     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
14190     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
14191                                                       group_key_parts,
14192                                                       cur_prefix)))
14193       DBUG_RETURN(result);
14194     seen_first_key= TRUE;
14195   }
14196   else
14197   {
14198     if (!seen_first_key)
14199     {
14200       result= head->file->ha_index_first(record);
14201       if (result)
14202         DBUG_RETURN(result);
14203       seen_first_key= TRUE;
14204     }
14205     else
14206     {
14207       /* Load the first key in this group into record. */
14208       result= index_next_different (is_index_scan, head->file,
14209                                     index_info->key_part,
14210                                     record, group_prefix, group_prefix_len,
14211                                     group_key_parts);
14212       if (result)
14213         DBUG_RETURN(result);
14214     }
14215   }
14216 
14217   /* Save the prefix of this group for subsequent calls. */
14218   key_copy(group_prefix, record, index_info, group_prefix_len);
14219   /* Append key_infix to group_prefix. */
14220   if (key_infix_len > 0)
14221     memcpy(group_prefix + group_prefix_len,
14222            key_infix, key_infix_len);
14223 
14224   DBUG_RETURN(0);
14225 }
14226 
14227 
14228 /*
14229   Find the minimal key in a group that satisfies some range conditions for the
14230   min/max argument field.
14231 
14232   SYNOPSIS
14233     QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
14234 
14235   DESCRIPTION
14236     Given the sequence of ranges min_max_ranges, find the minimal key that is
14237     in the left-most possible range. If there is no such key, then the current
14238     group does not have a MIN key that satisfies the WHERE clause. If a key is
14239     found, its value is stored in this->record.
14240 
14241   RETURN
14242     0                    on success
14243     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
14244                          the ranges
14245     HA_ERR_END_OF_FILE   - "" -
14246     other                if some error
14247 */
14248 
int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.size() > 0);

  /* Search from the left-most range to the right. */
  for (Quick_ranges::const_iterator it= min_max_ranges.begin();
       it != min_max_ranges.end(); ++it)
  {
    QUICK_RANGE *cur_range= *it;
    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
      (Only possible after the first iteration, when record holds a candidate.)
    */
    if (it != min_max_ranges.begin() && !(cur_range->flag & NO_MAX_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
                 min_max_arg_len) == 1))
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      /* No lower bound: search only by the group prefix. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_KEY_EXACT;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      keypart_map= make_keypart_map(real_key_parts);
      /* Exact match for =/IS NULL, strictly-after for open, at-or-after else. */
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
                                          find_flag);
    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
      break;
    }

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
      result= HA_ERR_KEY_NOT_FOUND;
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      /* Compose the MAX key for the range. */
      uchar *max_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(max_key, group_prefix, real_prefix_len);
      memcpy(max_key + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      /* Compare the found key with max_key. */
      int cmp_res= key_cmp(index_info->key_part, max_key,
                           real_prefix_len + min_max_arg_len);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the maximum boundary
        or
        the key is greater than the maximum
      */
      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
          cmp_res > 0)
      {
        result= HA_ERR_KEY_NOT_FOUND;
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
    memcpy(record, tmp_record, head->s->rec_buff_length);
    result= 0;
  }
  return result;
}
14366 
14367 
14368 /*
14369   Find the maximal key in a group that satisfies some range conditions for the
14370   min/max argument field.
14371 
14372   SYNOPSIS
14373     QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
14374 
14375   DESCRIPTION
14376     Given the sequence of ranges min_max_ranges, find the maximal key that is
14377     in the right-most possible range. If there is no such key, then the current
14378     group does not have a MAX key that satisfies the WHERE clause. If a key is
14379     found, its value is stored in this->record.
14380 
14381   RETURN
14382     0                    on success
14383     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
14384                          the ranges
14385     HA_ERR_END_OF_FILE   - "" -
14386     other                if some error
14387 */
14388 
int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  int result;

  DBUG_ASSERT(min_max_ranges.size() > 0);

  /* Search from the right-most range to the left. */
  for (Quick_ranges::const_iterator it= min_max_ranges.end();
       it != min_max_ranges.begin(); --it)
  {
    QUICK_RANGE *cur_range = *(it - 1);
    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
      (Only possible after the first iteration, when record holds a candidate.)
    */
    if (it != min_max_ranges.end() &&
        !(cur_range->flag & NO_MIN_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
                 min_max_arg_len) == -1))
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      /* No upper bound: take the last key with the group prefix. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_PREFIX_LAST;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      keypart_map= make_keypart_map(real_key_parts);
      /* Exact match for =, strictly-before for open, at-or-before otherwise. */
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
                                          find_flag);

    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

      /*
        If no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
    }
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      return 0; /* No need to perform the checks below for equal keys. */

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
      continue;                                 // Row not found

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      /* Compose the MIN key for the range. */
      uchar *min_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(min_key, group_prefix, real_prefix_len);
      memcpy(min_key + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      /* Compare the found key with min_key. */
      int cmp_res= key_cmp(index_info->key_part, min_key,
                           real_prefix_len + min_max_arg_len);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the minimum boundary
        or
        the key is less than the minimum
      */
      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
          cmp_res < 0)
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  return HA_ERR_KEY_NOT_FOUND;
}
14477 
14478 
14479 /*
14480   Update all MIN function results with the newly found value.
14481 
14482   SYNOPSIS
14483     QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
14484 
14485   DESCRIPTION
14486     The method iterates through all MIN functions and updates the result value
14487     of each function by calling Item_sum::reset(), which in turn picks the new
14488     result value from this->head->record[0], previously updated by
14489     next_min(). The updated value is stored in a member variable of each of the
14490     Item_sum objects, depending on the value type.
14491 
14492   IMPLEMENTATION
14493     The update must be done separately for MIN and MAX, immediately after
14494     next_min() was called and before next_max() is called, because both MIN and
14495     MAX take their result value from the same buffer this->head->record[0]
14496     (i.e.  this->record).
14497 
14498   RETURN
14499     None
14500 */
14501 
update_min_result()14502 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
14503 {
14504   Item_sum *min_func;
14505 
14506   min_functions_it->rewind();
14507   while ((min_func= (*min_functions_it)++))
14508     min_func->reset_and_add();
14509 }
14510 
14511 
14512 /*
14513   Update all MAX function results with the newly found value.
14514 
14515   SYNOPSIS
14516     QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
14517 
14518   DESCRIPTION
14519     The method iterates through all MAX functions and updates the result value
14520     of each function by calling Item_sum::reset(), which in turn picks the new
14521     result value from this->head->record[0], previously updated by
14522     next_max(). The updated value is stored in a member variable of each of the
14523     Item_sum objects, depending on the value type.
14524 
14525   IMPLEMENTATION
14526     The update must be done separately for MIN and MAX, immediately after
14527     next_max() was called, because both MIN and MAX take their result value
14528     from the same buffer this->head->record[0] (i.e.  this->record).
14529 
14530   RETURN
14531     None
14532 */
14533 
update_max_result()14534 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
14535 {
14536   Item_sum *max_func;
14537 
14538   max_functions_it->rewind();
14539   while ((max_func= (*max_functions_it)++))
14540     max_func->reset_and_add();
14541 }
14542 
14543 
14544 /*
14545   Append comma-separated list of keys this quick select uses to key_names;
14546   append comma-separated list of corresponding used lengths to used_lengths.
14547 
14548   SYNOPSIS
14549     QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
14550     key_names    [out] Names of used indexes
14551     used_lengths [out] Corresponding lengths of the index names
14552 
14553   DESCRIPTION
14554     This method is used by select_describe to extract the names of the
14555     indexes used by a quick select.
14556 
14557 */
14558 
add_keys_and_lengths(String * key_names,String * used_lengths)14559 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
14560                                                       String *used_lengths)
14561 {
14562   char buf[64];
14563   size_t length;
14564   key_names->append(index_info->name);
14565   length= longlong2str(max_used_key_length, buf, 10) - buf;
14566   used_lengths->append(buf, length);
14567 }
14568 
14569 
14570 
14571 /**
14572   Traverse the R-B range tree for this and later keyparts to see if
14573   there are at least as many equality ranges as defined by the limit.
14574 
14575   @param keypart_root   The root of a R-B tree of ranges for a given keypart.
14576   @param count[in,out]  The number of equality ranges found so far
14577   @param limit          The number of ranges
14578 
14579   @retval true if limit > 0 and 'limit' or more equality ranges have been
14580           found in the range R-B trees
14581   @retval false otherwise
14582 
14583 */
static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count, uint limit)
{
  // "Statistics instead of index dives" feature is turned off
  if (limit == 0)
    return false;

  /*
    Optimization: if there is at least one equality range, index
    statistics will be used when limit is 1. It's safe to return true
    even without checking that there is an equality range because if
    there are none, index statistics will not be used anyway.
  */
  if (limit == 1)
    return true;

  for(SEL_ARG *keypart_range= keypart_root->first();
      keypart_range; keypart_range= keypart_range->next)
  {
    /*
      This is an equality range predicate and should be counted if:
      1) the range for this keypart does not have a min/max flag
         (which indicates <, <= etc), and
      2) the lower and upper range boundaries have the same value
         (it's not a "x BETWEEN a AND b")

      Note, however, that if this is an "x IS NULL" condition we don't
      count it because the number of NULL-values is likely to be off
      the index statistics we plan to use.
    */
    if (!keypart_range->min_flag && !keypart_range->max_flag && // 1)
        !keypart_range->cmp_max_to_min(keypart_range) &&        // 2)
        !keypart_range->is_null_interval())                     // "x IS NULL"
    {
      /*
         Count predicates in the next keypart, but only if that keypart
         is the next in the index.
      */
      if (keypart_range->next_key_part &&
          keypart_range->next_key_part->part == keypart_range->part + 1)
        // The return value is intentionally ignored here: the shared
        // *count accumulator carries the result and is checked below.
        eq_ranges_exceeds_limit(keypart_range->next_key_part, count, limit);
      else
        // We've found a path of equality predicates down to a keypart leaf
        (*count)++;

      if (*count >= limit)
        return true;
    }
  }
  return false;
}
14634 
14635 #ifndef DBUG_OFF
14636 
print_sel_tree(PARAM * param,SEL_TREE * tree,key_map * tree_map,const char * msg)14637 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
14638                            const char *msg)
14639 {
14640   char buff[1024];
14641   DBUG_ENTER("print_sel_tree");
14642 
14643   String tmp(buff,sizeof(buff),&my_charset_bin);
14644   tmp.length(0);
14645   for (uint idx= 0; idx < param->keys; idx++)
14646   {
14647     if (tree_map->is_set(idx))
14648     {
14649       uint keynr= param->real_keynr[idx];
14650       if (tmp.length())
14651         tmp.append(',');
14652       tmp.append(param->table->key_info[keynr].name);
14653     }
14654   }
14655   if (!tmp.length())
14656     tmp.append(STRING_WITH_LEN("(empty)"));
14657 
14658   DBUG_PRINT("info", ("SEL_TREE: %p (%s)  scans: %s", tree, msg, tmp.ptr()));
14659   DBUG_VOID_RETURN;
14660 }
14661 
14662 
print_ror_scans_arr(TABLE * table,const char * msg,struct st_ror_scan_info ** start,struct st_ror_scan_info ** end)14663 static void print_ror_scans_arr(TABLE *table, const char *msg,
14664                                 struct st_ror_scan_info **start,
14665                                 struct st_ror_scan_info **end)
14666 {
14667   DBUG_ENTER("print_ror_scans_arr");
14668 
14669   char buff[1024];
14670   String tmp(buff,sizeof(buff),&my_charset_bin);
14671   tmp.length(0);
14672   for (;start != end; start++)
14673   {
14674     if (tmp.length())
14675       tmp.append(',');
14676     tmp.append(table->key_info[(*start)->keynr].name);
14677   }
14678   if (!tmp.length())
14679     tmp.append(STRING_WITH_LEN("(empty)"));
14680   DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
14681   fprintf(DBUG_FILE,"ROR key scans (%s): %s", msg, tmp.ptr());
14682 
14683   DBUG_VOID_RETURN;
14684 }
14685 
14686 
14687 #endif /* !DBUG_OFF */
14688 
/**
  Print a key part's value to a string

  @param[out] out          String the key value is appended to
  @param[in]  key_part     Index component description for the key part
  @param[in]  key          Key tuple to read the value from
*/
14697 static void
print_key_value(String * out,const KEY_PART_INFO * key_part,const uchar * key)14698 print_key_value(String *out, const KEY_PART_INFO *key_part, const uchar *key)
14699 {
14700   Field *field= key_part->field;
14701 
14702   if (field->flags & BLOB_FLAG)
14703   {
14704     // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
14705     if (field->real_maybe_null() && *key)
14706       out->append(STRING_WITH_LEN("NULL"));
14707     else
14708       (field->type() == MYSQL_TYPE_GEOMETRY) ?
14709         out->append(STRING_WITH_LEN("unprintable_geometry_value")) :
14710         out->append(STRING_WITH_LEN("unprintable_blob_value"));
14711     return;
14712   }
14713 
14714   uint store_length= key_part->store_length;
14715 
14716   if (field->real_maybe_null())
14717   {
14718     /*
14719       Byte 0 of key is the null-byte. If set, key is NULL.
14720       Otherwise, print the key value starting immediately after the
14721       null-byte
14722     */
14723     if (*key)
14724     {
14725       out->append(STRING_WITH_LEN("NULL"));
14726       return;
14727     }
14728     key++;                                    // Skip null byte
14729     store_length--;
14730   }
14731 
14732   /*
14733     Binary data cannot be converted to UTF8 which is what the
14734     optimizer trace expects. If the column is binary, the hex
14735     representation is printed to the trace instead.
14736    */
14737   if (field->flags & BINARY_FLAG)
14738   {
14739     out->append("0x");
14740     for (uint i= 0; i < store_length; i++)
14741     {
14742       out->append(_dig_vec_lower[*(key+i) >> 4]);
14743       out->append(_dig_vec_lower[*(key+i) & 0x0F]);
14744     }
14745     return;
14746   }
14747 
14748   char buff[128];
14749   String tmp(buff, sizeof(buff), system_charset_info);
14750   tmp.length(0);
14751 
14752   TABLE *table= field->table;
14753   my_bitmap_map *old_sets[2];
14754 
14755   dbug_tmp_use_all_columns(table, old_sets, table->read_set,
14756                            table->write_set);
14757 
14758   field->set_key_image(key, key_part->length);
14759   if (field->type() == MYSQL_TYPE_BIT)
14760     (void) field->val_int_as_str(&tmp, 1); // may change tmp's charset
14761   else
14762     field->val_str(&tmp); // may change tmp's charset
14763   out->append(tmp.ptr(), tmp.length(), tmp.charset());
14764 
14765   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14766 }
14767 
14768 /**
14769   Append range info for a key part to a string
14770 
14771   @param[in,out] out          String the range info is appended to
14772   @param[in]     key_part     Indexed column used in a range select
14773   @param[in]     min_key      Key tuple describing lower bound of range
14774   @param[in]     max_key      Key tuple describing upper bound of range
14775   @param[in]     flag         Key range flags defining what min_key
14776                               and max_key represent @see my_base.h
14777  */
append_range(String * out,const KEY_PART_INFO * key_part,const uchar * min_key,const uchar * max_key,const uint flag)14778 void append_range(String *out,
14779                   const KEY_PART_INFO *key_part,
14780                   const uchar *min_key, const uchar *max_key,
14781                   const uint flag)
14782 {
14783   if (out->length() > 0)
14784     out->append(STRING_WITH_LEN(" AND "));
14785 
14786   if (flag & GEOM_FLAG)
14787   {
14788     /*
14789       The flags of GEOM ranges do not work the same way as for other
14790       range types, so printing "col < some_geom" doesn't make sense.
14791       Just print the column name, not operator.
14792     */
14793     out->append(key_part->field->field_name);
14794     out->append(STRING_WITH_LEN(" "));
14795     print_key_value(out, key_part, min_key);
14796     return;
14797   }
14798 
14799   if (!(flag & NO_MIN_RANGE))
14800   {
14801     print_key_value(out, key_part, min_key);
14802     if (flag & NEAR_MIN)
14803       out->append(STRING_WITH_LEN(" < "));
14804     else
14805       out->append(STRING_WITH_LEN(" <= "));
14806   }
14807 
14808   out->append(key_part->field->field_name);
14809 
14810   if (!(flag & NO_MAX_RANGE))
14811   {
14812     if (flag & NEAR_MAX)
14813       out->append(STRING_WITH_LEN(" < "));
14814     else
14815       out->append(STRING_WITH_LEN(" <= "));
14816     print_key_value(out, key_part, max_key);
14817   }
14818 }
14819 
14820 /**
14821   Traverse an R-B tree of range conditions and append all ranges for
14822   this keypart and consecutive keyparts to range_trace (if non-NULL)
14823   or to range_string (if range_trace is NULL). See description of R-B
14824   trees/SEL_ARG for details on how ranges are linked.
14825 
14826   @param[in,out] range_trace   Optimizer trace array ranges are appended to
14827   @param[in,out] range_string  The string where range predicates are
14828                                appended when the last keypart has
14829                                been reached.
14830   @param         range_so_far  String containing ranges for keyparts prior
14831                                to this keypart.
14832   @param         keypart_root  The root of the R-B tree containing intervals
14833                                for this keypart.
14834   @param         key_parts     Index components description, used when adding
14835                                information to the optimizer trace
14836   @param         print_full    Whether or not ranges on unusable keyparts
14837                                should be printed. Useful for debugging.
14838 
14839   @note This function mimics the behavior of sel_arg_range_seq_next()
14840 */
static void append_range_all_keyparts(Opt_trace_array *range_trace,
                                      String *range_string,
                                      String *range_so_far,
                                      SEL_ARG *keypart_root,
                                      const KEY_PART_INFO *key_parts,
                                      const bool print_full)
{
  DBUG_ASSERT(keypart_root && keypart_root != &null_element);

  // Exactly one of range_trace / range_string receives the output
  const bool append_to_trace= (range_trace != NULL);

  // Either add info to range_string or to range_trace
  DBUG_ASSERT(append_to_trace ? !range_string : (range_string != NULL));

  // Navigate to first interval in red-black tree
  const KEY_PART_INFO *cur_key_part= key_parts + keypart_root->part;
  const SEL_ARG *keypart_range= keypart_root->first();

  // Remember length so range_so_far can be truncated back to the
  // prefix built by earlier keyparts after each interval is processed
  const size_t save_range_so_far_length= range_so_far->length();

  while (keypart_range)
  {
    /*
      Skip the rest of condition printing to avoid OOM if appending to
      range_string and the string becomes too long. Printing very long
      range conditions normally doesn't make sense either.
     */
    if (!append_to_trace && range_string->length() > 500)
    {
      range_string->append(STRING_WITH_LEN("..."));
      break;
    }

    // Append the current range predicate to the range String
    append_range(range_so_far, cur_key_part,
                 keypart_range->min_value, keypart_range->max_value,
                 keypart_range->min_flag | keypart_range->max_flag);

    /*
      Print range predicates for consecutive keyparts if
      1) There are predicates for later keyparts, and
      2) We explicitly requested to print even the ranges that will
         not be usable by range access, or
      3) There are no "holes" in the used keyparts (keypartX can only
         be used if there is a range predicate on keypartX-1), and
      4) The current range is an equality range
     */
    if (keypart_range->next_key_part &&                                    // 1
        (print_full ||                                                     // 2
         (keypart_range->next_key_part->part == keypart_range->part + 1 && // 3
          keypart_range->is_singlepoint())))                               // 4
    {
      // Recurse into the next keypart's R-B tree with the current
      // prefix in range_so_far
      append_range_all_keyparts(range_trace, range_string, range_so_far,
                                keypart_range->next_key_part, key_parts,
                                print_full);
    }
    else
    {
      /*
        This is the last keypart with a usable range predicate. Print
        full range info to the optimizer trace or to the string
      */
      if (append_to_trace)
        range_trace->add_utf8(range_so_far->ptr(),
                              range_so_far->length());
      else
      {
        // String output: ranges are parenthesized and OR-separated
        if (range_string->length() == 0)
          range_string->append(STRING_WITH_LEN("("));
        else
          range_string->append(STRING_WITH_LEN(" OR ("));

        range_string->append(range_so_far->ptr(), range_so_far->length());
        range_string->append(STRING_WITH_LEN(")"));
      }
    }
    keypart_range= keypart_range->next;
    /*
      Now moving to next range for this keypart, so "reset"
      range_so_far to include only range description of earlier
      keyparts
    */
    range_so_far->length(save_range_so_far_length);
  }
}
14926 
14927 /**
14928   Print the ranges in a SEL_TREE to debug log.
14929 
14930   @param tree_name   Descriptive name of the tree
14931   @param tree        The SEL_TREE that will be printed to debug log
14932   @param param       PARAM from test_quick_select
14933 */
static inline void dbug_print_tree(const char *tree_name,
                                   SEL_TREE *tree,
                                   const RANGE_OPT_PARAM *param)
{
#ifndef DBUG_OFF
  // Debug builds only: out=NULL sends the output to the debug trace
  // (DBUG_PRINT) instead of a String; print_full=true prints all ranges
  print_tree(NULL, tree_name, tree, param, true);
#endif
}
14942 
14943 
/**
  Print the contents of a SEL_TREE either to a String (if out is
  non-NULL) or to the debug trace via DBUG_PRINT (if out is NULL).

  @param out         Destination string, or NULL to use DBUG_PRINT
  @param tree_name   Descriptive name printed with the tree
  @param tree        The SEL_TREE to print (may be NULL)
  @param param       Range optimizer parameters (index mapping etc.)
  @param print_full  Whether ranges on unusable keyparts are printed too
*/
static inline void print_tree(String *out,
                              const char *tree_name,
                              SEL_TREE *tree,
                              const RANGE_OPT_PARAM *param,
                              const bool print_full)
{
  // Partitioning pseudo-indexes cannot be mapped to printable key names
  if (!param->using_real_indexes)
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" uses a partitioned index and cannot be printed");
    }
    else
      DBUG_PRINT("info",
                 ("sel_tree: "
                  "%s uses a partitioned index and cannot be printed",
                  tree_name));
    return;
  }

  if (!tree)
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" is NULL");
    }
    else
      DBUG_PRINT("info", ("sel_tree: %s is NULL", tree_name));
    return;
  }

  // Degenerate tree types carry no ranges; report the type and stop
  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" is IMPOSSIBLE");
    }
    else
      DBUG_PRINT("info", ("sel_tree: %s is IMPOSSIBLE", tree_name));
    return;
  }

  if (tree->type == SEL_TREE::ALWAYS)
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" is ALWAYS");
    }
    else
      DBUG_PRINT("info", ("sel_tree: %s is ALWAYS", tree_name));
    return;
  }

  if (tree->type == SEL_TREE::MAYBE)
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" is MAYBE");
    }
    else
      DBUG_PRINT("info", ("sel_tree: %s is MAYBE", tree_name));
    return;
  }

  // Print each index-merge alternative by recursing into its trees
  if (!tree->merges.is_empty())
  {
    if (out)
    {
      out->append(tree_name);
      out->append(" contains the following merges");
    }
    else
      DBUG_PRINT("info",
                 ("sel_tree: "
                  "%s contains the following merges", tree_name));

    List_iterator<SEL_IMERGE> it(tree->merges);
    int i= 1;
    for (SEL_IMERGE *el= it++; el; el= it++, i++)
    {
      if (out)
      {
        out->append("\n--- alternative ");
        char istr[22];
        out->append(llstr(i, istr));
        out->append(" ---\n");
      }
      else
        DBUG_PRINT("info", ("sel_tree: --- alternative %d ---",i));
      for (SEL_TREE** current= el->trees;
           current != el->trees_next;
           current++)
        print_tree(out, "  merge_tree", *current, param, print_full);
    }
  }

  // Print the ranges of every per-index R-B tree in the SEL_TREE
  for (uint i= 0; i< param->keys; i++)
  {
    if (tree->keys[i] == NULL || tree->keys[i] == &null_element)
      continue;

    uint real_key_nr= param->real_keynr[i];

    const KEY &cur_key= param->table->key_info[real_key_nr];
    const KEY_PART_INFO *key_part= cur_key.key_part;

    /*
      String holding the final range description from
      append_range_all_keyparts()
    */
    char buff1[512];
    String range_result(buff1, sizeof(buff1), system_charset_info);
    range_result.length(0);

    /*
      Range description up to a certain keypart - used internally in
      append_range_all_keyparts()
    */
    char buff2[128];
    String range_so_far(buff2, sizeof(buff2), system_charset_info);
    range_so_far.length(0);

    append_range_all_keyparts(NULL, &range_result, &range_so_far,
                              tree->keys[i], key_part, print_full);

    if (out)
    {
      char istr[22];

      out->append(tree_name);
      out->append(" keys[");
      out->append(llstr(i, istr));
      out->append("]: ");
      out->append(range_result.ptr());
      out->append("\n");
    }
    else
      DBUG_PRINT("info",
                 ("sel_tree: %p, type=%d, %s->keys[%u(%u)]: %s",
                  tree->keys[i], tree->keys[i]->type, tree_name, i,
                  real_key_nr, range_result.ptr()));
  }
}
15092 
15093 
15094 /*****************************************************************************
15095 ** Print a quick range for debugging
15096 ** TODO:
15097 ** This should be changed to use a String to store each row instead
15098 ** of locking the DEBUG stream !
15099 *****************************************************************************/
15100 
15101 #ifndef DBUG_OFF
15102 
/*
  Print the values of the key parts that make up a (possibly multi-part)
  key tuple to DBUG_FILE, separated by '/'. NULL key parts are printed
  as "NULL". Caller must hold the DBUG_FILE lock.
*/
static void
print_multiple_key_values(KEY_PART *key_part, const uchar *key,
                          uint used_length)
{
  char buff[1024];
  const uchar *key_end= key+used_length;
  String tmp(buff,sizeof(buff),&my_charset_bin);
  uint store_length;
  TABLE *table= key_part->field->table;
  my_bitmap_map *old_sets[2];

  // Allow reading/writing all columns while converting key images below;
  // restored via restore_col_map on every exit path
  dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);

  // Walk the key tuple one key part at a time
  for (; key < key_end; key+=store_length, key_part++)
  {
    Field *field=      key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      // Byte 0 of a nullable key part is the null-byte
      if (*key)
      {
        // fwrite failure: bail out, but still restore the column maps
        if (fwrite("NULL",sizeof(char),4,DBUG_FILE) != 4) {
          goto restore_col_map;
        }
        continue;
      }
      key++;                                    // Skip null byte
      store_length--;
    }
    field->set_key_image(key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    if (fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE) != tmp.length()) {
      goto restore_col_map;
    }
    // Separate key part values with '/' (not after the last one)
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
restore_col_map:
  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
}
15147 
print_quick(QUICK_SELECT_I * quick,const key_map * needed_reg)15148 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
15149 {
15150   char buf[MAX_KEY/8+1];
15151   TABLE *table;
15152   my_bitmap_map *old_sets[2];
15153   DBUG_ENTER("print_quick");
15154   if (!quick)
15155     DBUG_VOID_RETURN;
15156   DBUG_LOCK_FILE;
15157 
15158   table= quick->head;
15159   dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
15160   quick->dbug_dump(0, TRUE);
15161   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
15162 
15163   fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
15164 
15165   DBUG_UNLOCK_FILE;
15166   DBUG_VOID_RETURN;
15167 }
15168 
dbug_dump(int indent,bool verbose)15169 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
15170 {
15171   /* purecov: begin inspected */
15172   fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
15173           indent, "", head->key_info[index].name, max_used_key_length);
15174 
15175   if (verbose)
15176   {
15177     for (size_t ix= 0; ix < ranges.size(); ++ix)
15178     {
15179       fprintf(DBUG_FILE, "%*s", indent + 2, "");
15180       QUICK_RANGE *range= ranges[ix];
15181       if (!(range->flag & NO_MIN_RANGE))
15182       {
15183         print_multiple_key_values(key_parts, range->min_key,
15184                                   range->min_length);
15185         if (range->flag & NEAR_MIN)
15186           fputs(" < ",DBUG_FILE);
15187         else
15188           fputs(" <= ",DBUG_FILE);
15189       }
15190       fputs("X",DBUG_FILE);
15191 
15192       if (!(range->flag & NO_MAX_RANGE))
15193       {
15194         if (range->flag & NEAR_MAX)
15195           fputs(" < ",DBUG_FILE);
15196         else
15197           fputs(" <= ",DBUG_FILE);
15198         print_multiple_key_values(key_parts, range->max_key,
15199                                   range->max_length);
15200       }
15201       fputs("\n",DBUG_FILE);
15202     }
15203   }
15204   /* purecov: end */
15205 }
15206 
dbug_dump(int indent,bool verbose)15207 void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
15208 {
15209   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
15210   QUICK_RANGE_SELECT *quick;
15211   fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
15212   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15213   while ((quick= it++))
15214     quick->dbug_dump(indent+2, verbose);
15215   if (pk_quick_select)
15216   {
15217     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
15218     pk_quick_select->dbug_dump(indent+2, verbose);
15219   }
15220   fprintf(DBUG_FILE, "%*s}\n", indent, "");
15221 }
15222 
dbug_dump(int indent,bool verbose)15223 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
15224 {
15225   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
15226   QUICK_RANGE_SELECT *quick;
15227   fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
15228           indent, "", need_to_fetch_row? "":"non-");
15229   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15230   while ((quick= it++))
15231     quick->dbug_dump(indent+2, verbose);
15232   if (cpk_quick)
15233   {
15234     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
15235     cpk_quick->dbug_dump(indent+2, verbose);
15236   }
15237   fprintf(DBUG_FILE, "%*s}\n", indent, "");
15238 }
15239 
dbug_dump(int indent,bool verbose)15240 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
15241 {
15242   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
15243   QUICK_SELECT_I *quick;
15244   fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
15245   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
15246   while ((quick= it++))
15247     quick->dbug_dump(indent+2, verbose);
15248   fprintf(DBUG_FILE, "%*s}\n", indent, "");
15249 }
15250 
15251 /*
15252   Print quick select information to DBUG_FILE.
15253 
15254   SYNOPSIS
15255     QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
15256     indent  Indentation offset
15257     verbose If TRUE show more detailed output.
15258 
15259   DESCRIPTION
15260     Print the contents of this quick select to DBUG_FILE. The method also
15261     calls dbug_dump() for the used quick select if any.
15262 
15263   IMPLEMENTATION
15264     Caller is responsible for locking DBUG_FILE before this call and unlocking
15265     it afterwards.
15266 
15267   RETURN
15268     None
15269 */
15270 
dbug_dump(int indent,bool verbose)15271 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
15272 {
15273   fprintf(DBUG_FILE,
15274           "%*squick_group_min_max_select: index %s (%d), length: %d\n",
15275           indent, "", index_info->name, index, max_used_key_length);
15276   if (key_infix_len > 0)
15277   {
15278     fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
15279             indent, "", key_infix_len);
15280   }
15281   if (quick_prefix_select)
15282   {
15283     fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
15284     quick_prefix_select->dbug_dump(indent + 2, verbose);
15285   }
15286   if (min_max_ranges.size() > 0)
15287   {
15288     fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
15289             indent, "", static_cast<int>(min_max_ranges.size()));
15290   }
15291 }
15292 
15293 
15294 #endif /* !DBUG_OFF */
15295 #endif /* OPT_RANGE_CC_INCLUDED */
15296