1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights
2  * reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 /*
25   TODO:
26   Fix that MAYBE_KEY are stored in the tree so that we can detect use
27   of full hash keys for queries like:
28 
29   select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
30 
31 */
32 
33 /*
34   This file contains:
35 
36   RangeAnalysisModule
37     A module that accepts a condition, index (or partitioning) description,
38     and builds lists of intervals (in index/partitioning space), such that
39     all possible records that match the condition are contained within the
40     intervals.
41     The entry point for the range analysis module is get_mm_tree()
42     (mm=min_max) function.
43 
44     The lists are returned in form of complicated structure of interlinked
45     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
46     See quick_range_seq_next, find_used_partitions for examples of how to walk
47     this structure.
48     All direct "users" of this module are located within this file, too.
49 
50 
51   PartitionPruningModule
52     A module that accepts a partitioned table, condition, and finds which
53     partitions we will need to use in query execution. Search down for
54     "PartitionPruningModule" for description.
55     The module has single entry point - prune_partitions() function.
56 
57 
58   Range/index_merge/groupby-minmax optimizer module
59     A module that accepts a table, condition, and returns
60      - a QUICK_*_SELECT object that can be used to retrieve rows that match
61        the specified condition, or a "no records will match the condition"
62        statement.
63 
64     The module entry points are
65       test_quick_select()
66       get_quick_select_for_ref()
67 
68 
69   Record retrieval code for range/index_merge/groupby-min-max.
70     Implementations of QUICK_*_SELECT classes.
71 
72   KeyTupleFormat
73   ~~~~~~~~~~~~~~
74   The code in this file (and elsewhere) makes operations on key value tuples.
75   Those tuples are stored in the following format:
76 
  The tuple is a sequence of key part values. The length of a key part value
  depends only on its type (and does not depend on what value is stored).
79 
80     KeyTuple: keypart1-data, keypart2-data, ...
81 
82   The value of each keypart is stored in the following format:
83 
84     keypart_data: [isnull_byte] keypart-value-bytes
85 
86   If a keypart may have a NULL value (key_part->field->real_maybe_null() can
87   be used to check this), then the first byte is a NULL indicator with the
88   following valid values:
89     1  - keypart has NULL value.
90     0  - keypart has non-NULL value.
91 
92   <questionable-statement> If isnull_byte==1 (NULL value), then the following
93   keypart->length bytes must be 0.
94   </questionable-statement>
95 
96   keypart-value-bytes holds the value. Its format depends on the field type.
97   The length of keypart-value-bytes may or may not depend on the value being
98   stored. The default is that length is static and equal to
99   KEY_PART_INFO::length.
100 
101   Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the
102   value:
103 
104      keypart-value-bytes: value_length value_bytes
105 
106   The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
107 
108   See key_copy() and key_restore() for code to move data between index tuple
109   and table record
110 
111   CAUTION: the above description is only sergefp's understanding of the
112            subject and may omit some details.
113 */
114 
115 #include "sql_priv.h"
116 #include "key.h"        // is_key_used, key_copy, key_cmp, key_restore
117 #include "sql_parse.h"                          // check_stack_overrun
118 #include "sql_partition.h"    // get_part_id_func, PARTITION_ITERATOR,
119                               // struct partition_info, NOT_A_PARTITION_ID
120 #include "sql_base.h"         // free_io_cache
121 #include "records.h"          // init_read_record, end_read_record
122 #include <m_ctype.h>
123 #include "sql_select.h"
124 #include "opt_trace.h"
125 #include "filesort.h"         // filesort_free_buffers
126 #include "sql_optimizer.h"    // is_indexed_agg_distinct,field_time_cmp_date
127 
128 using std::min;
129 using std::max;
130 
/*
  Convert double value to #rows. Currently this does floor(), and we
  might consider using round() instead.
*/
#define double2rows(x) ((ha_rows)(x))

/*
  Three-way comparison of two key-part values 'a' and 'b', taking the
  interval edge flags (NULL/NEAR_* bits) into account. Defined later in
  this file; used by the SEL_ARG cmp_* helpers.
*/
static int sel_cmp(Field *f,uchar *a,uchar *b,uint8 a_flag,uint8 b_flag);

/*
  Key image of a SQL NULL value: NULL-indicator byte set to 1 followed by
  a zero byte (see the KeyTupleFormat description at the top of this file).
*/
static uchar is_null_string[2]= {1,0};

class RANGE_OPT_PARAM;
142 /*
143   A construction block of the SEL_ARG-graph.
144 
145   The following description only covers graphs of SEL_ARG objects with
146   sel_arg->type==KEY_RANGE:
147 
148   One SEL_ARG object represents an "elementary interval" in form
149 
150       min_value <=?  table.keypartX  <=? max_value
151 
152   The interval is a non-empty interval of any kind: with[out] minimum/maximum
153   bound, [half]open/closed, single-point interval, etc.
154 
155   1. SEL_ARG GRAPH STRUCTURE
156 
157   SEL_ARG objects are linked together in a graph. The meaning of the graph
  is better demonstrated by an example:
159 
160      tree->keys[i]
161       |
162       |             $              $
163       |    part=1   $     part=2   $    part=3
164       |             $              $
165       |  +-------+  $   +-------+  $   +--------+
166       |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
167       |  +-------+  $   +-------+  $   +--------+
168       |      |      $              $       |
169       |      |      $              $   +--------+
170       |      |      $              $   | kp3=12 |
171       |      |      $              $   +--------+
172       |  +-------+  $              $
173       \->| kp1=2 |--$--------------$-+
174          +-------+  $              $ |   +--------+
175              |      $              $  ==>| kp3=11 |
176          +-------+  $              $ |   +--------+
177          | kp1=3 |--$--------------$-+       |
178          +-------+  $              $     +--------+
179              |      $              $     | kp3=14 |
180             ...     $              $     +--------+
181 
182   The entire graph is partitioned into "interval lists".
183 
184   An interval list is a sequence of ordered disjoint intervals over
185   the same key part. SEL_ARG are linked via "next" and "prev" pointers
186   with NULL as sentinel.
187 
    In the example pic, there are 4 interval lists:
    "kp1<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=14".
    The vertical lines represent SEL_ARG::next/prev pointers.
191 
192   Additionally, all intervals in the list form a red-black (RB) tree,
193   linked via left/right/parent pointers with null_element as sentinel. The
194   red-black tree root SEL_ARG object will be further called "root of the
195   interval list".
196 
197   A red-black tree with 7 SEL_ARGs will look similar to what is shown
198   below. Left/right/parent pointers are shown while next pointers go from a
199   node with number X to the node with number X+1 (and prev in the
200   opposite direction):
201 
202                          Root
203                         +---+
204                         | 4 |
205                         +---+
206                    left/     \ right
207                     __/       \__
208                    /             \
209               +---+               +---+
210               | 2 |               | 6 |
211               +---+               +---+
212         left /     \ right  left /     \ right
213             |       |           |       |
214         +---+       +---+   +---+       +---+
215         | 1 |       | 3 |   | 5 |       | 7 |
216         +---+       +---+   +---+       +---+
217 
218   In this tree,
219     * node1->prev == node7->next == NULL
220     * node1->left == node1->right ==
221       node3->left == ... node7->right == &null_element
222 
223   In an interval list, each member X may have SEL_ARG::next_key_part pointer
224   pointing to the root of another interval list Y. The pointed interval list
225   must cover a key part with greater number (i.e. Y->part > X->part).
226 
    In the example pic, the next_key_part pointers are represented by
    horizontal lines.
229 
230   2. SEL_ARG GRAPH SEMANTICS
231 
232   It represents a condition in a special form (we don't have a name for it ATM)
233   The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
234 
235   For example, the picture represents the condition in form:
236    (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
237    (kp1=2 AND (kp3=11 OR kp3=14)) OR
238    (kp1=3 AND (kp3=11 OR kp3=14))
239 
240   In red-black tree form:
241 
242                      +-------+                 +--------+
243                      | kp1=2 |.................| kp3=14 |
244                      +-------+                 +--------+
245                       /     \                     /
246              +---------+    +-------+     +--------+
247              | kp1 < 1 |    | kp1=3 |     | kp3=11 |
248              +---------+    +-------+     +--------+
249                  .               .
250             ......               .......
251             .                          .
252         +-------+                  +--------+
253         | kp2=5 |                  | kp3=14 |
254         +-------+                  +--------+
255             .                        /
256             .                   +--------+
257        (root of R-B tree        | kp3=11 |
258         for "kp3={10|12}")      +--------+
259 
260 
261   Where / and \ denote left and right pointers and ... denotes
262   next_key_part pointers to the root of the R-B tree of intervals for
263   consecutive key parts.
264 
265   3. SEL_ARG GRAPH USE
266 
267   Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
  Then walk the SEL_ARG graph and get a list of disjoint ordered key
269   intervals (i.e. intervals in form
270 
271    (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
272 
273   Those intervals can be used to access the index. The uses are in:
274    - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
275                             how many table records are contained within all
276                             intervals.
277    - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
278                             and create QUICK_RANGE_SELECT object that will
279                             read records within these intervals.
280 
281   4. SPACE COMPLEXITY NOTES
282 
283     SEL_ARG graph is a representation of an ordered disjoint sequence of
284     intervals over the ordered set of index tuple values.
285 
286     For multi-part keys, one can construct a WHERE expression such that its
287     list of intervals will be of combinatorial size. Here is an example:
288 
289       (keypart1 IN (1,2, ..., n1)) AND
290       (keypart2 IN (1,2, ..., n2)) AND
291       (keypart3 IN (1,2, ..., n3))
292 
293     For this WHERE clause the list of intervals will have n1*n2*n3 intervals
294     of form
295 
296       (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
297 
298     SEL_ARG graph structure aims to reduce the amount of required space by
299     "sharing" the elementary intervals when possible (the pic at the
300     beginning of this comment has examples of such sharing). The sharing may
301     prevent combinatorial blowup:
302 
303       There are WHERE clauses that have combinatorial-size interval lists but
304       will be represented by a compact SEL_ARG graph.
305       Example:
306         (keypartN IN (1,2, ..., n1)) AND
307         ...
308         (keypart2 IN (1,2, ..., n2)) AND
309         (keypart1 IN (1,2, ..., n3))
310 
311     but not in all cases:
312 
313     - There are WHERE clauses that do have a compact SEL_ARG-graph
314       representation but get_mm_tree() and its callees will construct a
315       graph of combinatorial size.
316       Example:
317         (keypart1 IN (1,2, ..., n1)) AND
318         (keypart2 IN (1,2, ..., n2)) AND
319         ...
320         (keypartN IN (1,2, ..., n3))
321 
322     - There are WHERE clauses for which the minimal possible SEL_ARG graph
323       representation will have combinatorial size.
324       Example:
325         By induction: Let's take any interval on some keypart in the middle:
326 
327            kp15=c0
328 
329         Then let's AND it with this interval 'structure' from preceding and
330         following keyparts:
331 
          ((kp14=c1 AND kp16=c3) OR kp14=c2)   (*)
333 
334         We will obtain this SEL_ARG graph:
335 
336              kp14     $      kp15      $      kp16
337                       $                $
338          +---------+  $   +---------+  $   +---------+
339          | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
340          +---------+  $   +---------+  $   +---------+
341               |       $                $
342          +---------+  $   +---------+  $
343          | kp14=c2 |--$-->| kp15=c0 |  $
344          +---------+  $   +---------+  $
345                       $                $
346 
347        Note that we had to duplicate "kp15=c0" and there was no way to avoid
348        that.
349        The induction step: AND the obtained expression with another "wrapping"
350        expression like (*).
351        When the process ends because of the limit on max. number of keyparts
352        we'll have:
353 
354          WHERE clause length  is O(3*#max_keyparts)
355          SEL_ARG graph size   is O(2^(#max_keyparts/2))
356 
357        (it is also possible to construct a case where instead of 2 in 2^n we
358         have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
359         nodes)
360 
361     We avoid consuming too much memory by setting a limit on the number of
362     SEL_ARG object we can construct during one range analysis invocation.
363 */
364 
class SEL_ARG :public Sql_alloc
{
public:
  /*
    Interval edge flags. min_flag/max_flag hold bits such as
    NO_MIN_RANGE/NO_MAX_RANGE (no bound on that side), NEAR_MIN/NEAR_MAX
    (bound is open, i.e. a strict inequality) and GEOM_FLAG. maybe_flag
    is merged from operands when intervals are combined, see
    merge_flags() and maybe_smaller().
  */
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;					// Which key part
  /*
    1 if the key part may be NULL. Then the first byte of min_value and
    max_value is a NULL-indicator byte (see the KeyTupleFormat
    description at the top of this file).
  */
  uint8 maybe_null;
  /*
    Number of children of this element in the RB-tree, plus 1 for this
    element itself.
  */
  uint16 elements;
  /*
    Valid only for elements which are RB-tree roots: Number of times this
    RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
    SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
  */
  ulong use_count;

  Field *field;
  uchar *min_value,*max_value;			// Pointer to range

  /*
    eq_tree(), first(), last() etc require that left == right == NULL
    if the type is MAYBE_KEY. Todo: fix this so SEL_ARGs without R-B
    children are handled consistently. See related WL#5894.
   */
  SEL_ARG *left,*right;   /* R-B tree children */
  SEL_ARG *next,*prev;    /* Links for bi-directional interval list */
  SEL_ARG *parent;        /* R-B tree parent */
  /*
    R-B tree root of intervals covering keyparts consecutive to this
    SEL_ARG. See documentation of SEL_ARG GRAPH semantics for details.
  */
  SEL_ARG *next_key_part;
  enum leaf_color { BLACK,RED } color;  /* R-B tree node color */

  /**
    Starting an effort to document this field:

    IMPOSSIBLE: if the range predicate for this index is always false.

    ALWAYS: if the range predicate for this index is always true.

    KEY_RANGE: if there is a range predicate that can be used on this index.
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, MAYBE_KEY, KEY_RANGE } type;

  /* Cap on the number of SEL_ARG objects created per range analysis */
  enum { MAX_SEL_ARGS = 16000 };

  /* Trivial constructor; note that no members are initialized here. */
  SEL_ARG() {}
  SEL_ARG(SEL_ARG &);
  SEL_ARG(Field *,const uchar *, const uchar *);
  SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
  /*
    Used to construct MAYBE_KEY and IMPOSSIBLE SEL_ARGs. left and
    right is NULL, so this ctor must not be used to create other
    SEL_ARG types. See todo for left/right pointers.
  */
  SEL_ARG(enum Type type_arg)
    :min_flag(0),elements(1),use_count(1),left(NULL),right(NULL),
     next_key_part(0), color(BLACK), type(type_arg)
  {
    DBUG_ASSERT(type_arg == MAYBE_KEY || type_arg == IMPOSSIBLE);
  }
  /**
    returns true if a range predicate is equal. Use all_same()
    to check for equality of all the predicates on this keypart.
  */
  inline bool is_same(const SEL_ARG *arg) const
  {
    if (type != arg->type || part != arg->part)
      return false;
    if (type != KEY_RANGE)
      return true;
    /* KEY_RANGE intervals are equal iff both edges compare equal */
    return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
  }
  /**
    returns true if all the predicates in the keypart tree are equal
  */
  bool all_same(const SEL_ARG *arg) const
  {
    if (type != arg->type || part != arg->part)
      return false;
    if (type != KEY_RANGE)
      return true;
    if (arg == this)
      return true;
    /*
      Walk both interval lists in parallel; they are equal iff the
      members are pairwise is_same() and the lists have equal length.
    */
    const SEL_ARG *cmp_arg= arg->first();
    const SEL_ARG *cur_arg= first();
    for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
         cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
    if (cur_arg || cmp_arg)
      return false;               // One list is longer than the other
    return true;
  }
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  inline void maybe_smaller() { maybe_flag=1; }
  /* Return true iff it's a single-point null interval */
  inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
  /* The cmp_* helpers below compare interval edges using sel_cmp() */
  inline int cmp_min_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
  }
  inline int cmp_min_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
  }
  inline int cmp_max_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
  }
  inline int cmp_max_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
  }
  /* Return a new SEL_ARG for the intersection of this interval and arg */
  SEL_ARG *clone_and(SEL_ARG* arg)
  {						// Get overlapping range
    uchar *new_min,*new_max;
    uint8 flag_min,flag_max;
    if (cmp_min_to_min(arg) >= 0)
    {
      new_min=min_value; flag_min=min_flag;
    }
    else
    {
      new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
    }
    if (cmp_max_to_max(arg) <= 0)
    {
      new_max=max_value; flag_max=max_flag;
    }
    else
    {
      new_max=arg->max_value; flag_max=arg->max_flag;
    }
    return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
		       MY_TEST(maybe_flag && arg->maybe_flag));
  }
  SEL_ARG *clone_first(SEL_ARG *arg)
  {						// min <= X < arg->min
    return new SEL_ARG(field,part, min_value, arg->min_value,
		       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
		       maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone_last(SEL_ARG *arg)
  {						// min <= X <= key_max
    return new SEL_ARG(field, part, min_value, arg->max_value,
		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);

  /*
    Adopt arg's min edge if it is greater than ours.
    Returns 1 if the interval became a full (unbounded) range.
  */
  bool copy_min(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_min_to_min(arg) > 0)
    {
      min_value=arg->min_value; min_flag=arg->min_flag;
      if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }
  /*
    Adopt arg's max edge if it is not smaller than ours.
    Returns 1 if the interval became a full (unbounded) range.
  */
  bool copy_max(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_max_to_max(arg) <= 0)
    {
      max_value=arg->max_value; max_flag=arg->max_flag;
      if ((max_flag & NO_MAX_RANGE) && (min_flag & NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }

  void copy_min_to_min(SEL_ARG *arg)
  {
    min_value=arg->min_value; min_flag=arg->min_flag;
  }
  /* Set our max edge to arg's min edge, flipping open/closed-ness */
  void copy_min_to_max(SEL_ARG *arg)
  {
    max_value=arg->min_value;
    max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
  }
  /* Set our min edge to arg's max edge, flipping open/closed-ness */
  void copy_max_to_min(SEL_ARG *arg)
  {
    min_value=arg->max_value;
    min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
  }
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
  int store_min(uint length, uchar **min_key,uint min_key_flag)
  {
    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
    if ((min_flag & GEOM_FLAG) ||
        (!(min_flag & NO_MIN_RANGE) &&
	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
    {
      if (maybe_null && *min_value)
      {
        /* NULL: store NULL-indicator byte, zero-fill the value bytes */
	**min_key=1;
	memset(*min_key+1, 0, length-1);
      }
      else
	memcpy(*min_key,min_value,length);
      (*min_key)+= length;
      return 1;
    }
    return 0;
  }
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
  int store_max(uint length, uchar **max_key, uint max_key_flag)
  {
    if (!(max_flag & NO_MAX_RANGE) &&
	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
    {
      if (maybe_null && *max_value)
      {
        /* NULL: store NULL-indicator byte, zero-fill the value bytes */
	**max_key=1;
	memset(*max_key+1, 0, length-1);
      }
      else
	memcpy(*max_key,max_value,length);
      (*max_key)+= length;
      return 1;
    }
    return 0;
  }

  /*
    Returns a number of keypart values appended to the key buffer
    for min key and max key. This function is used by both Range
    Analysis and Partition pruning. For partition pruning we have
    to ensure that we don't store also subpartition fields. Thus
    we have to stop at the last partition part and not step into
    the subpartition fields. For Range Analysis we set last_part
    to MAX_KEY which we should never reach.
  */
  int store_min_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part)
  {
    SEL_ARG *key_tree= first();
    uint res= key_tree->store_min(key[key_tree->part].store_length,
                                  range_key, *range_key_flag);
    *range_key_flag|= key_tree->min_flag;

    /*
      Recurse into the next keypart only if it is usable: a KEY_RANGE on
      the immediately following keypart (and not past last_part), and the
      min edge accumulated so far must still be closed.
    */
    if (key_tree->next_key_part &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
      res+= key_tree->next_key_part->store_min_key(key,
                                                   range_key,
                                                   range_key_flag,
                                                   last_part);
    return res;
  }

  /* returns a number of keypart values appended to the key buffer */
  int store_max_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part)
  {
    SEL_ARG *key_tree= last();
    uint res=key_tree->store_max(key[key_tree->part].store_length,
                                 range_key, *range_key_flag);
    (*range_key_flag)|= key_tree->max_flag;
    /* Same recursion condition as in store_min_key, for the max edge */
    if (key_tree->next_key_part &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
      res+= key_tree->next_key_part->store_max_key(key,
                                                   range_key,
                                                   range_key_flag,
                                                   last_part);
    return res;
  }

  SEL_ARG *insert(SEL_ARG *key);
  SEL_ARG *tree_delete(SEL_ARG *key);
  SEL_ARG *find_range(SEL_ARG *key);
  SEL_ARG *rb_insert(SEL_ARG *leaf);
  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
#ifndef DBUG_OFF
  friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
  void test_use_count(SEL_ARG *root);
#endif
  SEL_ARG *first();
  const SEL_ARG *first() const;
  SEL_ARG *last();
  void make_root();
  /* true iff this is a single interval with no intervals on later keyparts */
  inline bool simple_key()
  {
    return !next_key_part && elements == 1;
  }
  /*
    Add 'count' to use_count of all RB-trees reachable from this tree
    via next_key_part pointers (recursively).
  */
  void increment_use_count(long count)
  {
    if (next_key_part)
    {
      next_key_part->use_count+=count;
      for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
	if (pos->next_key_part)
	  pos->increment_use_count(count);
    }
  }
  /*
    Decrement use_count of all RB-trees linked from this one via
    next_key_part pointers (recursively).
  */
  void free_tree()
  {
    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
      if (pos->next_key_part)
      {
	pos->next_key_part->use_count--;
	pos->next_key_part->free_tree();
      }
  }

  /*
    Return the address of the parent's child pointer that points to this
    node (assumes 'parent' points to a valid SEL_ARG).
  */
  inline SEL_ARG **parent_ptr()
  {
    return parent->left == this ? &parent->left : &parent->right;
  }


  /*
    Check if this SEL_ARG object represents a single-point interval

    SYNOPSIS
      is_singlepoint()

    DESCRIPTION
      Check if this SEL_ARG object (not tree) represents a single-point
      interval, i.e. if it represents a "keypart = const" or
      "keypart IS NULL".

    RETURN
      TRUE   This SEL_ARG object represents a singlepoint interval
      FALSE  Otherwise
  */

  bool is_singlepoint() const
  {
    /*
      Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
      flags, and the same for right edge.
    */
    if (min_flag || max_flag)
      return FALSE;
    uchar *min_val= min_value;
    uchar *max_val= max_value;

    if (maybe_null)
    {
      /* First byte is a NULL value indicator */
      if (*min_val != *max_val)
        return FALSE;

      if (*min_val)
        return TRUE; /* This "x IS NULL" */
      min_val++;
      max_val++;
    }
    return !field->key_cmp(min_val, max_val);
  }
  SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
};
731 
732 /**
733   Helper function to compare two SEL_ARG's.
734 */
all_same(const SEL_ARG * sa1,const SEL_ARG * sa2)735 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
736 {
737   if (sa1 == NULL && sa2 == NULL)
738     return true;
739   if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
740     return false;
741   return sa1->all_same(sa2);
742 }
743 
744 class SEL_IMERGE;
745 
746 
class SEL_TREE :public Sql_alloc
{
public:
  /**
    Starting an effort to document this field:

    IMPOSSIBLE: if keys[i]->type == SEL_ARG::IMPOSSIBLE for some i,
      then type == SEL_TREE::IMPOSSIBLE. Rationale: if the predicate for
      one of the indexes is always false, then the full predicate is also
      always false.

    ALWAYS: if either (keys[i]->type == SEL_ARG::ALWAYS) or
      (keys[i] == NULL) for all i, then type == SEL_TREE::ALWAYS.
      Rationale: the range access method will not be able to filter
      out any rows when there are no range predicates that can be used
      to filter on any index.

    KEY: There are range predicates that can be used on at least one
      index.

    KEY_SMALLER: There are range predicates that can be used on at
      least one index. In addition, there are predicates that cannot
      be directly utilized by range access on key parts in the same
      index. These unused predicates makes it probable that the row
      estimate for range access on this index is too pessimistic.
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
  /* Construct a tree of the given type; keys[] is left untouched */
  SEL_TREE(enum Type type_arg) :type(type_arg) {}
  /* Construct a KEY-type tree with all keys[] elements set to NULL */
  SEL_TREE() :type(KEY)
  {
    memset(keys, 0, sizeof(keys));
  }
  /* Deep-copy constructor (defined later in this file) */
  SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
  /*
    Possible ways to read rows using a single index because the
    conditions of the query consists of single-index conjunctions:

       (ranges_for_idx_1) AND (ranges_for_idx_2) AND ...

    The SEL_ARG graph for each non-NULL element in keys[] may consist
    of many single-index ranges (disjunctions), so ranges_for_idx_1
    may e.g. be:

       "idx_field1 = 1 OR (idx_field1 > 5 AND idx_field2 = 10)"

    assuming that index1 is a composite index covering
    (idx_field1,...,idx_field2,..)

    Index merge intersection intersects ranges on SEL_ARGs from two or
    more indexes.

    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such SEL_TREE instead of NULL)
  */
  SEL_ARG *keys[MAX_KEY];
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using Index merge (sort) union.

    Each element in 'merges' consists of multi-index disjunctions,
    which means that Index merge (sort) union must be applied to read
    rows. The nodes in the 'merges' list forms a conjunction of such
    multi-index disjunctions.

    The list is non-empty only if type==KEY.
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */
};
826 
/*
  Parameter block shared by all range-analysis entry points: describes
  the table and the (real or fake) indexes under analysis, plus the
  memory roots and limits the analysis must respect.
*/
class RANGE_OPT_PARAM
{
public:
  THD	*thd;   /* Current thread handle */
  TABLE *table; /* Table being analyzed */
  Item *cond;   /* Used inside get_mm_tree(). */
  table_map prev_tables;
  table_map read_tables;
  table_map current_table; /* Bit of the table being analyzed */

  /* Array of parts of all keys for which range analysis is performed */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;
  MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
  MEM_ROOT *old_root; /* Memory that will last until the query end */
  /*
    Number of indexes used in range analysis (In SEL_TREE::keys only first
    #keys elements are not empty)
  */
  uint keys;

  /*
    If true, the index descriptions describe real indexes (and it is ok to
    call field->optimize_range(real_keynr[...], ...).
    Otherwise index description describes fake indexes, like a partitioning
    expression.
  */
  bool using_real_indexes;

  /*
    Aggressively remove "scans" that do not have conditions on first
    keyparts. Such scans are usable when doing partition pruning but not
    regular range optimization.
  */
  bool remove_jump_scans;

  /*
    used_key_no -> table_key_no translation table. Only makes sense if
    using_real_indexes==TRUE
  */
  uint real_keynr[MAX_KEY];

  /*
    Used to store 'current key tuples', in both range analysis and
    partitioning (list) analysis
  */
  uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];

  /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
  uint alloced_sel_args;
  bool force_default_mrr;
  /**
    Whether index statistics or index dives should be used when
    estimating the number of rows in an equality range. If true, index
    statistics is used for these indexes.
  */
  bool use_index_statistics;

  /*
    TRUE when analysis should stop: the statement already failed, or
    range analysis itself grew too large (SEL_ARG allocation cap hit).
  */
  bool statement_should_be_aborted() const
  {
    return
      thd->is_fatal_error ||
      thd->is_error() ||
      alloced_sel_args > SEL_ARG::MAX_SEL_ARGS;
  }

};
895 
/*
  RANGE_OPT_PARAM extended with per-table state used (and produced) while
  costing the alternative range/ROR/index-merge read plans.
*/
class PARAM : public RANGE_OPT_PARAM
{
public:
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
  longlong baseflag;
  uint max_key_part;
  /* Number of ranges in the last checked tree->key */
  uint range_count;

  bool quick;				// Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* TRUE if last checked tree->key can be used for ROR-scan */
  bool is_ror_scan;
  /* Number of ranges in the last checked tree->key */
  uint n_ranges;

  /*
     The sort order the range access method must be able
     to provide. Three-value logic: asc/desc/don't care
  */
  ORDER::enum_order order_direction;
};
927 
928 class TABLE_READ_PLAN;
929   class TRP_RANGE;
930   class TRP_ROR_INTERSECT;
931   class TRP_ROR_UNION;
932   class TRP_INDEX_MERGE;
933   class TRP_GROUP_MIN_MAX;
934 
935 struct st_ror_scan_info;
936 
937 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,
938                                Item_func *cond_func,Field *field,
939                                Item_func::Functype type,Item *value,
940                                Item_result cmp_type);
941 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,Item *cond_func,Field *field,
942 			    KEY_PART *key_part,
943 			    Item_func::Functype type,Item *value);
944 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond);
945 
946 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts);
947 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
948                                   SEL_ARG *tree, bool update_tbl_stats,
949                                   uint *mrr_flags, uint *bufsize,
950                                   Cost_estimate *cost);
951 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
952                                      SEL_ARG *key_tree, uint mrr_flags,
953                                      uint mrr_buf_size, MEM_ROOT *alloc);
954 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
955                                        bool index_read_must_be_used,
956                                        bool update_tbl_stats,
957                                        double read_time);
958 static
959 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
960                                           double read_time);
961 static
962 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
963                                          double read_time);
964 static
965 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
966                                           double read_time);
967 #ifndef DBUG_OFF
968 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
969                            const char *msg);
970 static void print_ror_scans_arr(TABLE *table, const char *msg,
971                                 struct st_ror_scan_info **start,
972                                 struct st_ror_scan_info **end);
973 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
974 #endif
975 
976 static void append_range_all_keyparts(Opt_trace_array *range_trace,
977                                       String *range_string,
978                                       String *range_so_far,
979                                       SEL_ARG *keypart_root,
980                                       const KEY_PART_INFO *key_parts);
981 static inline void dbug_print_tree(const char *tree_name,
982                                    SEL_TREE *tree,
983                                    const RANGE_OPT_PARAM *param);
984 
985 void append_range(String *out,
986                   const KEY_PART_INFO *key_parts,
987                   const uchar *min_key, const uchar *max_key,
988                   const uint flag);
989 
990 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
991 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
992 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
993 static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
994 static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
995                         uint clone_flag);
996 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
997 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
998                     SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
999                     uchar *max_key,uint max_key_flag);
1000 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
1001 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count,
1002                                     uint limit);
1003 
1004 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
1005 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
1006                              uint length);
1007 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
1008 
1009 
1010 /*
1011   SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
1012   a condition in the following form:
1013    (t_1||t_2||...||t_N) && (next)
1014 
1015   where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
1016   (t_i,t_j) contains SEL_ARGS for the same index.
1017 
1018   SEL_TREE contained in SEL_IMERGE always has merges=NULL.
1019 
1020   This class relies on memory manager to do the cleanup.
1021 */
1022 
class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  /* Start with the preallocated array; grown on demand in or_sel_tree(). */
  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  /* Deep copy; on OOM the result is left empty (trees == trees_next). */
  SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
  /* Append 'tree' unconditionally. 0 on OK, -1 on OOM. */
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  /* OR new_tree into a compatible member tree, or append it. */
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
  /* OR every tree of 'imerge' into this SEL_IMERGE. */
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
};
1044 
1045 
1046 /*
1047   Add SEL_TREE to this index_merge without any checks,
1048 
1049   NOTES
1050     This function implements the following:
1051       (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
1052 
1053   RETURN
1054      0 - OK
1055     -1 - Out of memory.
1056 */
1057 
or_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree)1058 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
1059 {
1060   if (trees_next == trees_end)
1061   {
1062     const int realloc_ratio= 2;		/* Double size for next round */
1063     uint old_elements= (trees_end - trees);
1064     uint old_size= sizeof(SEL_TREE**) * old_elements;
1065     uint new_size= old_size * realloc_ratio;
1066     SEL_TREE **new_trees;
1067     if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
1068       return -1;
1069     memcpy(new_trees, trees, old_size);
1070     trees=      new_trees;
1071     trees_next= trees + old_elements;
1072     trees_end=  trees + old_elements * realloc_ratio;
1073   }
1074   *(trees_next++)= tree;
1075   return 0;
1076 }
1077 
1078 
1079 /*
1080   Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
1081   combining new_tree with one of the trees in this SEL_IMERGE if they both
1082   have SEL_ARGs for the same key.
1083 
1084   SYNOPSIS
1085     or_sel_tree_with_checks()
1086       param    PARAM from SQL_SELECT::test_quick_select
1087       new_tree SEL_TREE with type KEY or KEY_SMALLER.
1088 
1089   NOTES
1090     This does the following:
1091     (t_1||...||t_k)||new_tree =
1092      either
1093        = (t_1||...||t_k||new_tree)
1094      or
1095        = (t_1||....||(t_j|| new_tree)||...||t_k),
1096 
     where t_i and new_tree are SEL_TREEs.
1098     new_tree is combined with the first t_j it has a SEL_ARG on common
1099     key with. As a consequence of this, choice of keys to do index_merge
1100     read may depend on the order of conditions in WHERE part of the query.
1101 
1102   RETURN
1103     0  OK
1104     1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
1105        and (*this) should be discarded.
1106    -1  An error occurred.
1107 */
1108 
or_sel_tree_with_checks(RANGE_OPT_PARAM * param,SEL_TREE * new_tree)1109 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
1110 {
1111   for (SEL_TREE** tree = trees;
1112        tree != trees_next;
1113        tree++)
1114   {
1115     if (sel_trees_can_be_ored(*tree, new_tree, param))
1116     {
1117       *tree = tree_or(param, *tree, new_tree);
1118       if (!*tree)
1119         return 1;
1120       if (((*tree)->type == SEL_TREE::MAYBE) ||
1121           ((*tree)->type == SEL_TREE::ALWAYS))
1122         return 1;
1123       /* SEL_TREE::IMPOSSIBLE is impossible here */
1124       return 0;
1125     }
1126   }
1127 
1128   /* New tree cannot be combined with any of existing trees. */
1129   return or_sel_tree(param, new_tree);
1130 }
1131 
1132 
1133 /*
1134   Perform OR operation on this index_merge and supplied index_merge list.
1135 
1136   RETURN
1137     0 - OK
1138     1 - One of conditions in result is always TRUE and this SEL_IMERGE
1139         should be discarded.
1140    -1 - An error occurred
1141 */
1142 
or_sel_imerge_with_checks(RANGE_OPT_PARAM * param,SEL_IMERGE * imerge)1143 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
1144 {
1145   for (SEL_TREE** tree= imerge->trees;
1146        tree != imerge->trees_next;
1147        tree++)
1148   {
1149     if (or_sel_tree_with_checks(param, *tree))
1150       return 1;
1151   }
1152   return 0;
1153 }
1154 
1155 
/*
  Copy constructor.  SEL_ARG graphs in keys[] are shared with 'arg'
  (reference counts are bumped, not cloned); SEL_IMERGEs are cloned.
  If any merge copy fails or comes back empty, 'merges' is dropped
  entirely rather than kept partially copied.
*/
SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
{
  keys_map= arg->keys_map;
  type= arg->type;
  for (uint idx= 0; idx < MAX_KEY; idx++)
  {
    if ((keys[idx]= arg->keys[idx]))
    {
      /* Share the SEL_ARG graph; account for the extra reference. */
      keys[idx]->use_count++;
      keys[idx]->increment_use_count(1);
    }
  }

  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new SEL_IMERGE(el, param);
    if (!merge || merge->trees == merge->trees_next)
    {
      /* OOM or empty clone: discard all merges, keep the key ranges. */
      merges.empty();
      return;
    }
    merges.push_back (merge);
  }
}
1181 
1182 
/*
  Copy constructor.  Reuses the preallocated tree array when it is big
  enough, otherwise allocates one on param->mem_root; each contained
  SEL_TREE is then cloned.  On out-of-memory the object is left valid
  but empty (trees == trees_next), which callers detect.
*/
SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
{
  /* Copy the full allocated capacity (trees_end), not just the used part. */
  uint elements= (arg->trees_end - arg->trees);
  if (elements > PREALLOCED_TREES)
  {
    uint size= elements * sizeof (SEL_TREE **);
    if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
      goto mem_err;
  }
  else
    trees= &trees_prealloced[0];

  trees_next= trees;
  trees_end= trees + elements;

  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end;
       tree++, arg_tree++)
  {
    if (!(*tree= new SEL_TREE(*arg_tree, param)))
      goto mem_err;
  }

  return;

mem_err:
  /* Leave *this empty but consistent. */
  trees= &trees_prealloced[0];
  trees_next= trees;
  trees_end= trees;
}
1212 
1213 
1214 /*
1215   Perform AND operation on two index_merge lists and store result in *im1.
1216 */
1217 
inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  /* A conjunction of imerge lists is simply their concatenation. */
  im1->concat(im2);
}
1222 
1223 
1224 /*
1225   Perform OR operation on 2 index_merge lists, storing result in first list.
1226 
1227   NOTES
1228     The following conversion is implemented:
1229      (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
1230       => (a_1||b_1).
1231 
1232     i.e. all conjuncts except the first one are currently dropped.
1233     This is done to avoid producing N*K ways to do index_merge.
1234 
1235     If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
1236     and index_merge is discarded (while it is actually possible to try
1237     harder).
1238 
1239     As a consequence of this, choice of keys to do index_merge read may depend
1240     on the order of conditions in WHERE part of the query.
1241 
1242   RETURN
1243     0     OK, result is stored in *im1
1244     other Error, both passed lists are unusable
1245 */
1246 
int imerge_list_or_list(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  /* Keep only the first conjunct of im1 (see NOTES above) ... */
  SEL_IMERGE *imerge= im1->head();
  im1->empty();
  im1->push_back(imerge);

  /* ... and OR it with the first conjunct of im2; the rest are dropped. */
  return imerge->or_sel_imerge_with_checks(param, im2->head());
}
1257 
1258 
1259 /*
1260   Perform OR operation on index_merge list and key tree.
1261 
1262   RETURN
1263     false     OK, result is stored in *im1.
1264     true      Error
1265 */
1266 
static bool imerge_list_or_tree(RANGE_OPT_PARAM *param,
                                List<SEL_IMERGE> *im1,
                                SEL_TREE *tree)
{
  DBUG_ENTER("imerge_list_or_tree");
  SEL_IMERGE *imerge;
  List_iterator<SEL_IMERGE> it(*im1);

  uint remaining_trees= im1->elements;
  while ((imerge= it++))
  {
    SEL_TREE *or_tree;
    /*
      Need to make a copy of 'tree' for all but the last OR operation
      because or_sel_tree_with_checks() may change it.
    */
    if (--remaining_trees == 0)
      or_tree= tree;
    else
    {
      or_tree= new SEL_TREE (tree, param);
      if (!or_tree)
        DBUG_RETURN(true);
      /*
        The copy came back with neither ranges nor merges; stop here and
        keep im1 as it is (reported as success).
      */
      if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
        DBUG_RETURN(false);
    }

    int result_or= imerge->or_sel_tree_with_checks(param, or_tree);
    if (result_or == 1)
      it.remove();              /* imerge became always TRUE: drop it */
    else if (result_or == -1)
      DBUG_RETURN(true);        /* propagate error */
  }
  DBUG_ASSERT(remaining_trees == 0);
  /* If every imerge was dropped the whole disjunction is gone: report true. */
  DBUG_RETURN(im1->is_empty());
}
1303 
1304 
1305 /***************************************************************************
1306 ** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
1307 ***************************************************************************/
1308 
1309 	/* make a select from mysql info
1310 	   Error is set as following:
1311 	   0 = ok
1312 	   1 = Got some error (out of memory?)
1313 	   */
1314 
make_select(TABLE * head,table_map const_tables,table_map read_tables,Item * conds,bool allow_null_cond,int * error)1315 SQL_SELECT *make_select(TABLE *head, table_map const_tables,
1316 			table_map read_tables, Item *conds,
1317                         bool allow_null_cond,
1318                         int *error)
1319 {
1320   SQL_SELECT *select;
1321   DBUG_ENTER("make_select");
1322 
1323   *error=0;
1324 
1325   if (!conds && !allow_null_cond)
1326     DBUG_RETURN(0);
1327   if (!(select= new SQL_SELECT))
1328   {
1329     *error= 1;			// out of memory
1330     DBUG_RETURN(0);		/* purecov: inspected */
1331   }
1332   select->read_tables=read_tables;
1333   select->const_tables=const_tables;
1334   select->head=head;
1335   select->cond=conds;
1336 
1337   if (head->sort.io_cache)
1338   {
1339     select->file= *head->sort.io_cache;
1340     select->records=(ha_rows) (select->file.end_of_file/
1341 			       head->file->ref_length);
1342     my_free(head->sort.io_cache);
1343     head->sort.io_cache=0;
1344   }
1345   DBUG_RETURN(select);
1346 }
1347 
1348 
/* Default constructor: no quick select, no condition, empty IO cache. */
SQL_SELECT::SQL_SELECT() :
  quick(0), cond(0), icp_cond(0),
  free_cond(0), traced_before(false)
{
  my_b_clear(&file);
}
1355 
1356 
/* Release everything this SQL_SELECT owns; safe to call repeatedly. */
void SQL_SELECT::cleanup()
{
  set_quick(NULL);
  /* The condition is deleted only if this object owns it (free_cond). */
  if (free_cond)
  {
    free_cond=0;
    delete cond;
    cond= 0;
  }
  close_cached_file(&file);
  traced_before= false;
}
1369 
1370 
/* Destructor delegates all teardown to cleanup(). */
SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}
1375 
1376 #undef index					// Fix for Unixware 7
1377 
/* Base-class constructor: zero the key-usage statistics. */
QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}
1382 
/*
  Construct a quick range select over index 'key_nr' of 'table'.
  Allocation failures are reported through *create_error (the object is
  still safe to destroy).  When neither no_alloc nor parent_alloc is
  given, an own MEM_ROOT is created and — note the side effect —
  thd->mem_root is redirected to it.
*/
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :free_file(0), cur_range(NULL), last_range(0),
   mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
   dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head=  table;
  key_part_info= head->key_info[index].key_part;
  my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.read_rnd_buff_size;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    /* Side effect: subsequent THD allocations land in this->alloc. */
    thd->mem_root= &alloc;
  }
  else
    memset(&alloc, 0, sizeof(alloc));  /* empty root; nothing to free later */
  file= head->file;
  record= head->record[0];

  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
  if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
                                           MYF(MY_WME))))
  {
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}
1424 
1425 
/* Request that the MRR implementation return rows in key order. */
void QUICK_RANGE_SELECT::need_sorted_output()
{
  mrr_flags |= HA_MRR_SORTED;
}
1430 
1431 
/* End any active index/rnd scan on the handler. Always returns FALSE. */
int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}
1440 
1441 
/* Terminate the current scan on the handler, if one is active. */
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited)
    file->ha_index_or_rnd_end();
}
1447 
1448 
/*
  Destructor.  Skips most cleanup when dont_free is set (resources then
  belong to another quick select); a cloned handler (free_file) is
  unlocked, closed and deleted here.
*/
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      if (free_file)
      {
        /* This quick select owns a separate handler clone. */
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_lock(current_thd, F_UNLCK);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
    my_free(column_bitmap.bitmap);
  }
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}
1474 
1475 
/*
  Construct an (initially empty) index-merge quick select over 'table';
  merged scans are added later via push_quick_back().
*/
QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
                                                   TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
  index= MAX_KEY;   /* index merge does not correspond to a single index */
  head= table;
  memset(&read_record, 0, sizeof(read_record));
  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  DBUG_VOID_RETURN;
}
1487 
/* Nothing to initialize here; the real work happens in reset(). */
int QUICK_INDEX_MERGE_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
  DBUG_RETURN(0);
}
1493 
/* Run the merged key scans and build the merged row set. */
int QUICK_INDEX_MERGE_SELECT::reset()
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
  const int retval= read_keys_and_merge();
  DBUG_RETURN(retval);
}
1500 
1501 bool
push_quick_back(QUICK_RANGE_SELECT * quick_sel_range)1502 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
1503 {
1504   /*
1505     Save quick_select that does scan on clustered primary key as it will be
1506     processed separately.
1507   */
1508   if (head->file->primary_key_is_clustered() &&
1509       quick_sel_range->index == head->s->primary_key)
1510     pk_quick_select= quick_sel_range;
1511   else
1512     return quick_selects.push_back(quick_sel_range);
1513   return 0;
1514 }
1515 
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
  delete unique;
  /*
    Null out each child's handler pointer first so that
    ~QUICK_RANGE_SELECT skips handler cleanup for them.
  */
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_io_cache(head);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1533 
1534 
/*
  Construct an (initially empty) ROR-intersection quick select.  An own
  MEM_ROOT is created unless parent_alloc is supplied.  last_rowid may
  come back NULL on OOM; init() checks for that.
*/
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;   /* intersection does not correspond to a single index */
  head= table;
  record= head->record[0];
  if (!parent_alloc)
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  else
    memset(&alloc, 0, sizeof(MEM_ROOT));
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}
1552 
1553 
1554 /*
1555   Do post-constructor initialization.
1556   SYNOPSIS
1557     QUICK_ROR_INTERSECT_SELECT::init()
1558 
1559   RETURN
1560     0      OK
1561     other  Error code
1562 */
1563 
int QUICK_ROR_INTERSECT_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
 /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
}
1570 
1571 
1572 /*
1573   Initialize this quick select to be a ROR-merged scan.
1574 
1575   SYNOPSIS
1576     QUICK_RANGE_SELECT::init_ror_merged_scan()
1577       reuse_handler If TRUE, use head->file, otherwise create a separate
1578                     handler object
1579 
1580   NOTES
1581     This function creates and prepares for subsequent use a separate handler
1582     object if it can't reuse head->file. The reason for this is that during
1583     ROR-merge several key scans are performed simultaneously, and a single
1584     handler is only capable of preserving context of a single key scan.
1585 
1586     In ROR-merge the quick select doing merge does full records retrieval,
1587     merged quick selects read only keys.
1588 
1589   RETURN
1590     0  ROR child scan initialized, ok to use.
1591     1  error
1592 */
1593 
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
{
  handler *save_file= file, *org_file;
  THD *thd;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  mrr_flags|= HA_MRR_SORTED;
  if (reuse_handler)
  {
    /* Reuse head->file; only bitmap setup and sort hint are needed. */
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init() || reset())
    {
      DBUG_RETURN(1);
    }
    head->column_bitmaps_set(&column_bitmap, &column_bitmap);
    file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  thd= head->in_use;
  if (!(file= head->file->clone(head->s->normalized_path.str, thd->mem_root)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  head->column_bitmaps_set(&column_bitmap, &column_bitmap);

  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init() || reset())
  {
    /* Undo the lock before bailing; 'file' is deleted under 'failure'. */
    file->ha_external_lock(thd, F_UNLCK);
    file->ha_close();
    goto failure;
  }
  free_file= TRUE;   /* from now on the clone is owned by this object */
  last_rowid= file->ref;
  file->extra(HA_EXTRA_SECONDARY_SORT_ROWID);

end:
  /*
    We are only going to read key fields and call position() on 'file'
    The following sets head->tmp_set to only use this key and then updates
    head->read_set and head->write_set to use this bitmap.
    The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
  */
  org_file= head->file;
  head->file= file;
  /* We don't have to set 'head->keyread' here as the 'file' is unique */
  if (!head->no_keyread)
    head->mark_columns_used_by_index(index);
  head->prepare_for_position();
  head->file= org_file;
  bitmap_copy(&column_bitmap, head->read_set);

  /*
    We have prepared a column_bitmap which get_next() will use. To do this we
    used TABLE::read_set/write_set as playground; restore them to their
    original value to not pollute other scans.
  */
  head->column_bitmaps_set(save_read_set, save_write_set);

  DBUG_RETURN(0);

failure:
  head->column_bitmaps_set(save_read_set, save_write_set);
  delete file;
  file= save_file;
  DBUG_RETURN(1);
}
1684 
1685 
1686 /*
1687   Initialize this quick select to be a part of a ROR-merged scan.
1688   SYNOPSIS
1689     QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1690       reuse_handler If TRUE, use head->file, otherwise create separate
1691                     handler object.
1692   RETURN
1693     0     OK
1694     other error code
1695 */
init_ror_merged_scan(bool reuse_handler)1696 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1697 {
1698   int error;
1699   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
1700   QUICK_RANGE_SELECT* quick;
1701   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1702 
1703   /* Initialize all merged "children" quick selects */
1704   DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1705   if (!need_to_fetch_row && reuse_handler)
1706   {
1707     quick= quick_it++;
1708     /*
1709       There is no use of this->file. Use it for the first of merged range
1710       selects.
1711     */
1712     int error= quick->init_ror_merged_scan(TRUE);
1713     if (error)
1714       DBUG_RETURN(error);
1715     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1716   }
1717   while ((quick= quick_it++))
1718   {
1719 #ifndef DBUG_OFF
1720     const MY_BITMAP * const save_read_set= quick->head->read_set;
1721     const MY_BITMAP * const save_write_set= quick->head->write_set;
1722 #endif
1723     if ((error= quick->init_ror_merged_scan(FALSE)))
1724       DBUG_RETURN(error);
1725     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1726     // Sets are shared by all members of "quick_selects" so must not change
1727     DBUG_ASSERT(quick->head->read_set == save_read_set);
1728     DBUG_ASSERT(quick->head->write_set == save_write_set);
1729     /* All merged scans share the same record buffer in intersection. */
1730     quick->record= head->record[0];
1731   }
1732 
1733   /* Prepare for ha_rnd_pos calls if needed. */
1734   if (need_to_fetch_row && (error= head->file->ha_rnd_init(false)))
1735   {
1736     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
1737     DBUG_RETURN(error);
1738   }
1739   DBUG_RETURN(0);
1740 }
1741 
1742 
1743 /*
1744   Initialize quick select for row retrieval.
1745   SYNOPSIS
1746     reset()
1747   RETURN
1748     0      OK
1749     other  Error code
1750 */
1751 
reset()1752 int QUICK_ROR_INTERSECT_SELECT::reset()
1753 {
1754   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1755   if (!scans_inited && init_ror_merged_scan(TRUE))
1756     DBUG_RETURN(1);
1757   scans_inited= TRUE;
1758   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
1759   QUICK_RANGE_SELECT *quick;
1760   while ((quick= it++))
1761     quick->reset();
1762   DBUG_RETURN(0);
1763 }
1764 
1765 
1766 /*
1767   Add a merged quick select to this ROR-intersection quick select.
1768 
1769   SYNOPSIS
1770     QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1771       quick Quick select to be added. The quick select must return
1772             rows in rowid order.
1773   NOTES
1774     This call can only be made before init() is called.
1775 
1776   RETURN
1777     FALSE OK
1778     TRUE  Out of memory.
1779 */
1780 
1781 bool
push_quick_back(QUICK_RANGE_SELECT * quick)1782 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
1783 {
1784   return quick_selects.push_back(quick);
1785 }
1786 
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  /* Free the merged child quick selects and the optional clustered-PK scan. */
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  /* Close the rnd scan opened by init_ror_merged_scan(), if still active. */
  if (need_to_fetch_row && head->file->inited)
    head->file->ha_rnd_end();
  DBUG_VOID_RETURN;
}
1797 
1798 
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  index= MAX_KEY;              /* A ROR union is not tied to a single index. */
  head= table;
  rowid_length= table->file->ref_length;
  record= head->record[0];
  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  /*
    Redirect subsequent THD allocations into this quick select's own arena.
    NOTE(review): assumes the caller restores thd->mem_root afterwards --
    confirm against the call sites.
  */
  thd_param->mem_root= &alloc;
}
1810 
1811 
1812 /*
1813   Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority
1814   queue.
1815 
  SYNOPSIS
1817     QUICK_ROR_UNION_SELECT_queue_cmp()
1818       arg   Pointer to QUICK_ROR_UNION_SELECT
1819       val1  First merged select
1820       val2  Second merged select
1821 */
1822 
1823 C_MODE_START
1824 
QUICK_ROR_UNION_SELECT_queue_cmp(void * arg,uchar * val1,uchar * val2)1825 static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
1826 {
1827   QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1828   return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
1829                                    ((QUICK_SELECT_I*)val2)->last_rowid);
1830 }
1831 
1832 C_MODE_END
1833 
1834 
1835 /*
1836   Do post-constructor initialization.
1837   SYNOPSIS
1838     QUICK_ROR_UNION_SELECT::init()
1839 
1840   RETURN
1841     0      OK
1842     other  Error code
1843 */
1844 
init()1845 int QUICK_ROR_UNION_SELECT::init()
1846 {
1847   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1848   if (init_queue(&queue, quick_selects.elements, 0,
1849                  FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
1850                  (void*) this))
1851   {
1852     memset(&queue, 0, sizeof(QUEUE));
1853     DBUG_RETURN(1);
1854   }
1855 
1856   if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1857     DBUG_RETURN(1);
1858   prev_rowid= cur_rowid + head->file->ref_length;
1859   DBUG_RETURN(0);
1860 }
1861 
1862 
1863 /*
1864   Initialize quick select for row retrieval.
1865   SYNOPSIS
1866     reset()
1867 
1868   RETURN
1869     0      OK
1870     other  Error code
1871 */
1872 
int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I *quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
  have_prev_rowid= FALSE;
  if (!scans_inited)
  {
    /* First reset() call for this quick select: set up merged scans once. */
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      /*
        Use mem_root of this "QUICK" as using the statement mem_root
        might result in too many allocations when combined with
        dynamic range access where range optimizer is invoked many times
        for a single statement.
      */
      THD *thd= quick->head->in_use;
      MEM_ROOT *saved_root= thd->mem_root;
      thd->mem_root= &alloc;
      error= quick->init_ror_merged_scan(false);
      thd->mem_root= saved_root;     /* Always restore, even on error. */
      if (error)
        DBUG_RETURN(1);
    }
    scans_inited= TRUE;
  }
  queue_remove_all(&queue);
  /*
    Initialize scans for merged quick selects and put all merged quick
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
    if ((error= quick->reset()))
      DBUG_RETURN(error);
    if ((error= quick->get_next()))
    {
      /* A scan that is empty from the start simply stays out of the queue. */
      if (error == HA_ERR_END_OF_FILE)
        continue;
      DBUG_RETURN(error);
    }
    quick->save_last_pos();
    queue_insert(&queue, (uchar*)quick);
  }

  /* Prepare for ha_rnd_pos calls: restart any rnd scan left open. */
  if (head->file->inited && (error= head->file->ha_rnd_end()))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
    DBUG_RETURN(error);
  }
  if ((error= head->file->ha_rnd_init(false)))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(error);
  }

  DBUG_RETURN(0);
}
1934 
1935 
1936 bool
push_quick_back(QUICK_SELECT_I * quick_sel_range)1937 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
1938 {
1939   return quick_selects.push_back(quick_sel_range);
1940 }
1941 
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  /* Release the priority queue and the merged child quick selects. */
  delete_queue(&queue);
  quick_selects.delete_elements();
  /* Close the rnd scan opened by reset(), if still active. */
  if (head->file->inited)
    head->file->ha_rnd_end();
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1952 
1953 
/* Create an unbounded range: neither a min nor a max key is set. */
QUICK_RANGE::QUICK_RANGE()
  :min_key(0),max_key(0),min_length(0),max_length(0),
   flag(NO_MIN_RANGE | NO_MAX_RANGE),
  min_keypart_map(0), max_keypart_map(0)
{}
1959 
/*
  Create a range bounded by private copies of the given min/max key images.
  The copies are length+1 bytes; NOTE(review): the extra byte presumably
  covers a trailing marker byte of the key image -- confirm against callers.
*/
QUICK_RANGE::QUICK_RANGE(const uchar *min_key_arg, uint min_length_arg,
                         key_part_map min_keypart_map_arg,
                         const uchar *max_key_arg, uint max_length_arg,
                         key_part_map max_keypart_map_arg,
                         uint flag_arg)
  : min_key(NULL),
    max_key(NULL),
    min_length((uint16) min_length_arg),
    max_length((uint16) max_length_arg),
    flag((uint16) flag_arg),
    min_keypart_map(min_keypart_map_arg),
    max_keypart_map(max_keypart_map_arg)
{
  min_key= static_cast<uchar*>(sql_memdup(min_key_arg, min_length_arg + 1));
  max_key= static_cast<uchar*>(sql_memdup(max_key_arg, max_length_arg + 1));
  // If we get is_null_string as argument, the memdup is undefined behavior.
  DBUG_ASSERT(min_key_arg != is_null_string);
  DBUG_ASSERT(max_key_arg != is_null_string);
}
1979 
/*
  Copy-construct a SEL_ARG. The copy starts out as a standalone node:
  it has no tree children and no neighbors in the interval list.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
{
  DBUG_ASSERT(arg.type != MAYBE_KEY);  // Would need left=right=NULL
  left=right= &null_element;
  prev=next= NULL;
  type=arg.type;
  min_flag=arg.min_flag;
  max_flag=arg.max_flag;
  maybe_flag=arg.maybe_flag;
  maybe_null=arg.maybe_null;
  part=arg.part;
  field=arg.field;
  /* Shallow copy: the min/max value buffers are shared with the source. */
  min_value=arg.min_value;
  max_value=arg.max_value;
  next_key_part=arg.next_key_part;
  use_count=1; elements=1;
}
1997 
1998 
/*
  Turn this node into the root of a one-element tree: no children,
  no interval-list neighbors, and no external references (use_count=0).
*/
inline void SEL_ARG::make_root()
{
  left=right= &null_element;
  color=BLACK;
  next=prev= NULL;
  use_count=0; elements=1;
}
2006 
/*
  Construct a closed KEY_RANGE interval [min_value_arg, max_value_arg]
  over field f. The value buffers are referenced, not copied.
*/
SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
                 const uchar *max_value_arg)
  :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
   elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
   max_value((uchar*) max_value_arg), next(NULL), prev(NULL),
   next_key_part(0), color(BLACK), type(KEY_RANGE)
{
  left=right= &null_element;
}
2016 
/*
  Construct a KEY_RANGE interval over key part 'part_' of 'field_' with
  explicit open/closed endpoint flags. The value buffers are referenced,
  not copied.
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
                 uchar *min_value_, uchar *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
   part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
   field(field_), min_value(min_value_), max_value(max_value_),
   next(NULL), prev(NULL), next_key_part(0), color(BLACK), type(KEY_RANGE)
{
  left=right= &null_element;
}
2027 
/*
  Recursively clone this SEL_ARG subtree.

  @param param      Range analysis context; its mem_root is used for the
                    new nodes and its alloced_sel_args counter caps the
                    total number of clones.
  @param new_parent Parent for the cloned node.
  @param next_arg   In/out: tail of the next/prev interval list being built;
                    the clone is appended here (in-order, via the left-self-
                    right recursion below).

  @return  The cloned node, or NULL on OOM / too many SEL_ARGs.
*/
SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
                        SEL_ARG **next_arg)
{
  SEL_ARG *tmp;

  /* Bail out if we have already generated too many SEL_ARGs */
  if (++param->alloced_sel_args > MAX_SEL_ARGS)
    return 0;

  if (type != KEY_RANGE)
  {
    /* Non-range node (e.g. MAYBE_KEY/IMPOSSIBLE): no values, no subtree. */
    if (!(tmp= new (param->mem_root) SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
    tmp->part= this->part;
  }
  else
  {
    if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
                                             min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    /* Clone left subtree first so the interval list stays in order. */
    if (left != &null_element)
      if (!(tmp->left=left->clone(param, tmp, next_arg)))
	return 0;				// OOM

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(param, tmp, next_arg)))
	return 0;				// OOM
  }
  /* The clone references the same next_key_part graph as the original. */
  increment_use_count(1);
  tmp->color= color;
  tmp->elements= this->elements;
  return tmp;
}
2070 
2071 /**
2072   This gives the first SEL_ARG in the interval list, and the minimal element
2073   in the red-black tree
2074 
2075   @return
2076   SEL_ARG   first SEL_ARG in the interval list
2077 */
first()2078 SEL_ARG *SEL_ARG::first()
2079 {
2080   SEL_ARG *next_arg=this;
2081   if (!next_arg->left)
2082     return 0;					// MAYBE_KEY
2083   while (next_arg->left != &null_element)
2084     next_arg=next_arg->left;
2085   return next_arg;
2086 }
2087 
first() const2088 const SEL_ARG *SEL_ARG::first() const
2089 {
2090   return const_cast<SEL_ARG*>(this)->first();
2091 }
2092 
last()2093 SEL_ARG *SEL_ARG::last()
2094 {
2095   SEL_ARG *next_arg=this;
2096   if (!next_arg->right)
2097     return 0;					// MAYBE_KEY
2098   while (next_arg->right != &null_element)
2099     next_arg=next_arg->right;
2100   return next_arg;
2101 }
2102 
2103 
2104 /*
2105   Check if a compare is ok, when one takes ranges in account
  Returns -2 or 2 if the ranges were 'joined' like  < 2 and >= 2
2107 */
2108 
/*
  Compare two interval endpoints of 'field'.

  @param a,b         Key images of the endpoints (first byte is the NULL
                     marker when the field is nullable).
  @param a_flag,b_flag  Endpoint flags (NO_MIN_RANGE/NO_MAX_RANGE for
                     unbounded ends, NEAR_MIN/NEAR_MAX for open ends).
  @return -1/0/1 for ordered/equal endpoints; -2/2 when the values are
          equal but the open/closed flags make adjacent ranges joinable.
*/
static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                   uint8 b_flag)
{
  int cmp;
  /* First check if there was a compare to a min or max element */
  if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
  {
    if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
	(b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
      return 0;
    /* -infinity sorts before everything, +infinity after everything. */
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  if (field->real_maybe_null())			// If null is part of key
  {
    if (*a != *b)
    {
      /* Exactly one endpoint is NULL; NULL sorts first. */
      return *a ? -1 : 1;
    }
    if (*a)
      goto end;					// NULL where equal
    a++; b++;					// Skip NULL marker
  }
  cmp=field->key_cmp(a , b);
  if (cmp) return cmp < 0 ? -1 : 1;		// The values differed

  // Check if the compared equal arguments was defined with open/closed range
 end:
  if (a_flag & (NEAR_MIN | NEAR_MAX))
  {
    if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
      return 0;
    if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
      return (a_flag & NEAR_MIN) ? 2 : -2;
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_flag & (NEAR_MIN | NEAR_MAX))
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements where equal
}
2151 
2152 
clone_tree(RANGE_OPT_PARAM * param)2153 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2154 {
2155   SEL_ARG tmp_link,*next_arg,*root;
2156   next_arg= &tmp_link;
2157   if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2158     return 0;
2159   next_arg->next=0;				// Fix last link
2160   tmp_link.next->prev=0;			// Fix first link
2161   if (root)					// If not OOM
2162     root->use_count= 0;
2163   return root;
2164 }
2165 
2166 
2167 /*
2168   Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
2169   objects from table read plans.
2170 */
class TABLE_READ_PLAN
{
public:
  /*
    Plan read cost, with or without cost of full row retrieval, depending
    on plan creation parameters.
  */
  double read_cost;
  ha_rows records; /* estimate of #rows to be examined */

  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
    scans that can be both ROR and non-ROR.
  */
  bool is_ror;

  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
       retrieve_full_rows  If TRUE, created quick select will do full record
                           retrieval.
       parent_alloc        Memory pool to use, if any.

    NOTES
      retrieve_full_rows is ignored by some implementations.

    RETURN
      created quick select
      NULL on any error.
  */
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

  /* Table read plans are allocated on MEM_ROOT and are never deleted */
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return (void*) alloc_root(mem_root, (uint) size); }
  /* Regular delete only scribbles over the storage; MEM_ROOT owns it. */
  static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
  /* Placement-delete counterpart of the MEM_ROOT operator new above. */
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
  virtual ~TABLE_READ_PLAN() {}               /* Remove gcc warning */

  /**
     Add basic info for this TABLE_READ_PLAN to the optimizer trace.

     @param param        Parameters for range analysis of this table
     @param trace_object The optimizer trace object the info is appended to
   */
  virtual void trace_basic_info(const PARAM *param,
                                Opt_trace_object *trace_object) const = 0;
};
2223 
2224 /*
2225   Plan for a QUICK_RANGE_SELECT scan.
2226   TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2227   QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2228   record retrieval scans.
2229 */
2230 
2231 class TRP_RANGE : public TABLE_READ_PLAN
2232 {
2233 public:
2234   /**
2235     Root of red-black tree for intervals over key fields to be used in
2236     "range" method retrieval. See SEL_ARG graph description.
2237   */
2238   SEL_ARG *key;
2239   uint     key_idx; /* key number in PARAM::key and PARAM::real_keynr*/
2240   uint     mrr_flags;
2241   uint     mrr_buf_size;
2242 
TRP_RANGE(SEL_ARG * key_arg,uint idx_arg,uint mrr_flags_arg)2243   TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2244    : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2245   {}
~TRP_RANGE()2246   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
2247 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)2248   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2249                              MEM_ROOT *parent_alloc)
2250   {
2251     DBUG_ENTER("TRP_RANGE::make_quick");
2252     QUICK_RANGE_SELECT *quick;
2253     if ((quick= get_quick_select(param, key_idx, key, mrr_flags, mrr_buf_size,
2254                                  parent_alloc)))
2255     {
2256       quick->records= records;
2257       quick->read_time= read_cost;
2258     }
2259     DBUG_RETURN(quick);
2260   }
2261 
2262   void trace_basic_info(const PARAM *param,
2263                         Opt_trace_object *trace_object) const;
2264 };
2265 
/*
  Append index name, row estimate and the list of ranges for this range
  scan plan to the optimizer trace. No-op unless OPTIMIZER_TRACE is built.
*/
void TRP_RANGE::trace_basic_info(const PARAM *param,
                                 Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  DBUG_ASSERT(param->using_real_indexes);
  /* key_idx indexes PARAM::real_keynr; translate to the table's key number. */
  const uint keynr_in_table= param->real_keynr[key_idx];

  const KEY &cur_key= param->table->key_info[keynr_in_table];
  const KEY_PART_INFO *key_part= cur_key.key_part;

  trace_object->add_alnum("type", "range_scan").
    add_utf8("index", cur_key.name).add("rows", records);

  Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");

  // TRP_RANGE should not be created if there are no range intervals
  DBUG_ASSERT(key);

  String range_info;
  range_info.set_charset(system_charset_info);
  append_range_all_keyparts(&trace_range, NULL, &range_info, key, key_part);

#endif
}
2290 
2291 
/*
  Descriptor of one candidate ROR (Rowid-ORdered Retrieval) scan, used
  when costing and building ROR intersection/union plans.
*/
typedef struct st_ror_scan_info
{
  uint      idx;      ///< # of used key in param->keys
  uint      keynr;    ///< # of used key in table
  ha_rows   records;  ///< estimate of # records this scan will return

  /** Set of intervals over key fields that will be used for row retrieval. */
  SEL_ARG   *sel_arg;

  /** Fields used in the query and covered by this ROR scan. */
  MY_BITMAP covered_fields;
  /**
    Fields used in the query that are a) covered by this ROR scan and
    b) not already covered by ROR scans ordered earlier in the merge
    sequence.
  */
  MY_BITMAP covered_fields_remaining;
  /** #fields in covered_fields_remaining (caching of bitmap_bits_set()) */
  uint      num_covered_fields_remaining;

  /**
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
  */
  double    index_read_cost;
} ROR_SCAN_INFO;
2318 
2319 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2320 
class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
  /*
    NOTE(review): the default constructor leaves all members uninitialized;
    presumably the plan builder fills them in before use -- confirm.
  */
  TRP_ROR_INTERSECT() {}                      /* Remove gcc warning */
  virtual ~TRP_ROR_INTERSECT() {}             /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);

  /* Array of pointers to ROR range scans used in this intersection */
  struct st_ror_scan_info **first_scan;
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
  double index_scan_costs; /* SUM(cost(index_scan)) */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2339 
/*
  Append summary info and, per intersected index scan, its ranges to the
  optimizer trace. No-op unless OPTIMIZER_TRACE is built.
*/
void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
                                         Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  trace_object->add_alnum("type", "index_roworder_intersect").
    add("rows", records).
    add("cost", read_cost).
    add("covering", is_covering).
    add("clustered_pk_scan", cpk_scan != NULL);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_array ota(trace, "intersect_of");
  for (st_ror_scan_info **cur_scan= first_scan;
       cur_scan != last_scan;
       cur_scan++)
  {
    const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
    const KEY_PART_INFO *key_part= cur_key.key_part;

    Opt_trace_object trace_isect_idx(trace);
    trace_isect_idx.add_alnum("type", "range_scan").
      add_utf8("index", cur_key.name).add("rows", (*cur_scan)->records);

    Opt_trace_array trace_range(trace, "ranges");
    /* Outer loop: one entry per interval; inner loop: one key part each. */
    for (const SEL_ARG *current= (*cur_scan)->sel_arg;
         current;
         current= current->next)
    {
      String range_info;
      range_info.set_charset(system_charset_info);
      for (const SEL_ARG *part= current;
           part;
           part= part->next_key_part)
      {
        const KEY_PART_INFO *cur_key_part= key_part + part->part;
        append_range(&range_info, cur_key_part,
                     part->min_value, part->max_value,
                     part->min_flag | part->max_flag);
      }
      trace_range.add_utf8(range_info.ptr(), range_info.length());
    }
  }
#endif
}
2384 
2385 /*
2386   Plan for QUICK_ROR_UNION_SELECT scan.
2387   QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2388   is ignored by make_quick.
2389 */
2390 
class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
  /*
    NOTE(review): the default constructor leaves the plan-range pointers
    uninitialized; presumably the plan builder fills them in -- confirm.
  */
  TRP_ROR_UNION() {}                          /* Remove gcc warning */
  virtual ~TRP_ROR_UNION() {}                 /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2404 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2405 void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
2406                                      Opt_trace_object *trace_object) const
2407 {
2408 #ifdef OPTIMIZER_TRACE
2409   Opt_trace_context * const trace= &param->thd->opt_trace;
2410   trace_object->add_alnum("type", "index_roworder_union");
2411   Opt_trace_array ota(trace, "union_of");
2412   for (TABLE_READ_PLAN **current= first_ror;
2413        current != last_ror;
2414        current++)
2415   {
2416     Opt_trace_object trp_info(trace);
2417     (*current)->trace_basic_info(param, &trp_info);
2418   }
2419 #endif
2420 }
2421 
2422 /*
2423   Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
2425   is ignored by make_quick.
2426 */
2427 
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
  /*
    NOTE(review): the default constructor leaves the scan-array pointers
    uninitialized; presumably the plan builder fills them in -- confirm.
  */
  TRP_INDEX_MERGE() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_MERGE() {}               /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;
};
2441 
trace_basic_info(const PARAM * param,Opt_trace_object * trace_object) const2442 void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
2443                                        Opt_trace_object *trace_object) const
2444 {
2445 #ifdef OPTIMIZER_TRACE
2446   Opt_trace_context * const trace= &param->thd->opt_trace;
2447   trace_object->add_alnum("type", "index_merge");
2448   Opt_trace_array ota(trace, "index_merge_of");
2449   for (TRP_RANGE **current= range_scans;
2450        current != range_scans_end;
2451        current++)
2452   {
2453     Opt_trace_object trp_info(trace);
2454     (*current)->trace_basic_info(param, &trp_info);
2455   }
2456 #endif
2457 }
2458 
2459 /*
2460   Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2461 */
2462 
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min;             ///< TRUE if there is a MIN function
  bool have_max;             ///< TRUE if there is a MAX function
  /**
    TRUE if there is an aggregate distinct function, e.g.
    "COUNT(DISTINCT x)"
   */
  bool have_agg_distinct;
  /**
    The key_part of the only field used by all MIN/MAX functions.
    Note that TRP_GROUP_MIN_MAX is not used if there are MIN/MAX
    functions on more than one field.
  */
  KEY_PART_INFO *min_max_arg_part;
  uint group_prefix_len;    ///< Length of all key parts in the group prefix
  uint used_key_parts;      ///< Number of index key parts used for access
  uint group_key_parts;     ///< Number of index key parts in the group prefix
  KEY *index_info;          ///< The index chosen for data access
  uint index;               ///< The id of the chosen index
  uchar key_infix[MAX_KEY_LENGTH];  ///< Constants from equality predicates
  uint key_infix_len;       ///< Length of key_infix
  SEL_TREE *range_tree;     ///< Represents all range predicates in the query
  SEL_ARG  *index_tree;     ///< The sub-tree corresponding to index_info
  uint param_idx;           ///< Index of used key in param->key
  bool is_index_scan;       ///< Use index_next() instead of random read
public:
  /** Number of records selected by the ranges in index_tree. */
  ha_rows quick_prefix_records;
public:

  void trace_basic_info(const PARAM *param,
                        Opt_trace_object *trace_object) const;

  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    bool have_agg_distinct_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
                    uint index_arg, uint key_infix_len_arg,
                    uchar *key_infix_arg,
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    have_agg_distinct(have_agg_distinct_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
    quick_prefix_records(quick_prefix_records_arg)
    {
      /* Copy the infix constants into the embedded buffer, if any. */
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
  virtual ~TRP_GROUP_MIN_MAX() {}             /* Remove gcc warning */

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  void use_index_scan() { is_index_scan= TRUE; }
};
2525 
/*
  Append group-min-max plan details (index, aggregates, key parts used,
  ranges) to the optimizer trace. No-op unless OPTIMIZER_TRACE is built.
*/
void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
                                         Opt_trace_object *trace_object) const
{
#ifdef OPTIMIZER_TRACE
  trace_object->add_alnum("type", "index_group").
    add_utf8("index", index_info->name);
  if (min_max_arg_part)
    trace_object->add_utf8("group_attribute",
                           min_max_arg_part->field->field_name);
  else
    trace_object->add_null("group_attribute");
  trace_object->add("min_aggregate", have_min).
    add("max_aggregate", have_max).
    add("distinct_aggregate", have_agg_distinct).
    add("rows", records).
    add("cost", read_cost);

  const KEY_PART_INFO *key_part= index_info->key_part;
  Opt_trace_context * const trace= &param->thd->opt_trace;
  {
    /* Scoped so the key-parts array is closed before the ranges array. */
    Opt_trace_array trace_keyparts(trace, "key_parts_used_for_access");
    for (uint partno= 0; partno < used_key_parts; partno++)
    {
      const KEY_PART_INFO *cur_key_part= key_part + partno;
      trace_keyparts.add_utf8(cur_key_part->field->field_name);
    }
  }
  Opt_trace_array trace_range(trace, "ranges");

  // can have group quick without ranges
  if (index_tree)
  {
    String range_info;
    range_info.set_charset(system_charset_info);
    append_range_all_keyparts(&trace_range, NULL,
                              &range_info, index_tree, key_part);
  }
#endif
}
2565 
2566 /*
2567   Fill param->needed_fields with bitmap of fields used in the query.
2568   SYNOPSIS
2569     fill_used_fields_bitmap()
2570       param Parameter from test_quick_select function.
2571 
2572   NOTES
2573     Clustered PK members are not put into the bitmap as they are implicitly
2574     present in all keys (and it is impossible to avoid reading them).
2575   RETURN
2576     0  Ok
2577     1  Out of memory.
2578 */
2579 
fill_used_fields_bitmap(PARAM * param)2580 static int fill_used_fields_bitmap(PARAM *param)
2581 {
2582   TABLE *table= param->table;
2583   my_bitmap_map *tmp;
2584   uint pk;
2585   param->tmp_covered_fields.bitmap= 0;
2586   param->fields_bitmap_size= table->s->column_bitmap_size;
2587   if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2588                                   param->fields_bitmap_size)) ||
2589       bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2590     return 1;
2591 
2592   bitmap_copy(&param->needed_fields, table->read_set);
2593   bitmap_union(&param->needed_fields, table->write_set);
2594 
2595   pk= param->table->s->primary_key;
2596   if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2597   {
2598     /* The table uses clustered PK and it is not internally generated */
2599     KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2600     KEY_PART_INFO *key_part_end=
2601       key_part + param->table->key_info[pk].user_defined_key_parts;
2602     for (;key_part != key_part_end; ++key_part)
2603       bitmap_clear_bit(&param->needed_fields, key_part->fieldnr-1);
2604   }
2605   return 0;
2606 }
2607 
2608 
2609 /*
2610   Test if a key can be used in different ranges
2611 
2612   SYNOPSIS
2613     SQL_SELECT::test_quick_select()
2614       thd               Current thread
2615       keys_to_use       Keys to use for range retrieval
2616       prev_tables       Tables assumed to be already read when the scan is
2617                         performed (but not read at the moment of this call)
2618       limit             Query limit
2619       force_quick_range Prefer to use range (instead of full table scan) even
2620                         if it is more expensive.
2621       interesting_order The sort order the range access method must be able
2622                         to provide. Three-value logic: asc/desc/don't care
2623 
2624   NOTES
2625     Updates the following in the select parameter:
      needed_reg - Bits for keys which may be used if all prev regs are read
2627       quick      - Parameter to use when reading records.
2628 
2629     In the table struct the following information is updated:
2630       quick_keys           - Which keys can be used
2631       quick_rows           - How many rows the key matches
2632       quick_condition_rows - E(# rows that will satisfy the table condition)
2633 
2634   IMPLEMENTATION
2635     quick_condition_rows value is obtained as follows:
2636 
2637       It is a minimum of E(#output rows) for all considered table access
2638       methods (range and index_merge accesses over various indexes).
2639 
2640     The obtained value is not a true E(#rows that satisfy table condition)
2641     but rather a pessimistic estimate. To obtain a true E(#...) one would
2642     need to combine estimates of various access methods, taking into account
2643     correlations between sets of rows they will return.
2644 
2645     For example, if values of tbl.key1 and tbl.key2 are independent (a right
2646     assumption if we have no information about their correlation) then the
2647     correct estimate will be:
2648 
2649       E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2650       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2)
2651 
2652     which is smaller than
2653 
2654        MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2)))
2655 
2656     which is currently produced.
2657 
2658   TODO
2659    * Change the value returned in quick_condition_rows from a pessimistic
2660      estimate to true E(#rows that satisfy table condition).
     (we can re-use some of E(#rows) calculation code from index_merge/intersection
2662       for this)
2663 
2664    * Check if this function really needs to modify keys_to_use, and change the
2665      code to pass it by reference if it doesn't.
2666 
   * In addition to force_quick_range other means can be (and usually are) used
2668      to make this function prefer range over full table scan. Figure out if
2669      force_quick_range is really needed.
2670 
2671   RETURN
2672    -1 if impossible select (i.e. certainly no rows will be selected)
2673     0 if can't use quick_select
2674     1 if found usable ranges and quick select has been successfully created.
2675 */
2676 
int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
                                  table_map prev_tables,
                                  ha_rows limit, bool force_quick_range,
                                  const ORDER::enum_order interesting_order)
{
  uint idx;
  double scan_time;
  DBUG_ENTER("SQL_SELECT::test_quick_select");
  DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
		      (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
		      (ulong) const_tables));

  set_quick(NULL);
  needed_reg.clear_all();
  quick_keys.clear_all();
  if (keys_to_use.is_clear_all())
    DBUG_RETURN(0);
  records= head->file->stats.records;
  if (!records)
    records++;					/* purecov: inspected */
  /*
    Cost baseline: read_time is the estimated cost of a full table scan.
    A range plan is only chosen further below if it beats this cost.
  */
  scan_time= records * ROW_EVALUATE_COST + 1;
  read_time= head->file->scan_time() + scan_time + 1.1;
  if (head->force_index)
    scan_time= read_time= DBL_MAX;  // FORCE INDEX: never prefer table scan
  if (limit < records)
    read_time= (double) records + scan_time + 1; // Force to use index
  else if (read_time <= 2.0 && !force_quick_range)
    DBUG_RETURN(0);				/* No need for quick select */

  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_range(trace, "range_analysis");
  Opt_trace_object(trace, "table_scan").
    add("rows", head->file->stats.records).
    add("cost", read_time);

  keys_to_use.intersect(head->keys_in_use_for_query);
  if (!keys_to_use.is_clear_all())
  {
    MEM_ROOT alloc;
    SEL_TREE *tree= NULL;
    KEY_PART *key_parts;
    KEY *key_info;
    PARAM param;

    /*
      Use the 3 multiplier as range optimizer allocates big PARAM structure
      and may evaluate a subquery expression
      TODO During the optimization phase we should evaluate only inexpensive
           single-lookup subqueries.
    */
    if (check_stack_overrun(thd, 3*STACK_MIN_SIZE + sizeof(PARAM), NULL))
      DBUG_RETURN(0);                           // Fatal error flag is set

    /* set up parameter that is passed to all functions */
    param.thd= thd;
    param.baseflag= head->file->ha_table_flags();
    param.prev_tables=prev_tables | const_tables;
    param.read_tables=read_tables;
    param.current_table= head->map;
    param.table=head;
    param.keys=0;
    param.mem_root= &alloc;
    param.old_root= thd->mem_root;
    param.needed_reg= &needed_reg;
    param.imerge_cost_buff_size= 0;
    param.using_real_indexes= TRUE;
    param.remove_jump_scans= TRUE;
    param.force_default_mrr= (interesting_order == ORDER::ORDER_DESC);
    param.order_direction= interesting_order;
    param.use_index_statistics= false;

    thd->no_errors=1;				// Don't warn about NULL
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
                                                  sizeof(KEY_PART)*
                                                  head->s->key_parts)) ||
        fill_used_fields_bitmap(&param))
    {
      thd->no_errors=0;
      free_root(&alloc,MYF(0));			// Return memory & allocator
      DBUG_RETURN(0);				// Can't use range
    }
    key_parts= param.key_parts;
    /*
      All range-analysis allocations below go on the local 'alloc' root;
      thd->mem_root is restored (to param.old_root) before returning.
    */
    thd->mem_root= &alloc;

    {
      Opt_trace_array trace_idx(trace,
                                "potential_range_indices",
                                Opt_trace_context::RANGE_OPTIMIZER);
      /*
        Make an array with description of all key parts of all table keys.
        This is used in get_mm_parts function.
      */
      key_info= head->key_info;
      for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
      {
        Opt_trace_object trace_idx_details(trace);
        trace_idx_details.add_utf8("index", key_info->name);
        KEY_PART_INFO *key_part_info;
        if (!keys_to_use.is_set(idx))
        {
          trace_idx_details.add("usable", false).
            add_alnum("cause", "not_applicable");
          continue;
        }
        if (key_info->flags & HA_FULLTEXT)
        {
          trace_idx_details.add("usable", false).
            add_alnum("cause", "fulltext");
          continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG
        }

        trace_idx_details.add("usable", true);

        param.key[param.keys]=key_parts;
        key_part_info= key_info->key_part;
        Opt_trace_array trace_keypart(trace, "key_parts");
        for (uint part=0 ; part < actual_key_parts(key_info) ;
             part++, key_parts++, key_part_info++)
        {
          key_parts->key=          param.keys;
          key_parts->part=         part;
          key_parts->length=       key_part_info->length;
          key_parts->store_length= key_part_info->store_length;
          key_parts->field=        key_part_info->field;
          key_parts->null_bit=     key_part_info->null_bit;
          key_parts->image_type =
            (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
          /* Only HA_PART_KEY_SEG is used */
          key_parts->flag=         (uint8) key_part_info->key_part_flag;
          trace_keypart.add_utf8(key_parts->field->field_name);
        }
        param.real_keynr[param.keys++]=idx;
      }
    }
    param.key_parts_end=key_parts;
    param.alloced_sel_args= 0;

    /* Calculate cost of full index read for the shortest covering index */
    if (!head->covering_keys.is_clear_all())
    {
      int key_for_use= find_shortest_key(head, &head->covering_keys);
      double key_read_time=
        param.table->file->index_only_read_time(key_for_use,
                                                rows2double(records)) +
        records * ROW_EVALUATE_COST;

      bool chosen= false;
      if (key_read_time < read_time)
      {
        read_time= key_read_time;
        chosen= true;
      }

      Opt_trace_object trace_cov(trace,
                                 "best_covering_index_scan",
                                 Opt_trace_context::RANGE_OPTIMIZER);
      trace_cov.add_utf8("index", head->key_info[key_for_use].name).
        add("cost", key_read_time).add("chosen", chosen);
      if (!chosen)
        trace_cov.add_alnum("cause", "cost");
    }

    TABLE_READ_PLAN *best_trp= NULL;
    TRP_GROUP_MIN_MAX *group_trp;
    double best_read_time= read_time;

    if (cond)
    {
      /* Build the SEL_TREE of intervals from the WHERE condition. */
      {
        Opt_trace_array trace_setup_cond(trace, "setup_range_conditions");
        tree= get_mm_tree(&param,cond);
      }
      if (tree)
      {
        if (tree->type == SEL_TREE::IMPOSSIBLE)
        {
          trace_range.add("impossible_range", true);
          records=0L;                      /* Return -1 from this function. */
          read_time= (double) HA_POS_ERROR;
          goto free_mem;
        }
        /*
          If the tree can't be used for range scans, proceed anyway, as we
          can construct a group-min-max quick select
        */
        if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
        {
          trace_range.add("range_scan_possible", false);
          if (tree->type == SEL_TREE::ALWAYS)
            trace_range.add_alnum("cause", "condition_always_true");

          tree= NULL;
        }
      }
    }

    /*
      Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
      Notice that it can be constructed no matter if there is a range tree.
    */
    group_trp= get_best_group_min_max(&param, tree, best_read_time);
    if (group_trp)
    {
      param.table->quick_condition_rows= min(group_trp->records,
                                             head->file->stats.records);
      Opt_trace_object grp_summary(trace,
                                   "best_group_range_summary",
                                   Opt_trace_context::RANGE_OPTIMIZER);
      if (unlikely(trace->is_started()))
        group_trp->trace_basic_info(&param, &grp_summary);
      if (group_trp->read_cost < best_read_time)
      {
        grp_summary.add("chosen", true);
        best_trp= group_trp;
        best_read_time= best_trp->read_cost;
      }
      else
        grp_summary.add("chosen", false).add_alnum("cause", "cost");
    }

    if (tree)
    {
      /*
        It is possible to use a range-based quick select (but it might be
        slower than 'all' table scan).
      */
      dbug_print_tree("final_tree", tree, &param);

      {
        /*
          Calculate cost of single index range scan and possible
          intersections of these
        */
        Opt_trace_object trace_range(trace,
                                     "analyzing_range_alternatives",
                                     Opt_trace_context::RANGE_OPTIMIZER);
        TRP_RANGE         *range_trp;
        TRP_ROR_INTERSECT *rori_trp;

        /* Get best 'range' plan and prepare data for making other plans */
        if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
                                             best_read_time)))
        {
          best_trp= range_trp;
          best_read_time= best_trp->read_cost;
        }

        /*
          Simultaneous key scans and row deletes on several handler
          objects are not allowed so don't use ROR-intersection for
          table deletes. Also, ROR-intersection cannot return rows in
          descending order
        */
        if ((thd->lex->sql_command != SQLCOM_DELETE) &&
            thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
            interesting_order != ORDER::ORDER_DESC)
        {
          /*
            Get best non-covering ROR-intersection plan and prepare data for
            building covering ROR-intersection.
          */
          if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time)))
          {
            best_trp= rori_trp;
            best_read_time= best_trp->read_cost;
          }
        }
      }

      // Here we calculate cost of union index merge
      if (!tree->merges.is_empty())
      {
        // Cannot return rows in descending order.
        if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE) &&
            interesting_order != ORDER::ORDER_DESC &&
            param.table->file->stats.records)
        {
          /* Try creating index_merge/ROR-union scan. */
          SEL_IMERGE *imerge;
          TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
          LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
          List_iterator_fast<SEL_IMERGE> it(tree->merges);
          Opt_trace_array trace_idx_merge(trace,
                                          "analyzing_index_merge",
                                          Opt_trace_context::RANGE_OPTIMIZER);
          /* Keep the cheapest disjunct plan over all imerges. */
          while ((imerge= it++))
          {
            new_conj_trp= get_best_disjunct_quick(&param, imerge,
                                                  best_read_time);
            if (new_conj_trp)
              set_if_smaller(param.table->quick_condition_rows,
                             new_conj_trp->records);
            if (!best_conj_trp ||
                (new_conj_trp &&
                 new_conj_trp->read_cost < best_conj_trp->read_cost))
            {
              best_conj_trp= new_conj_trp;
            }
          }
          if (best_conj_trp)
            best_trp= best_conj_trp;
        }
      }
    }

    thd->mem_root= param.old_root;

    /* If we got a read plan, create a quick select from it. */
    if (best_trp)
    {
      records= best_trp->records;
      if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
        set_quick(NULL);
    }

free_mem:
    if (unlikely(quick && trace->is_started() && best_trp))
    {
      // best_trp cannot be NULL if quick is set, done to keep fortify happy
      Opt_trace_object trace_range_summary(trace,
                                           "chosen_range_access_summary");
      {
        Opt_trace_object trace_range_plan(trace,
                                          "range_access_plan");
        best_trp->trace_basic_info(&param, &trace_range_plan);
      }
      trace_range_summary.add("rows_for_plan", quick->records).
        add("cost_for_plan", quick->read_time).
        add("chosen", true);
    }

    free_root(&alloc,MYF(0));			// Return memory & allocator
    thd->mem_root= param.old_root;
    thd->no_errors=0;
  }

  DBUG_EXECUTE("info", print_quick(quick, &needed_reg););

  /*
    Assume that if the user is using 'limit' we will only need to scan
    limit rows if we are using a key
  */
  DBUG_RETURN(records ? MY_TEST(quick) : -1);
}
3022 
3023 /****************************************************************************
3024  * Partition pruning module
3025  ****************************************************************************/
3026 #ifdef WITH_PARTITION_STORAGE_ENGINE
3027 
3028 /*
3029   PartitionPruningModule
3030 
3031   This part of the code does partition pruning. Partition pruning solves the
3032   following problem: given a query over partitioned tables, find partitions
3033   that we will not need to access (i.e. partitions that we can assume to be
3034   empty) when executing the query.
3035   The set of partitions to prune doesn't depend on which query execution
3036   plan will be used to execute the query.
3037 
3038   HOW IT WORKS
3039 
3040   Partition pruning module makes use of RangeAnalysisModule. The following
3041   examples show how the problem of partition pruning can be reduced to the
3042   range analysis problem:
3043 
3044   EXAMPLE 1
3045     Consider a query:
3046 
3047       SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3048 
3049     where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
3050     way to find the used (i.e. not pruned away) partitions is as follows:
3051 
3052     1. analyze the WHERE clause and extract the list of intervals over t1.a
3053        for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3054 
3055     2. for each interval I
3056        {
3057          find partitions that have non-empty intersection with I;
3058          mark them as used;
3059        }
3060 
3061   EXAMPLE 2
3062     Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3063     we need to:
3064 
3065     1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3066        The list of intervals we'll obtain will look like this:
3067        ((t1.a, t1.b) = (1,'foo')),
3068        ((t1.a, t1.b) = (2,'bar')),
       ((t1.a, t1.b) > (10,'zz'))
3070 
3071     2. for each interval I
3072        {
3073          if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3074          {
3075            calculate HASH(part_func(t1.a, t1.b));
3076            find which partition has records with this hash value and mark
3077              it as used;
3078          }
3079          else
3080          {
3081            mark all partitions as used;
3082            break;
3083          }
3084        }
3085 
3086    For both examples the step #1 is exactly what RangeAnalysisModule could
3087    be used to do, if it was provided with appropriate index description
3088    (array of KEY_PART structures).
3089    In example #1, we need to provide it with description of index(t1.a),
3090    in example #2, we need to provide it with description of index(t1.a, t1.b).
3091 
3092    These index descriptions are further called "partitioning index
3093    descriptions". Note that it doesn't matter if such indexes really exist,
3094    as range analysis module only uses the description.
3095 
3096    Putting it all together, partitioning module works as follows:
3097 
3098    prune_partitions() {
3099      call create_partition_index_description();
3100 
3101      call get_mm_tree(); // invoke the RangeAnalysisModule
3102 
3103      // analyze the obtained interval list and get used partitions
3104      call find_used_partitions();
3105   }
3106 
3107 */
3108 
3109 struct st_part_prune_param;
3110 struct st_part_opt_info;
3111 
3112 typedef void (*mark_full_part_func)(partition_info*, uint32);
3113 
3114 /*
3115   Partition pruning operation context
3116 */
3117 typedef struct st_part_prune_param
3118 {
3119   RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3120 
3121   /***************************************************************
3122    Following fields are filled in based solely on partitioning
3123    definition and not modified after that:
3124    **************************************************************/
3125   partition_info *part_info; /* Copy of table->part_info */
3126   /* Function to get partition id from partitioning fields only */
3127   get_part_id_func get_top_partition_id_func;
3128   /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3129   mark_full_part_func mark_full_partition_used;
3130 
3131   /* Partitioning 'index' description, array of key parts */
3132   KEY_PART *key;
3133 
3134   /*
3135     Number of fields in partitioning 'index' definition created for
3136     partitioning (0 if partitioning 'index' doesn't include partitioning
3137     fields)
3138   */
3139   uint part_fields;
3140   uint subpart_fields; /* Same as above for subpartitioning */
3141 
3142   /*
3143     Number of the last partitioning field keypart in the index, or -1 if
3144     partitioning index definition doesn't include partitioning fields.
3145   */
3146   int last_part_partno;
3147   int last_subpart_partno; /* Same as above for supartitioning */
3148 
3149   /*
3150     is_part_keypart[i] == test(keypart #i in partitioning index is a member
3151                                used in partitioning)
3152     Used to maintain current values of cur_part_fields and cur_subpart_fields
3153   */
3154   my_bool *is_part_keypart;
3155   /* Same as above for subpartitioning */
3156   my_bool *is_subpart_keypart;
3157 
3158   my_bool ignore_part_fields; /* Ignore rest of partioning fields */
3159 
3160   /***************************************************************
3161    Following fields form find_used_partitions() recursion context:
3162    **************************************************************/
3163   SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
3164   SEL_ARG **arg_stack_end; /* Top of the stack    */
3165   /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3166   uint cur_part_fields;
3167   /* Same as cur_part_fields, but for subpartitioning */
3168   uint cur_subpart_fields;
3169 
3170   /* Iterator to be used to obtain the "current" set of used partitions */
3171   PARTITION_ITERATOR part_iter;
3172 
3173   /* Initialized bitmap of num_subparts size */
3174   MY_BITMAP subparts_bitmap;
3175 
3176   uchar *cur_min_key;
3177   uchar *cur_max_key;
3178 
3179   uint cur_min_flag, cur_max_flag;
3180 } PART_PRUNE_PARAM;
3181 
3182 static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3183 static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3184 static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3185                                        SEL_IMERGE *imerge);
3186 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3187                                             List<SEL_IMERGE> &merges);
3188 static void mark_all_partitions_as_used(partition_info *part_info);
3189 
3190 #ifndef DBUG_OFF
3191 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3192 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3193 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3194 #endif
3195 
3196 
3197 /**
3198   Perform partition pruning for a given table and condition.
3199 
3200   @param      thd            Thread handle
3201   @param      table          Table to perform partition pruning for
3202   @param      pprune_cond    Condition to use for partition pruning
3203 
3204   @note This function assumes that lock_partitions are setup when it
3205   is invoked. The function analyzes the condition, finds partitions that
3206   need to be used to retrieve the records that match the condition, and
3207   marks them as used by setting appropriate bit in part_info->read_partitions
3208   In the worst case all partitions are marked as used. If the table is not
  yet locked, it will also unset bits in part_info->lock_partitions that are
  not set in read_partitions.
3211 
3212   This function returns promptly if called for non-partitioned table.
3213 
3214   @return Operation status
3215     @retval true  Failure
3216     @retval false Success
3217 */
3218 
prune_partitions(THD * thd,TABLE * table,Item * pprune_cond)3219 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3220 {
3221   partition_info *part_info = table->part_info;
3222   DBUG_ENTER("prune_partitions");
3223   table->all_partitions_pruned_away= false;
3224 
3225   if (!part_info)
3226     DBUG_RETURN(FALSE); /* not a partitioned table */
3227 
3228   if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION &&
3229       part_info->is_auto_partitioned)
3230     DBUG_RETURN(false); /* Should not prune auto partitioned table */
3231 
3232   if (!pprune_cond)
3233   {
3234     mark_all_partitions_as_used(part_info);
3235     DBUG_RETURN(FALSE);
3236   }
3237 
3238   /* No need to continue pruning if there is no more partitions to prune! */
3239   if (bitmap_is_clear_all(&part_info->lock_partitions))
3240     bitmap_clear_all(&part_info->read_partitions);
3241   if (bitmap_is_clear_all(&part_info->read_partitions))
3242   {
3243     table->all_partitions_pruned_away= true;
3244     DBUG_RETURN(false);
3245   }
3246 
3247   /*
3248     If the prepare stage already have completed pruning successfully,
3249     it is no use of running prune_partitions() again on the same condition.
3250     Since it will not be able to prune anything more than the previous call
3251     from the prepare step.
3252   */
3253   if (part_info->is_pruning_completed)
3254     DBUG_RETURN(false);
3255 
3256   PART_PRUNE_PARAM prune_param;
3257   MEM_ROOT alloc;
3258   RANGE_OPT_PARAM  *range_par= &prune_param.range_param;
3259   my_bitmap_map *old_sets[2];
3260 
3261   prune_param.part_info= part_info;
3262   init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
3263   range_par->mem_root= &alloc;
3264   range_par->old_root= thd->mem_root;
3265 
3266   if (create_partition_index_description(&prune_param))
3267   {
3268     mark_all_partitions_as_used(part_info);
3269     free_root(&alloc,MYF(0));		// Return memory & allocator
3270     DBUG_RETURN(FALSE);
3271   }
3272 
3273   dbug_tmp_use_all_columns(table, old_sets,
3274                            table->read_set, table->write_set);
3275   range_par->thd= thd;
3276   range_par->table= table;
3277   /* range_par->cond doesn't need initialization */
3278   range_par->prev_tables= range_par->read_tables= 0;
3279   range_par->current_table= table->map;
3280 
3281   range_par->keys= 1; // one index
3282   range_par->using_real_indexes= FALSE;
3283   range_par->remove_jump_scans= FALSE;
3284   range_par->real_keynr[0]= 0;
3285   range_par->alloced_sel_args= 0;
3286 
3287   thd->no_errors=1;				// Don't warn about NULL
3288   thd->mem_root=&alloc;
3289 
3290   bitmap_clear_all(&part_info->read_partitions);
3291 
3292   prune_param.key= prune_param.range_param.key_parts;
3293   SEL_TREE *tree;
3294   int res;
3295 
3296   tree= get_mm_tree(range_par, pprune_cond);
3297   if (!tree)
3298     goto all_used;
3299 
3300   if (tree->type == SEL_TREE::IMPOSSIBLE)
3301   {
3302     /* Cannot improve the pruning any further. */
3303     part_info->is_pruning_completed= true;
3304     goto end;
3305   }
3306 
3307   if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3308     goto all_used;
3309 
3310   if (tree->merges.is_empty())
3311   {
3312     /* Range analysis has produced a single list of intervals. */
3313     prune_param.arg_stack_end= prune_param.arg_stack;
3314     prune_param.cur_part_fields= 0;
3315     prune_param.cur_subpart_fields= 0;
3316 
3317     prune_param.cur_min_key= prune_param.range_param.min_key;
3318     prune_param.cur_max_key= prune_param.range_param.max_key;
3319     prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3320 
3321     init_all_partitions_iterator(part_info, &prune_param.part_iter);
3322     if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3323                                                             tree->keys[0]))))
3324       goto all_used;
3325   }
3326   else
3327   {
3328     if (tree->merges.elements == 1)
3329     {
3330       /*
3331         Range analysis has produced a "merge" of several intervals lists, a
3332         SEL_TREE that represents an expression in form
3333           sel_imerge = (tree1 OR tree2 OR ... OR treeN)
3334         that cannot be reduced to one tree. This can only happen when
3335         partitioning index has several keyparts and the condition is OR of
3336         conditions that refer to different key parts. For example, we'll get
3337         here for "partitioning_field=const1 OR subpartitioning_field=const2"
3338       */
3339       if (-1 == (res= find_used_partitions_imerge(&prune_param,
3340                                                   tree->merges.head())))
3341         goto all_used;
3342     }
3343     else
3344     {
3345       /*
3346         Range analysis has produced a list of several imerges, i.e. a
3347         structure that represents a condition in form
3348         imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
3349         This is produced for complicated WHERE clauses that range analyzer
3350         can't really analyze properly.
3351       */
3352       if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
3353                                                        tree->merges)))
3354         goto all_used;
3355     }
3356   }
3357 
3358   /*
3359     If the condition can be evaluated now, we are done with pruning.
3360 
3361     During the prepare phase, before locking, subqueries and stored programs
3362     are not evaluated. So we need to run prune_partitions() a second time in
3363     the optimize phase to prune partitions for reading, when subqueries and
3364     stored programs may be evaluated.
3365   */
3366   if (pprune_cond->can_be_evaluated_now())
3367     part_info->is_pruning_completed= true;
3368   goto end;
3369 
3370 all_used:
3371   mark_all_partitions_as_used(prune_param.part_info);
3372 end:
3373   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
3374   thd->no_errors=0;
3375   thd->mem_root= range_par->old_root;
3376   free_root(&alloc,MYF(0));			// Return memory & allocator
3377   /*
3378     Must be a subset of the locked partitions.
3379     lock_partitions contains the partitions marked by explicit partition
3380     selection (... t PARTITION (pX) ...) and we must only use partitions
3381     within that set.
3382   */
3383   bitmap_intersect(&prune_param.part_info->read_partitions,
3384                    &prune_param.part_info->lock_partitions);
3385   /*
3386     If not yet locked, also prune partitions to lock if not UPDATEing
3387     partition key fields. This will also prune lock_partitions if we are under
3388     LOCK TABLES, so prune away calls to start_stmt().
3389     TODO: enhance this prune locking to also allow pruning of
3390     'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
3391     a lock for part_key partition.
3392   */
3393   if (!thd->lex->is_query_tables_locked() &&
3394       !partition_key_modified(table, table->write_set))
3395   {
3396     bitmap_copy(&prune_param.part_info->lock_partitions,
3397                 &prune_param.part_info->read_partitions);
3398   }
3399   if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
3400     table->all_partitions_pruned_away= true;
3401   DBUG_RETURN(false);
3402 }
3403 
3404 
3405 /*
3406   Store field key image to table record
3407 
3408   SYNOPSIS
3409     store_key_image_to_rec()
3410       field  Field which key image should be stored
3411       ptr    Field value in key format
3412       len    Length of the value, in bytes
3413 
3414   DESCRIPTION
3415     Copy the field value from its key image to the table record. The source
3416     is the value in key image format, occupying len bytes in buffer pointed
3417     by ptr. The destination is table record, in "field value in table record"
3418     format.
3419 */
3420 
store_key_image_to_rec(Field * field,uchar * ptr,uint len)3421 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3422 {
3423   /* Do the same as print_key_value() does */
3424   my_bitmap_map *old_map;
3425 
3426   if (field->real_maybe_null())
3427   {
3428     if (*ptr)
3429     {
3430       field->set_null();
3431       return;
3432     }
3433     field->set_notnull();
3434     ptr++;
3435   }
3436   old_map= dbug_tmp_use_all_columns(field->table,
3437                                     field->table->write_set);
3438   field->set_key_image(ptr, len);
3439   dbug_tmp_restore_column_map(field->table->write_set, old_map);
3440 }
3441 
3442 
3443 /*
3444   For SEL_ARG* array, store sel_arg->min values into table record buffer
3445 
3446   SYNOPSIS
3447     store_selargs_to_rec()
3448       ppar   Partition pruning context
3449       start  Array of SEL_ARG* for which the minimum values should be stored
3450       num    Number of elements in the array
3451 
3452   DESCRIPTION
3453     For each SEL_ARG* interval in the specified array, store the left edge
3454     field value (sel_arg->min, key image format) into the table record.
3455 */
3456 
store_selargs_to_rec(PART_PRUNE_PARAM * ppar,SEL_ARG ** start,int num)3457 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3458                                  int num)
3459 {
3460   KEY_PART *parts= ppar->range_param.key_parts;
3461   for (SEL_ARG **end= start + num; start != end; start++)
3462   {
3463     SEL_ARG *sel_arg= (*start);
3464     store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3465                            parts[sel_arg->part].length);
3466   }
3467 }
3468 
3469 
3470 /* Mark a partition as used in the case when there are no subpartitions */
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  DBUG_ENTER("mark_full_partition_used_no_parts");
  DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
  /* No subpartitions: partition ids map 1:1 to read_partitions bits */
  bitmap_set_bit(&part_info->read_partitions, part_id);
  DBUG_VOID_RETURN;
}
3479 
3480 
3481 /* Mark a partition as used in the case when there are subpartitions */
mark_full_partition_used_with_parts(partition_info * part_info,uint32 part_id)3482 static void mark_full_partition_used_with_parts(partition_info *part_info,
3483                                                 uint32 part_id)
3484 {
3485   uint32 start= part_id * part_info->num_subparts;
3486   uint32 end=   start + part_info->num_subparts;
3487   DBUG_ENTER("mark_full_partition_used_with_parts");
3488 
3489   for (; start != end; start++)
3490   {
3491     DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3492     bitmap_set_bit(&part_info->read_partitions, start);
3493   }
3494   DBUG_VOID_RETURN;
3495 }
3496 
3497 /*
3498   Find the set of used partitions for List<SEL_IMERGE>
3499   SYNOPSIS
3500     find_used_partitions_imerge_list
      ppar      Partition pruning context.
      merges    List of SEL_IMERGE structures (ANDed together) to prune for.
3503 
3504   DESCRIPTION
3505     List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
3506     The set of used partitions is an intersection of used partitions sets
3507     for imerge_{i}.
3508     We accumulate this intersection in a separate bitmap.
3509 
3510   RETURN
3511     See find_used_partitions()
3512 */
3513 
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges)
{
  /*
    The list means "imerge1 AND imerge2 AND ...", so the used-partition set
    is the intersection of the per-imerge sets. We accumulate that
    intersection in 'all_merges' while read_partitions is reused as
    scratch space for each individual imerge.
  */
  MY_BITMAP all_merges;
  uint bitmap_bytes;
  my_bitmap_map *bitmap_buf;
  uint n_bits= ppar->part_info->read_partitions.n_bits;
  bitmap_bytes= bitmap_buffer_size(n_bits);
  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
                                                bitmap_bytes)))
  {
    /*
      Fallback, process just the first SEL_IMERGE. This can leave us with more
      partitions marked as used than actually needed.
    */
    return find_used_partitions_imerge(ppar, merges.head());
  }
  /* bitmap_init() cannot fail here: the buffer is already allocated */
  bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
  /* Start from "all partitions used"; each imerge can only narrow it down */
  bitmap_set_prefix(&all_merges, n_bits);

  List_iterator<SEL_IMERGE> it(merges);
  SEL_IMERGE *imerge;
  while ((imerge=it++))
  {
    int res= find_used_partitions_imerge(ppar, imerge);
    if (!res)
    {
      /* no used partitions on one ANDed imerge => no used partitions at all */
      return 0;
    }

    /*
      res == -1 means "use all partitions" for this imerge, which leaves the
      accumulated intersection unchanged; otherwise intersect with the set
      this imerge marked in read_partitions.
    */
    if (res != -1)
      bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);

    if (bitmap_is_clear_all(&all_merges))
      return 0;

    /* Reset the scratch bitmap before processing the next ANDed imerge */
    bitmap_clear_all(&ppar->part_info->read_partitions);
  }
  /* Publish the accumulated intersection as the final used-partition set */
  memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
         bitmap_bytes);
  return 1;
}
3557 
3558 
3559 /*
3560   Find the set of used partitions for SEL_IMERGE structure
3561   SYNOPSIS
3562     find_used_partitions_imerge()
      ppar      Partition pruning context.
      imerge    SEL_IMERGE ("tree1 OR tree2 OR ...") to perform pruning for.
3565 
3566   DESCRIPTION
3567     SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
3568     trivial - just use mark used partitions for each tree and bail out early
3569     if for some tree_{i} all partitions are used.
3570 
3571   RETURN
3572     See find_used_partitions().
3573 */
3574 
3575 static
find_used_partitions_imerge(PART_PRUNE_PARAM * ppar,SEL_IMERGE * imerge)3576 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3577 {
3578   int res= 0;
3579   for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3580   {
3581     ppar->arg_stack_end= ppar->arg_stack;
3582     ppar->cur_part_fields= 0;
3583     ppar->cur_subpart_fields= 0;
3584 
3585     ppar->cur_min_key= ppar->range_param.min_key;
3586     ppar->cur_max_key= ppar->range_param.max_key;
3587     ppar->cur_min_flag= ppar->cur_max_flag= 0;
3588 
3589     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3590     SEL_ARG *key_tree= (*ptree)->keys[0];
3591     if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3592       return -1;
3593   }
3594   return res;
3595 }
3596 
3597 
3598 /*
3599   Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3600 
3601   SYNOPSIS
3602     find_used_partitions()
3603       ppar      Partition pruning context.
3604       key_tree  SEL_ARG range tree to perform pruning for
3605 
3606   DESCRIPTION
3607     This function
3608       * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3609       * finds the partitions one needs to use to get rows in these intervals
3610       * marks these partitions as used.
    The next section describes the process in greater detail.
3612 
3613   IMPLEMENTATION
3614     TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3615     We can find out which [sub]partitions to use if we obtain restrictions on
3616     [sub]partitioning fields in the following form:
3617     1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
3618     1.1  Same as (1) but for subpartition fields
3619 
3620     If partitioning supports interval analysis (i.e. partitioning is a
3621     function of a single table field, and partition_info::
3622     get_part_iter_for_interval != NULL), then we can also use condition in
3623     this form:
3624     2.  "const1 <=? partition_field <=? const2"
3625     2.1  Same as (2) but for subpartition_field
3626 
3627     INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3628 
3629     The below is an example of what SEL_ARG tree may represent:
3630 
3631     (start)
3632      |                           $
3633      |   Partitioning keyparts   $  subpartitioning keyparts
3634      |                           $
3635      |     ...          ...      $
3636      |      |            |       $
3637      | +---------+  +---------+  $  +-----------+  +-----------+
3638      \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3639        +---------+  +---------+  $  +-----------+  +-----------+
3640             |                    $        |             |
3641             |                    $        |        +-----------+
3642             |                    $        |        | subpar2=c6|
3643             |                    $        |        +-----------+
3644             |                    $        |
3645             |                    $  +-----------+  +-----------+
3646             |                    $  | subpar1=c4|--| subpar2=c8|
3647             |                    $  +-----------+  +-----------+
3648             |                    $
3649             |                    $
3650        +---------+               $  +------------+  +------------+
3651        | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3652        +---------+               $  +------------+  +------------+
3653             |                    $
3654            ...                   $
3655 
3656     The up-down connections are connections via SEL_ARG::left and
3657     SEL_ARG::right. A horizontal connection to the right is the
3658     SEL_ARG::next_key_part connection.
3659 
3660     find_used_partitions() traverses the entire tree via recursion on
3661      * SEL_ARG::next_key_part (from left to right on the picture)
3662      * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3663        performed for each depth level.
3664 
3665     Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3666     ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3667     For the example in the above picture, one of stack states is:
3668       in find_used_partitions(key_tree = "subpar2=c5") (***)
3669       in find_used_partitions(key_tree = "subpar1=c3")
3670       in find_used_partitions(key_tree = "par2=c2")   (**)
3671       in find_used_partitions(key_tree = "par1=c1")
3672       in prune_partitions(...)
3673     We apply partitioning limits as soon as possible, e.g. when we reach the
3674     depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3675     and save them in ppar->part_iter.
3676     When we reach the depth (***), we find which subpartition(s) correspond to
3677     "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
3678     appropriate subpartitions as used.
3679 
3680     It is possible that constraints on some partitioning fields are missing.
3681     For the above example, consider this stack state:
3682       in find_used_partitions(key_tree = "subpar2=c12") (***)
3683       in find_used_partitions(key_tree = "subpar1=c10")
3684       in find_used_partitions(key_tree = "par1=c2")
3685       in prune_partitions(...)
3686     Here we don't have constraints for all partitioning fields. Since we've
3687     never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value.  We get the subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in
    every partition.
3691 
3692     The inverse is also possible: we may get constraints on partitioning
3693     fields, but not constraints on subpartitioning fields. In that case,
3694     calls to find_used_partitions() with depth below (**) will return -1,
3695     and we will mark entire partition as used.
3696 
3697   TODO
3698     Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3699 
3700   RETURN
3701     1   OK, one or more [sub]partitions are marked as used.
3702     0   The passed condition doesn't match any partitions
3703    -1   Couldn't infer any partition pruning "intervals" from the passed
3704         SEL_ARG* tree (which means that all partitions should be marked as
3705         used) Marking partitions as used is the responsibility of the caller.
3706 */
3707 
static
int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
{
  int res, left_res=0, right_res=0;
  int key_tree_part= (int)key_tree->part;
  bool set_full_part_if_bad_ret= FALSE;
  bool ignore_part_fields= ppar->ignore_part_fields;
  bool did_set_ignore_part_fields= FALSE;
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);

  /*
    This function recurses both on next_key_part and on left/right; deep
    SEL_ARG trees could exhaust the stack, so give up conservatively
    (-1 = caller marks all partitions as used).
  */
  if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
    return -1;

  /* First recurse into intervals smaller than this one (SEL_ARG::left) */
  if (key_tree->left != &null_element)
  {
    if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
      return -1;
  }

  /* Push SEL_ARG's to stack to enable looking backwards as well */
  ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
  *(ppar->arg_stack_end++)= key_tree;

  if (ignore_part_fields)
  {
    /*
      We come here when a condition on the first partitioning
      fields led to evaluating the partitioning condition
      (due to finding a condition of the type a < const or
      b > const). Thus we must ignore the rest of the
      partitioning fields but we still want to analyse the
      subpartitioning fields.
    */
    if (key_tree->next_key_part)
      res= find_used_partitions(ppar, key_tree->next_key_part);
    else
      res= -1;
    goto pop_and_go_right;
  }

  if (key_tree->type == SEL_ARG::KEY_RANGE)
  {
    /*
      Case 1: partitioning supports interval analysis and this key part
      belongs to the partitioning (not subpartitioning) fields.
    */
    if (ppar->part_info->get_part_iter_for_interval &&
        key_tree->part <= ppar->last_part_partno)
    {
      /* Collect left and right bound, their lengths and flags */
      uchar *min_key= ppar->cur_min_key;
      uchar *max_key= ppar->cur_max_key;
      uchar *tmp_min_key= min_key;
      uchar *tmp_max_key= max_key;
      key_tree->store_min(ppar->key[key_tree->part].store_length,
                          &tmp_min_key, ppar->cur_min_flag);
      key_tree->store_max(ppar->key[key_tree->part].store_length,
                          &tmp_max_key, ppar->cur_max_flag);
      uint flag;
      if (key_tree->next_key_part &&
          key_tree->next_key_part->part == key_tree->part+1 &&
          key_tree->next_key_part->part <= ppar->last_part_partno &&
          key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      {
        /*
          There are more key parts for partition pruning to handle
          This mainly happens when the condition is an equality
          condition.
        */
        if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
            (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
            !key_tree->min_flag && !key_tree->max_flag)
        {
          /*
            This key part is a single point (min == max, no open-ended
            flags): accumulate it and recurse into the next key part.
          */
          /* Set 'parameters' */
          ppar->cur_min_key= tmp_min_key;
          ppar->cur_max_key= tmp_max_key;
          uint save_min_flag= ppar->cur_min_flag;
          uint save_max_flag= ppar->cur_max_flag;

          ppar->cur_min_flag|= key_tree->min_flag;
          ppar->cur_max_flag|= key_tree->max_flag;

          res= find_used_partitions(ppar, key_tree->next_key_part);

          /* Restore 'parameters' back */
          ppar->cur_min_key= min_key;
          ppar->cur_max_key= max_key;

          ppar->cur_min_flag= save_min_flag;
          ppar->cur_max_flag= save_max_flag;
          goto pop_and_go_right;
        }
        /* We have arrived at the last field in the partition pruning */
        uint tmp_min_flag= key_tree->min_flag,
             tmp_max_flag= key_tree->max_flag;
        if (!tmp_min_flag)
          key_tree->next_key_part->store_min_key(ppar->key,
                                                 &tmp_min_key,
                                                 &tmp_min_flag,
                                                 ppar->last_part_partno);
        if (!tmp_max_flag)
          key_tree->next_key_part->store_max_key(ppar->key,
                                                 &tmp_max_key,
                                                 &tmp_max_flag,
                                                 ppar->last_part_partno);
        flag= tmp_min_flag | tmp_max_flag;
      }
      else
        flag= key_tree->min_flag | key_tree->max_flag;

      /* Nothing stored above the key buffer start => no bound on that side */
      if (tmp_min_key != range_par->min_key)
        flag&= ~NO_MIN_RANGE;
      else
        flag|= NO_MIN_RANGE;
      if (tmp_max_key != range_par->max_key)
        flag&= ~NO_MAX_RANGE;
      else
        flag|= NO_MAX_RANGE;

      /*
        We need to call the interval mapper if we have a condition which
        makes sense to prune on. In the example of COLUMNS on a and
        b it makes sense if we have a condition on a, or conditions on
        both a and b. If we only have conditions on b it might make sense
        but this is a harder case we will solve later. For the harder case
        this clause then turns into use of all partitions and thus we
        simply set res= -1 as if the mapper had returned that.
        TODO: What to do here is defined in WL#4065.
      */
      if (ppar->arg_stack[0]->part == 0)
      {
        uint32 i;
        uint32 store_length_array[MAX_KEY];
        uint32 num_keys= ppar->part_fields;

        for (i= 0; i < num_keys; i++)
          store_length_array[i]= ppar->key[i].store_length;
        res= ppar->part_info->
             get_part_iter_for_interval(ppar->part_info,
                                        FALSE,
                                        store_length_array,
                                        range_par->min_key,
                                        range_par->max_key,
                                        tmp_min_key - range_par->min_key,
                                        tmp_max_key - range_par->max_key,
                                        flag,
                                        &ppar->part_iter);
        if (!res)
          goto pop_and_go_right; /* res==0 --> no satisfying partitions */
      }
      else
        res= -1;

      if (res == -1)
      {
        /* get a full range iterator */
        init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
      }
      /*
        Save our intent to mark full partition as used if we will not be able
        to obtain further limits on subpartitions
      */
      if (key_tree_part < ppar->last_part_partno)
      {
        /*
          We need to ignore the rest of the partitioning fields in all
          evaluations after this
        */
        did_set_ignore_part_fields= TRUE;
        ppar->ignore_part_fields= TRUE;
      }
      set_full_part_if_bad_ret= TRUE;
      goto process_next_key_part;
    }

    /*
      Case 2: last subpartitioning key part, and subpartitioning supports
      interval analysis. Combine the subpartition set from this interval
      with the partition set accumulated in ppar->part_iter.
    */
    if (key_tree_part == ppar->last_subpart_partno &&
        (NULL != ppar->part_info->get_subpart_iter_for_interval))
    {
      PARTITION_ITERATOR subpart_iter;
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    range_par->key_parts););
      res= ppar->part_info->
           get_subpart_iter_for_interval(ppar->part_info,
                                         TRUE,
                                         NULL, /* Currently not used here */
                                         key_tree->min_value,
                                         key_tree->max_value,
                                         0, 0, /* Those are ignored here */
                                         key_tree->min_flag |
                                           key_tree->max_flag,
                                         &subpart_iter);
      if (res == 0)
      {
        /*
           The only case where we can get "no satisfying subpartitions"
           returned from the above call is when an error has occurred.
        */
        DBUG_ASSERT(range_par->thd->is_error());
        return 0;
      }

      if (res == -1)
        goto pop_and_go_right; /* all subpartitions satisfy */

      uint32 subpart_id;
      bitmap_clear_all(&ppar->subparts_bitmap);
      while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
             NOT_A_PARTITION_ID)
        bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);

      /* Mark each partition as used in each subpartition.  */
      uint32 part_id;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
              NOT_A_PARTITION_ID)
      {
        for (uint i= 0; i < ppar->part_info->num_subparts; i++)
          if (bitmap_is_set(&ppar->subparts_bitmap, i))
            bitmap_set_bit(&ppar->part_info->read_partitions,
                           part_id * ppar->part_info->num_subparts + i);
      }
      goto pop_and_go_right;
    }

    /*
      Case 3: no interval analysis available; only single-point
      ("field <=> const") conditions can be used.
    */
    if (key_tree->is_singlepoint())
    {
      if (key_tree_part == ppar->last_part_partno &&
          ppar->cur_part_fields == ppar->part_fields &&
          ppar->part_info->get_part_iter_for_interval == NULL)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
                                                       ppar->part_fields););
        uint32 part_id;
        longlong func_value;
        /* Find in which partition the {const1, ...,constN} tuple goes */
        if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
                                            &func_value))
        {
          res= 0; /* No satisfying partitions */
          goto pop_and_go_right;
        }
        /* Remember the limit we got - single partition #part_id */
        init_single_partition_iterator(part_id, &ppar->part_iter);

        /*
          If there are no subpartitions/we fail to get any limit for them,
          then we'll mark full partition as used.
        */
        set_full_part_if_bad_ret= TRUE;
        goto process_next_key_part;
      }

      if (key_tree_part == ppar->last_subpart_partno &&
          ppar->cur_subpart_fields == ppar->subpart_fields)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
                             ppar->subpart_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
                                                       ppar->subpart_fields,
                                                       ppar->subpart_fields););
        /* Find the subpartition (it's HASH/KEY so we always have one) */
        partition_info *part_info= ppar->part_info;
        uint32 part_id, subpart_id;

        if (part_info->get_subpartition_id(part_info, &subpart_id))
          return 0;

        /* Mark this partition as used in each subpartition. */
        while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
                NOT_A_PARTITION_ID)
        {
          bitmap_set_bit(&part_info->read_partitions,
                         part_id * part_info->num_subparts + subpart_id);
        }
        res= 1; /* Some partitions were marked as used */
        goto pop_and_go_right;
      }
    }
    else
    {
      /*
        Can't handle condition on current key part. If we're that deep that
        we're processing subpartitioning's key parts, this means we'll not be
        able to infer any suitable condition, so bail out.
      */
      if (key_tree_part >= ppar->last_part_partno)
      {
        res= -1;
        goto pop_and_go_right;
      }
      /*
        No meaning in continuing with rest of partitioning key parts.
        Will try to continue with subpartitioning key parts.
      */
      ppar->ignore_part_fields= true;
      did_set_ignore_part_fields= true;
      goto process_next_key_part;
    }
  }

process_next_key_part:
  /* Recurse into the next key part, or report "no limits" if there is none */
  if (key_tree->next_key_part)
    res= find_used_partitions(ppar, key_tree->next_key_part);
  else
    res= -1;

  if (did_set_ignore_part_fields)
  {
    /*
      We have returned from processing all key trees linked to our next
      key part. We are ready to be moving down (using right pointers) and
      this tree is a new evaluation requiring its own decision on whether
      to ignore partitioning fields.
    */
    ppar->ignore_part_fields= FALSE;
  }
  if (set_full_part_if_bad_ret)
  {
    if (res == -1)
    {
      /* Got "full range" for subpartitioning fields */
      uint32 part_id;
      bool found= FALSE;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
             NOT_A_PARTITION_ID)
      {
        ppar->mark_full_partition_used(ppar->part_info, part_id);
        found= TRUE;
      }
      /* We handled the -1 here, so report 1 (used) or 0 (nothing matched) */
      res= MY_TEST(found);
    }
    /*
      Restore the "used partitions iterator" to the default setting that
      specifies iteration over all partitions.
    */
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
  }

pop_and_go_right:
  /* Pop this key part info off the "stack" */
  ppar->arg_stack_end--;
  ppar->cur_part_fields-=    ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];

  /* A -1 that was not absorbed above must propagate to the caller */
  if (res == -1)
    return -1;
  /* Finally recurse into intervals greater than this one (SEL_ARG::right) */
  if (key_tree->right != &null_element)
  {
    if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
      return -1;
  }
  return (left_res || right_res || res);
}
4066 
4067 
static void mark_all_partitions_as_used(partition_info *part_info)
{
  /*
    "All partitions" is bounded by lock_partitions: partitions excluded by
    explicit partition selection (t PARTITION (pX)) must never be read.
  */
  bitmap_copy(&(part_info->read_partitions),
              &(part_info->lock_partitions));
}
4073 
4074 
4075 /*
4076   Check if field types allow to construct partitioning index description
4077 
4078   SYNOPSIS
4079     fields_ok_for_partition_index()
4080       pfield  NULL-terminated array of pointers to fields.
4081 
4082   DESCRIPTION
4083     For an array of fields, check if we can use all of the fields to create
4084     partitioning index description.
4085 
4086     We can't process GEOMETRY fields - for these fields singlepoint intervals
    can't be generated, and non-singlepoint are "special" kinds of intervals
4088     to which our processing logic can't be applied.
4089 
4090     It is not known if we could process ENUM fields, so they are disabled to be
4091     on the safe side.
4092 
4093   RETURN
4094     TRUE   Yes, fields can be used in partitioning index
4095     FALSE  Otherwise
4096 */
4097 
fields_ok_for_partition_index(Field ** pfield)4098 static bool fields_ok_for_partition_index(Field **pfield)
4099 {
4100   if (!pfield)
4101     return FALSE;
4102   for (; (*pfield); pfield++)
4103   {
4104     enum_field_types ftype= (*pfield)->real_type();
4105     if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4106       return FALSE;
4107   }
4108   return TRUE;
4109 }
4110 
4111 
4112 /*
4113   Create partition index description and fill related info in the context
4114   struct
4115 
4116   SYNOPSIS
4117     create_partition_index_description()
4118       prune_par  INOUT Partition pruning context
4119 
4120   DESCRIPTION
4121     Create partition index description. Partition index description is:
4122 
4123       part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4124 
4125     If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4126     corresponding fields_list(...) is not included into index description
4127     and we don't perform partition pruning for partitions/subpartitions.
4128 
4129   RETURN
4130     TRUE   Out of memory or can't do partition pruning at all
4131     FALSE  OK
4132 */
4133 
static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
{
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);
  partition_info *part_info= ppar->part_info;
  uint used_part_fields, used_subpart_fields;

  /*
    Use the partitioning (resp. subpartitioning) fields only if all of
    them have types we can handle (see fields_ok_for_partition_index());
    otherwise treat that level as contributing no fields at all.
  */
  used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
                      part_info->num_part_fields : 0;
  used_subpart_fields=
    fields_ok_for_partition_index(part_info->subpart_field_array)?
      part_info->num_subpart_fields : 0;

  uint total_parts= used_part_fields + used_subpart_fields;

  ppar->ignore_part_fields= FALSE;
  ppar->part_fields=      used_part_fields;
  ppar->last_part_partno= (int)used_part_fields - 1;

  ppar->subpart_fields= used_subpart_fields;
  /* Subpartitioning fields follow the partitioning fields in the index */
  ppar->last_subpart_partno=
    used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;

  /*
    Select the partition-marking and partition-id callbacks depending on
    whether the table is sub-partitioned.
  */
  if (part_info->is_sub_partitioned())
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
    ppar->get_top_partition_id_func= part_info->get_part_partition_id;
  }
  else
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
    ppar->get_top_partition_id_func= part_info->get_partition_id;
  }

  KEY_PART *key_part;
  MEM_ROOT *alloc= range_par->mem_root;
  /*
    Fail (return TRUE) if there are no usable fields or if any of the
    per-keypart helper arrays cannot be allocated.
  */
  if (!total_parts ||
      !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
                                               total_parts)) ||
      !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
                                                      total_parts)) ||
      !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)) ||
      !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)))
    return TRUE;

  if (ppar->subpart_fields)
  {
    my_bitmap_map *buf;
    uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
    if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
      return TRUE;
    /* Bitmap of used subpartitions within one physical partition */
    bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
                FALSE);
  }
  range_par->key_parts= key_part;
  /*
    If no partitioning fields are usable, start directly with the
    subpartitioning field array.
  */
  Field **field= (ppar->part_fields)? part_info->part_field_array :
                                           part_info->subpart_field_array;
  bool in_subpart_fields= FALSE;
  /* Build one KEY_PART per usable field, as if they formed one index. */
  for (uint part= 0; part < total_parts; part++, key_part++)
  {
    key_part->key=          0;
    key_part->part=	    part;
    key_part->length= (uint16)(*field)->key_length();
    key_part->store_length= (uint16)get_partition_field_store_length(*field);

    DBUG_PRINT("info", ("part %u length %u store_length %u", part,
                         key_part->length, key_part->store_length));

    key_part->field=        (*field);
    key_part->image_type =  Field::itRAW;
    /*
      We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
      in the RangeAnalysisModule.
    */
    key_part->flag=         0;
    /* We don't set key_parts->null_bit as it will not be used */

    ppar->is_part_keypart[part]= !in_subpart_fields;
    ppar->is_subpart_keypart[part]= in_subpart_fields;

    /*
      Check if this was the last field in this array; in that case we
      switch to the subpartitioning fields. (This will only happen if
      there are subpartitioning fields to cater for).
    */
    if (!*(++field))
    {
      field= part_info->subpart_field_array;
      in_subpart_fields= TRUE;
    }
  }
  range_par->key_parts_end= key_part;

  DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
                                                range_par->key_parts_end););
  return FALSE;
}
4232 
4233 
4234 #ifndef DBUG_OFF
4235 
print_partitioning_index(KEY_PART * parts,KEY_PART * parts_end)4236 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4237 {
4238   DBUG_ENTER("print_partitioning_index");
4239   DBUG_LOCK_FILE;
4240   fprintf(DBUG_FILE, "partitioning INDEX(");
4241   for (KEY_PART *p=parts; p != parts_end; p++)
4242   {
4243     fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
4244   }
4245   fputs(");\n", DBUG_FILE);
4246   DBUG_UNLOCK_FILE;
4247   DBUG_VOID_RETURN;
4248 }
4249 
4250 
4251 /* Print a "c1 < keypartX < c2" - type interval into debug trace. */
dbug_print_segment_range(SEL_ARG * arg,KEY_PART * part)4252 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4253 {
4254   DBUG_ENTER("dbug_print_segment_range");
4255   DBUG_LOCK_FILE;
4256   if (!(arg->min_flag & NO_MIN_RANGE))
4257   {
4258     store_key_image_to_rec(part->field, arg->min_value, part->length);
4259     part->field->dbug_print();
4260     if (arg->min_flag & NEAR_MIN)
4261       fputs(" < ", DBUG_FILE);
4262     else
4263       fputs(" <= ", DBUG_FILE);
4264   }
4265 
4266   fprintf(DBUG_FILE, "%s", part->field->field_name);
4267 
4268   if (!(arg->max_flag & NO_MAX_RANGE))
4269   {
4270     if (arg->max_flag & NEAR_MAX)
4271       fputs(" < ", DBUG_FILE);
4272     else
4273       fputs(" <= ", DBUG_FILE);
4274     store_key_image_to_rec(part->field, arg->max_value, part->length);
4275     part->field->dbug_print();
4276   }
4277   fputs("\n", DBUG_FILE);
4278   DBUG_UNLOCK_FILE;
4279   DBUG_VOID_RETURN;
4280 }
4281 
4282 
4283 /*
4284   Print a singlepoint multi-keypart range interval to debug trace
4285 
4286   SYNOPSIS
4287     dbug_print_singlepoint_range()
4288       start  Array of SEL_ARG* ptrs representing conditions on key parts
4289       num    Number of elements in the array.
4290 
4291   DESCRIPTION
4292     This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4293     interval to debug trace.
4294 */
4295 
dbug_print_singlepoint_range(SEL_ARG ** start,uint num)4296 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4297 {
4298   DBUG_ENTER("dbug_print_singlepoint_range");
4299   DBUG_LOCK_FILE;
4300   SEL_ARG **end= start + num;
4301 
4302   for (SEL_ARG **arg= start; arg != end; arg++)
4303   {
4304     Field *field= (*arg)->field;
4305     fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
4306     field->dbug_print();
4307   }
4308   fputs("\n", DBUG_FILE);
4309   DBUG_UNLOCK_FILE;
4310   DBUG_VOID_RETURN;
4311 }
4312 #endif
4313 
4314 /****************************************************************************
4315  * Partition pruning code ends
4316  ****************************************************************************/
4317 #endif
4318 
4319 
4320 /*
4321   Get best plan for a SEL_IMERGE disjunctive expression.
4322   SYNOPSIS
4323     get_best_disjunct_quick()
4324       param     Parameter from check_quick_select function
4325       imerge    Expression to use
4326       read_time Don't create scans with cost > read_time
4327 
4328   NOTES
4329     index_merge cost is calculated as follows:
4330     index_merge_cost =
4331       cost(index_reads) +         (see #1)
4332       cost(rowid_to_row_scan) +   (see #2)
4333       cost(unique_use)            (see #3)
4334 
4335     1. cost(index_reads) =SUM_i(cost(index_read_i))
4336        For non-CPK scans,
4337          cost(index_read_i) = {cost of ordinary 'index only' scan}
4338        For CPK scan,
4339          cost(index_read_i) = {cost of non-'index only' scan}
4340 
4341     2. cost(rowid_to_row_scan)
4342       If table PK is clustered then
4343         cost(rowid_to_row_scan) =
4344           {cost of ordinary clustered PK scan with n_ranges=n_rows}
4345 
4346       Otherwise, we use the following model to calculate costs:
4347       We need to retrieve n_rows rows from file that occupies n_blocks blocks.
4348       We assume that offsets of rows we need are independent variates with
4349       uniform distribution in [0..max_file_offset] range.
4350 
4351       We'll denote block as "busy" if it contains row(s) we need to retrieve
4352       and "empty" if doesn't contain rows we need.
4353 
4354       Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
4355       applies to any block in file). Let x_i be a variate taking value 1 if
4356       block #i is empty and 0 otherwise.
4357 
4358       Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4359 
4360       E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4361         = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4362        ~= n_blocks * exp(-n_rows/n_blocks).
4363 
4364       E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4365        ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
4366 
4367       Average size of "hole" between neighbor non-empty blocks is
4368            E(hole_size) = n_blocks/E(n_busy_blocks).
4369 
4370       The total cost of reading all needed blocks in one "sweep" is:
4371 
4372       E(n_busy_blocks)*
4373        (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
4374 
4375     3. Cost of Unique use is calculated in Unique::get_use_cost function.
4376 
  ROR-union cost is calculated in the same way as index_merge, but instead of
4378   Unique a priority queue is used.
4379 
4380   RETURN
4381     Created read plan
4382     NULL - Out of memory or no read scan could be built.
4383 */
4384 
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time)
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  /* Number of disjuncts (one range tree each) in this SEL_IMERGE */
  uint n_child_scans= imerge->trees_next - imerge->trees;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  /* Points into range_scans at the clustered-PK scan, if one was found */
  TRP_RANGE **cpk_scan= NULL;
  bool imerge_too_expensive= FALSE;
  double imerge_cost= 0.0;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
  /* TRUE while every disjunct has at least one ROR-able scan */
  bool all_scans_ror_able= TRUE;
  /* TRUE while every chosen range scan is itself a ROR scan */
  bool all_scans_rors= TRUE;
  uint unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  double roru_index_costs;
  ha_rows roru_total_records;
  /* Running product of per-scan selectivities (overlap estimate) */
  double roru_intersect_part= 1.0;
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost: %g", read_time));

  DBUG_ASSERT(param->table->file->stats.records);

  Opt_trace_context * const trace= &param->thd->opt_trace;
  Opt_trace_object trace_best_disjunct(trace);
  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);
  // Note: to_merge.end() is called to close this object after this for-loop.
  Opt_trace_array to_merge(trace, "indices_to_merge");
  /*
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
  */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++)
  {
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
    Opt_trace_object trace_idx(trace);
    if (!(*cur_child=
          get_key_scans_params(param, *ptree, true, false, read_time)))
    {
      /*
        One of index scans in this index_merge is more expensive than entire
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
      */
      imerge_too_expensive= true;
    }
    if (imerge_too_expensive)
    {
      trace_idx.add("chosen", false).add_alnum("cause", "cost");
      continue;
    }

    const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
    imerge_cost += (*cur_child)->read_cost;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
    /* The clustered-PK scan is costed differently from the other scans */
    if (pk_is_clustered &&
        keynr_in_table == param->table->s->primary_key)
    {
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
    }
    else
      non_cpk_scan_records += (*cur_child)->records;

    trace_idx.
      add_utf8("index_to_merge", param->table->key_info[keynr_in_table].name).
      add("cumulated_cost", imerge_cost);
  }

  // Note: to_merge trace object is closed here
  to_merge.end();


  trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
  if (imerge_too_expensive || (imerge_cost > read_time) ||
      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
      read_time != DBL_MAX))
  {
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
      more expensive
    */
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
    trace_best_disjunct.add("chosen", false).add_alnum("cause", "cost");
    DBUG_RETURN(NULL);
  }

  /*
    If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
    guaranteed to be cheaper than non-ROR union), unless ROR-unions are
    disabled in @@optimizer_switch
  */
  if (all_scans_rors &&
      param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
  {
    /* All chosen range scans are ROR scans, so the array can be reused */
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    trace_best_disjunct.add("use_roworder_union", true).
      add_alnum("cause", "always_cheaper_than_not_roworder_retrieval");
    goto skip_to_ror_scan;
  }

  if (cpk_scan)
  {
    /*
      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
      is done in QUICK_RANGE_SELECT::row_in_ranges)
     */
    const double rid_comp_cost= non_cpk_scan_records * ROWID_COMPARE_COST;
    imerge_cost+= rid_comp_cost;
    trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
                            rid_comp_cost);
  }

  /* Calculate cost(rowid_to_row_scan) */
  {
    Cost_estimate sweep_cost;
    JOIN *join= param->thd->lex->select_lex.join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, non_cpk_scan_records, is_interrupted,
                        &sweep_cost);
    const double sweep_total_cost= sweep_cost.total_cost();
    imerge_cost+= sweep_total_cost;
    trace_best_disjunct.add("cost_sort_rowid_and_read_disk",
                            sweep_total_cost);
  }
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
                     imerge_cost));
  if (imerge_cost > read_time ||
      !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
  {
    trace_best_disjunct.add("use_roworder_index_merge", true).
      add_alnum("cause", "cost");
    goto build_ror_index_merge;
  }

  /* Add Unique operations cost */
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
                                    param->table->file->ref_length,
                                    param->thd->variables.sortbuff_size);
  if (param->imerge_cost_buff_size < unique_calc_buff_size)
  {
    /* Grow the shared cost-calculation buffer (reused across calls) */
    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                     unique_calc_buff_size)))
      DBUG_RETURN(NULL);
    param->imerge_cost_buff_size= unique_calc_buff_size;
  }

  {
    const double dup_removal_cost=
      Unique::get_use_cost(param->imerge_cost_buff,
                           (uint)non_cpk_scan_records,
                           param->table->file->ref_length,
                           param->thd->variables.sortbuff_size);

    trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost);
    imerge_cost += dup_removal_cost;
    trace_best_disjunct.add("total_cost", imerge_cost);
    DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
                       imerge_cost, read_time));
  }
  if (imerge_cost < read_time)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->read_cost= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
      imerge_trp->records= min(imerge_trp->records,
                               param->table->file->stats.records);
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      /* index_merge becomes the plan to beat for the ROR-union attempt */
      read_time= imerge_cost;
    }
  }

build_ror_index_merge:
  if (!all_scans_ror_able ||
      param->thd->lex->sql_command == SQLCOM_DELETE ||
      !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
    DBUG_RETURN(imerge_trp);

  /* Ok, it is possible to build a ROR-union, try it. */
  if (!(roru_read_plans=
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);
skip_to_ror_scan:
  roru_index_costs= 0.0;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  /*
    Note: trace_analyze_ror.end() is called to close this object after
    this for-loop.
  */
  Opt_trace_array trace_analyze_ror(trace, "analyzing_roworder_scans");
  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
  {
    Opt_trace_object trp_info(trace);
    if (unlikely(trace->is_started()))
      (*cur_child)->trace_basic_info(param, &trp_info);

    /*
      Assume the best ROR scan is the one that has cheapest
      full-row-retrieval scan cost.
      Also accumulate index_only scan costs as we'll need them to
      calculate overall index_intersection cost.
    */
    double cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      cost= param->table->file->
        read_time(param->real_keynr[(*cur_child)->key_idx], 1,
                  (*cur_child)->records) +
        rows2double((*cur_child)->records) * ROW_EVALUATE_COST;
    }
    else
      cost= read_time;

    TABLE_READ_PLAN *prev_plan= *cur_child;
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost)))
    {
      /* No ROR-intersection found: fall back to the plain ROR range scan */
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_costs += (*cur_roru_plan)->read_cost;
    }
    else
      roru_index_costs +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
    roru_total_records += (*cur_roru_plan)->records;
    /*
      NOTE(review): both 'records' and 'stats.records' appear to be
      integer ha_rows values (see the declarations above), so this
      division truncates before the multiplication; a floating-point
      division (e.g. via rows2double) looks intended here -- confirm
      against upstream.
    */
    roru_intersect_part *= (*cur_roru_plan)->records /
      param->table->file->stats.records;
  }
  // Note: trace_analyze_ror trace object is closed here
  trace_analyze_ror.end();

  /*
    rows to retrieve=
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
    This is valid because index_merge construction guarantees that conditions
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
                                  param->table->file->stats.records);
  /* ok, got a ROR read plan for each of the disjuncts
    Calculate cost:
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */
  double roru_total_cost;
  {
    Cost_estimate sweep_cost;
    JOIN *join= param->thd->lex->select_lex.join;
    const bool is_interrupted= join && join->tables != 1;
    get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
                        &sweep_cost);
    /* log2(n) rowid comparisons per row for the n-way merge priority queue */
    roru_total_cost= roru_index_costs +
                     rows2double(roru_total_records) *
                     log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 +
                     sweep_cost.total_cost();
  }

  trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost).
    add("members", n_child_scans);
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_time)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      trace_best_disjunct.add("chosen", true);
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->read_cost= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  trace_best_disjunct.add("chosen", false);

  /* ROR-union was not cheaper: return the index_merge plan (may be NULL) */
  DBUG_RETURN(imerge_trp);
}
4691 
4692 
4693 /*
4694   Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
4695   sel_arg set of intervals.
4696 
4697   SYNOPSIS
4698     make_ror_scan()
4699       param    Parameter from test_quick_select function
4700       idx      Index of key in param->keys
4701       sel_arg  Set of intervals for a given key
4702 
4703   RETURN
4704     NULL - out of memory
4705     ROR scan structure containing a scan for {idx, sel_arg}
4706 */
4707 
4708 static
make_ror_scan(const PARAM * param,int idx,SEL_ARG * sel_arg)4709 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
4710 {
4711   ROR_SCAN_INFO *ror_scan;
4712   my_bitmap_map *bitmap_buf1;
4713   my_bitmap_map *bitmap_buf2;
4714   uint keynr;
4715   DBUG_ENTER("make_ror_scan");
4716 
4717   if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
4718                                              sizeof(ROR_SCAN_INFO))))
4719     DBUG_RETURN(NULL);
4720 
4721   ror_scan->idx= idx;
4722   ror_scan->keynr= keynr= param->real_keynr[idx];
4723   ror_scan->sel_arg= sel_arg;
4724   ror_scan->records= param->table->quick_rows[keynr];
4725 
4726   if (!(bitmap_buf1= (my_bitmap_map*) alloc_root(param->mem_root,
4727                                                  param->fields_bitmap_size)))
4728     DBUG_RETURN(NULL);
4729   if (!(bitmap_buf2= (my_bitmap_map*) alloc_root(param->mem_root,
4730                                                  param->fields_bitmap_size)))
4731     DBUG_RETURN(NULL);
4732 
4733   if (bitmap_init(&ror_scan->covered_fields, bitmap_buf1,
4734                   param->table->s->fields, FALSE))
4735     DBUG_RETURN(NULL);
4736   if (bitmap_init(&ror_scan->covered_fields_remaining, bitmap_buf2,
4737                   param->table->s->fields, FALSE))
4738     DBUG_RETURN(NULL);
4739 
4740   bitmap_clear_all(&ror_scan->covered_fields);
4741 
4742   KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
4743   KEY_PART_INFO *key_part_end=
4744     key_part + param->table->key_info[keynr].user_defined_key_parts;
4745   for (;key_part != key_part_end; ++key_part)
4746   {
4747     if (bitmap_is_set(&param->needed_fields, key_part->fieldnr-1))
4748       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
4749   }
4750   bitmap_copy(&ror_scan->covered_fields_remaining, &ror_scan->covered_fields);
4751 
4752   double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
4753   ror_scan->index_read_cost=
4754     param->table->file->index_only_read_time(ror_scan->keynr, rows);
4755   DBUG_RETURN(ror_scan);
4756 }
4757 
4758 
4759 /**
4760   Compare two ROR_SCAN_INFO* by
4761     1. #fields in this index that are not already covered
4762        by other indexes earlier in the intersect ordering: descending
4763     2. E(#records): ascending
4764 
4765   @param scan1   first ror scan to compare
4766   @param scan2   second ror scan to compare
4767 
4768   @return true if scan1 > scan2, false otherwise
4769 */
is_better_intersect_match(const ROR_SCAN_INFO * scan1,const ROR_SCAN_INFO * scan2)4770 static bool is_better_intersect_match(const ROR_SCAN_INFO *scan1,
4771                                       const ROR_SCAN_INFO *scan2)
4772 {
4773   if (scan1 == scan2)
4774     return false;
4775 
4776   if (scan1->num_covered_fields_remaining >
4777       scan2->num_covered_fields_remaining)
4778     return false;
4779 
4780   if (scan1->num_covered_fields_remaining <
4781       scan2->num_covered_fields_remaining)
4782     return true;
4783 
4784   return (scan1->records > scan2->records);
4785 }
4786 
4787 /**
4788   Sort indexes in an order that is likely to be a good index merge
4789   intersection order. After running this function, [start, ..., end-1]
4790   is ordered according to this strategy:
4791 
4792     1) Minimize the number of indexes that must be used in the
4793        intersection. I.e., the index covering most fields not already
4794        covered by other indexes earlier in the sort order is picked first.
4795     2) When multiple indexes cover equally many uncovered fields, the
4796        index with lowest E(#rows) is chosen.
4797 
4798   Note that all permutations of index ordering are not tested, so this
4799   function may not find the optimal order.
4800 
4801   @param[in,out] start     Pointer to the start of indexes that may
4802                            be used in index merge intersection
4803   @param         end       Pointer past the last index that may be used.
4804   @param         param     Parameter from test_quick_select function.
4805 */
static void find_intersect_order(ROR_SCAN_INFO **start,
                                 ROR_SCAN_INFO **end,
                                 const PARAM *param)
{
  // nothing to sort if there are only zero or one ROR scans
  if ((start == end) || (start + 1 == end))
    return;

  /*
    Bitmap of fields we would like the ROR scans to cover. Will be
    modified by the loop below so that when we're looking for a ROR
    scan in position 'x' in the ordering, all fields covered by ROR
    scans 0,...,x-1 have been removed.
  */
  MY_BITMAP fields_to_cover;
  my_bitmap_map *map;
  if (!(map= (my_bitmap_map*) alloc_root(param->mem_root,
                                         param->fields_bitmap_size)))
    return; /* OOM: best effort - leave the input order unchanged */
  bitmap_init(&fields_to_cover, map, param->needed_fields.n_bits, FALSE);
  bitmap_copy(&fields_to_cover, &param->needed_fields);

  // Sort ROR scans in [start,...,end-1]
  /* Selection sort: for each position, pick the best remaining scan. */
  for (ROR_SCAN_INFO **place= start; place < (end - 1); place++)
  {
    /* Placeholder for the best ROR scan found for position 'place' so far */
    ROR_SCAN_INFO **best= place;
    ROR_SCAN_INFO **current= place + 1;

    {
      /*
        Calculate how many fields in 'fields_to_cover' not already
        covered by [start,...,place-1] the 'best' index covers. The
        result is used in is_better_intersect_match() and is valid
        when finding the best ROR scan for position 'place' only.
      */
      bitmap_intersect(&(*best)->covered_fields_remaining, &fields_to_cover);
      (*best)->num_covered_fields_remaining=
        bitmap_bits_set(&(*best)->covered_fields_remaining);
    }
    for (; current < end; current++)
    {
      {
        /*
          Calculate how many fields in 'fields_to_cover' not already
          covered by [start,...,place-1] the 'current' index covers.
          The result is used in is_better_intersect_match() and is
          valid when finding the best ROR scan for position 'place' only.
        */
        bitmap_intersect(&(*current)->covered_fields_remaining,
                         &fields_to_cover);
        (*current)->num_covered_fields_remaining=
          bitmap_bits_set(&(*current)->covered_fields_remaining);

        /*
          No need to compare with 'best' if 'current' does not
          contribute with uncovered fields.
        */
        if ((*current)->num_covered_fields_remaining == 0)
          continue;
      }

      if (is_better_intersect_match(*best, *current))
        best= current;
    }

    /*
      'best' is now the ROR scan that will be sorted in position
      'place'. When searching for the best ROR scans later in the sort
      sequence we do not need coverage of the fields covered by 'best'
     */
    bitmap_subtract(&fields_to_cover, &(*best)->covered_fields);
    if (best != place)
      swap_variables(ROR_SCAN_INFO*, *best, *place);

    if (bitmap_is_clear_all(&fields_to_cover))
      return;                                   // No more fields to cover
  }
}
4885 
4886 /* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Fraction of table records that satisfies conditions of all scans.
    This is the number of full records that will be retrieved if a
    non-index_only index intersection will be employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records; /* sum(#records to look in indexes) */
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  /* Total cost; initialized to 0.0 here and updated by callers elsewhere */
  double total_cost;
} ROR_INTERSECT_INFO;
4904 
4905 
4906 /*
4907   Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
4908 
4909   SYNOPSIS
4910     ror_intersect_init()
4911       param         Parameter from test_quick_select
4912 
4913   RETURN
4914     allocated structure
4915     NULL on error
4916 */
4917 
4918 static
ror_intersect_init(const PARAM * param)4919 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
4920 {
4921   ROR_INTERSECT_INFO *info;
4922   my_bitmap_map* buf;
4923   if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
4924                                               sizeof(ROR_INTERSECT_INFO))))
4925     return NULL;
4926   info->param= param;
4927   if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
4928                                          param->fields_bitmap_size)))
4929     return NULL;
4930   if (bitmap_init(&info->covered_fields, buf, param->table->s->fields,
4931                   FALSE))
4932     return NULL;
4933   info->is_covering= FALSE;
4934   info->index_scan_costs= 0.0;
4935   info->total_cost= 0.0;
4936   info->index_records= 0;
4937   info->out_rows= (double) param->table->file->stats.records;
4938   bitmap_clear_all(&info->covered_fields);
4939   return info;
4940 }
4941 
ror_intersect_cpy(ROR_INTERSECT_INFO * dst,const ROR_INTERSECT_INFO * src)4942 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
4943 {
4944   dst->param= src->param;
4945   memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
4946          no_bytes_in_map(&src->covered_fields));
4947   dst->out_rows= src->out_rows;
4948   dst->is_covering= src->is_covering;
4949   dst->index_records= src->index_records;
4950   dst->index_scan_costs= src->index_scan_costs;
4951   dst->total_cost= src->total_cost;
4952 }
4953 
4954 
4955 /*
4956   Get selectivity of adding a ROR scan to the ROR-intersection.
4957 
4958   SYNOPSIS
4959     ror_scan_selectivity()
      info  ROR-intersection, an intersection of ROR index scans
4961       scan  ROR scan that may or may not improve the selectivity
4962             of 'info'
4963 
4964   NOTES
4965     Suppose we have conditions on several keys
4966     cond=k_11=c_11 AND k_12=c_12 AND ...  // key_parts of first key in 'info'
4967          k_21=c_21 AND k_22=c_22 AND ...  // key_parts of second key in 'info'
4968           ...
4969          k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //key_parts of 'scan'
4970 
4971     where k_ij may be the same as any k_pq (i.e. keys may have common parts).
4972 
4973     Note that for ROR retrieval, only equality conditions are usable so there
4974     are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'
4975 
4976     A full row is retrieved if entire condition holds.
4977 
4978     The recursive procedure for finding P(cond) is as follows:
4979 
4980     First step:
4981     Pick 1st part of 1st key and break conjunction (1) into two parts:
4982       cond= (k_11=c_11 AND R)
4983 
4984     Here R may still contain condition(s) equivalent to k_11=c_11.
4985     Nevertheless, the following holds:
4986 
4987       P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
4988 
4989     Mark k_11 as fixed field (and satisfied condition) F, save P(F),
4990     save R to be cond and proceed to recursion step.
4991 
4992     Recursion step:
4993     We have a set of fixed fields/satisfied conditions) F, probability P(F),
4994     and remaining conjunction R
4995     Pick next key part on current key and its condition "k_ij=c_ij".
4996     We will add "k_ij=c_ij" into F and update P(F).
4997     Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
4998 
4999      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
5000 
5001     (where '|' mean conditional probability, not "or")
5002 
5003     Consider the first multiplier in (2). One of the following holds:
5004     a) F contains condition on field used in t (i.e. t AND F = F).
5005       Then P(t|F) = 1
5006 
5007     b) F doesn't contain condition on field used in t. Then F and t are
5008      considered independent.
5009 
5010      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
5011           = P(t|fields_before_t_in_key).
5012 
5013      P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
5014                                    #records(fields_before_t_in_key, t)
5015 
5016     The second multiplier is calculated by applying this step recursively.
5017 
5018   IMPLEMENTATION
5019     This function calculates the result of application of the "recursion step"
5020     described above for all fixed key members of a single key, accumulating set
5021     of covered fields, selectivity, etc.
5022 
5023     The calculation is conducted as follows:
5024     Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
5025 
5026      n_{k1}      n_{k2}
5027     --------- * ---------  * .... (3)
5028      n_{k1-1}    n_{k2-1}
5029 
5030     where k1,k2,... are key parts which fields were not yet marked as fixed
5031     ( this is result of application of option b) of the recursion step for
5032       parts of a single key).
5033     Since it is reasonable to expect that most of the fields are not marked
5034     as fixed, we calculate (3) as
5035 
5036                                   n_{i1}      n_{i2}
5037     (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
5038                                   n_{i1-1}    n_{i2-1}
5039 
5040     where i1,i2, .. are key parts that were already marked as fixed.
5041 
5042     In order to minimize number of expensive records_in_range calls we
5043     group and reduce adjacent fractions. Note that on the optimizer's
5044     request, index statistics may be used instead of records_in_range
5045     @see RANGE_OPT_PARAM::use_index_statistics.
5046 
5047   RETURN
5048     Selectivity of given ROR scan, a number between 0 and 1. 1 means that
5049     adding 'scan' to the intersection does not improve the selectivity.
5050 */
5051 
ror_scan_selectivity(const ROR_INTERSECT_INFO * info,const ROR_SCAN_INFO * scan)5052 static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  const TABLE * const table= info->param->table;
  const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;
  /**
    key values tuple, used to store both min_range.key and
    max_range.key. This function is only called for equality ranges;
    open ranges (e.g. "min_value < X < max_value") cannot be used for
    rowid ordered retrieval, so in this function we know that
    min_range.key == max_range.key
  */
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  /* Whether the first key part's field is already covered by 'info' */
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr-1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  ha_rows prev_records= table->file->stats.records;
  DBUG_ENTER("ror_scan_selectivity");

  /*
    Walk 'scan's key parts in index order. Per formula (3) in the
    function comment, a fraction records/prev_records is multiplied
    into selectivity_mult each time the "covered by info" status flips
    between adjacent key parts.
  */
  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr-1));
    if (cur_covered != prev_covered)
    {
      /* create (part1val, ..., part{n-1}val) tuple. */
      bool is_null_range= false;
      ha_rows records;
      if (!tuple_arg)
      {
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= 1;
      }
      /*
        Append the min values of the key parts between the current end
        of the tuple and 'sel_arg' (exclusive) to the search tuple.
      */
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        is_null_range|= tuple_arg->is_null_interval();
        keypart_map= (keypart_map << 1) | 1;
      }
      min_range.length= max_range.length= (size_t) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;

      /*
        Get the number of rows in this range. This is done by calling
        records_in_range() unless all these are true:
          1) The user has requested that index statistics should be used
             for equality ranges to avoid the incurred overhead of
             index dives in records_in_range()
          2) The range is not on the form "x IS NULL". The reason is
             that the number of rows with this value are likely to be
             very different than the values in the index statistics
          3) Index statistics is available.
        @see key_val
      */
      if (!info->param->use_index_statistics ||        // (1)
          is_null_range ||                             // (2)
          !(records= table->key_info[scan->keynr].
                     rec_per_key[tuple_arg->part]))    // (3)
      {
        DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
        DBUG_ASSERT(min_range.length > 0);
        records= (table->file->
                  records_in_range(scan->keynr, &min_range, &max_range));
      }
      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        /*
          Poison prev_records: it is re-set on the next
          covered -> uncovered transition before being read again.
        */
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  /*
    The trailing run of key parts was not covered: multiply in the
    fraction contributed by this scan's estimated row count.
  */
  if (!prev_covered)
  {
    double tmp= rows2double(table->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  // Todo: This assert fires in PB sysqa RQG tests.
  // DBUG_ASSERT(selectivity_mult <= 1.0);
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}
5161 
5162 
5163 /*
5164   Check if adding a ROR scan to a ROR-intersection reduces its cost of
5165   ROR-intersection and if yes, update parameters of ROR-intersection,
5166   including its cost.
5167 
5168   SYNOPSIS
5169     ror_intersect_add()
5170       param        Parameter from test_quick_select
5171       info         ROR-intersection structure to add the scan to.
5172       ror_scan     ROR scan info to add.
5173       is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
5174                    from other parameters and is passed separately only to
5175                    avoid duplicating the inference code)
5176       trace_costs  Optimizer trace object cost details are added to
5177 
5178   NOTES
5179     Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
5180     intersection decreases. The cost of ROR-intersection is calculated as
5181     follows:
5182 
5183     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
5184 
5185     When we add a scan the first increases and the second decreases.
5186 
5187     cost_of_full_rows_retrieval=
5188       (union of indexes used covers all needed fields) ?
5189         cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
5190         0
5191 
5192     E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
5193                            ror_scan_selectivity({scan1}, scan2) * ... *
5194                            ror_scan_selectivity({scan1,...}, scanN).
5195   RETURN
5196     TRUE   ROR scan added to ROR-intersection, cost updated.
5197     FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
5198 */
5199 
ror_intersect_add(ROR_INTERSECT_INFO * info,ROR_SCAN_INFO * ror_scan,bool is_cpk_scan,Opt_trace_object * trace_costs)5200 static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
5201                               ROR_SCAN_INFO* ror_scan, bool is_cpk_scan,
5202                               Opt_trace_object *trace_costs)
5203 {
5204   double selectivity_mult= 1.0;
5205 
5206   DBUG_ENTER("ror_intersect_add");
5207   DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
5208   DBUG_PRINT("info", ("Adding scan on %s",
5209                       info->param->table->key_info[ror_scan->keynr].name));
5210   DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));
5211 
5212   selectivity_mult = ror_scan_selectivity(info, ror_scan);
5213   if (selectivity_mult == 1.0)
5214   {
5215     /* Don't add this scan if it doesn't improve selectivity. */
5216     DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
5217     DBUG_RETURN(FALSE);
5218   }
5219 
5220   info->out_rows *= selectivity_mult;
5221 
5222   if (is_cpk_scan)
5223   {
5224     /*
5225       CPK scan is used to filter out rows. We apply filtering for
5226       each record of every scan. Assuming ROWID_COMPARE_COST
5227       per check this gives us:
5228     */
5229     const double idx_cost=
5230       rows2double(info->index_records) * ROWID_COMPARE_COST;
5231     info->index_scan_costs+= idx_cost;
5232     trace_costs->add("index_scan_cost", idx_cost);
5233   }
5234   else
5235   {
5236     info->index_records += info->param->table->quick_rows[ror_scan->keynr];
5237     info->index_scan_costs += ror_scan->index_read_cost;
5238     trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
5239     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
5240     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
5241                                                &info->covered_fields))
5242     {
5243       DBUG_PRINT("info", ("ROR-intersect is covering now"));
5244       info->is_covering= TRUE;
5245     }
5246   }
5247 
5248   info->total_cost= info->index_scan_costs;
5249   trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
5250 
5251   if (!info->is_covering)
5252   {
5253     Cost_estimate sweep_cost;
5254     JOIN *join= info->param->thd->lex->select_lex.join;
5255     const bool is_interrupted= join && join->tables != 1;
5256     get_sweep_read_cost(info->param->table, double2rows(info->out_rows),
5257                         is_interrupted, &sweep_cost);
5258     info->total_cost += sweep_cost.total_cost();
5259     trace_costs->add("disk_sweep_cost", sweep_cost.total_cost());
5260   }
5261   else
5262     trace_costs->add("disk_sweep_cost", 0);
5263 
5264   DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
5265   DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
5266                       info->is_covering?"" : "non-"));
5267   DBUG_RETURN(TRUE);
5268 }
5269 
5270 
5271 /*
5272   Get best ROR-intersection plan using non-covering ROR-intersection search
5273   algorithm. The returned plan may be covering.
5274 
5275   SYNOPSIS
5276     get_best_ror_intersect()
5277       param            Parameter from test_quick_select function.
5278       tree             Transformed restriction condition to be used to look
5279                        for ROR scans.
5280       read_time        Do not return read plans with cost > read_time.
5281       are_all_covering [out] set to TRUE if union of all scans covers all
5282                        fields needed by the query (and it is possible to build
5283                        a covering ROR-intersection)
5284 
5285   NOTES
5286     get_key_scans_params must be called before this function can be called.
5287 
5288     When this function is called by ROR-union construction algorithm it
5289     assumes it is building an uncovered ROR-intersection (and thus # of full
5290     records to be retrieved is wrong here). This is a hack.
5291 
5292   IMPLEMENTATION
5293     The approximate best non-covering plan search algorithm is as follows:
5294 
5295     find_min_ror_intersection_scan()
5296     {
5297       R= select all ROR scans;
5298       order R by (E(#records_matched) * key_record_length).
5299 
5300       S= first(R); -- set of scans that will be used for ROR-intersection
5301       R= R-first(S);
5302       min_cost= cost(S);
5303       min_scan= make_scan(S);
5304       while (R is not empty)
5305       {
5306         firstR= R - first(R);
        if (!(selectivity(S + firstR) < selectivity(S)))
5308           continue;
5309 
5310         S= S + first(R);
5311         if (cost(S) < min_cost)
5312         {
5313           min_cost= cost(S);
5314           min_scan= make_scan(S);
5315         }
5316       }
5317       return min_scan;
5318     }
5319 
5320     See ror_intersect_add function for ROR intersection costs.
5321 
5322     Special handling for Clustered PK scans
5323     Clustered PK contains all table fields, so using it as a regular scan in
5324     index intersection doesn't make sense: a range scan on CPK will be less
5325     expensive in this case.
5326     Clustered PK scan has special handling in ROR-intersection: it is not used
5327     to retrieve rows, instead its condition is used to filter row references
5328     we get from scans on other keys.
5329 
5330   RETURN
5331     ROR-intersection table read plan
5332     NULL if out of memory or no suitable plan found.
5333 */
5334 
5335 static
get_best_ror_intersect(const PARAM * param,SEL_TREE * tree,double read_time)5336 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
5337                                           double read_time)
5338 {
5339   uint idx;
5340   double min_cost= DBL_MAX;
5341   Opt_trace_context * const trace= &param->thd->opt_trace;
5342   DBUG_ENTER("get_best_ror_intersect");
5343 
5344   Opt_trace_object trace_ror(trace, "analyzing_roworder_intersect");
5345 
5346   if ((tree->n_ror_scans < 2) || !param->table->file->stats.records ||
5347       !param->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
5348   {
5349     trace_ror.add("usable", false);
5350     if (tree->n_ror_scans < 2)
5351       trace_ror.add_alnum("cause", "too_few_roworder_scans");
5352     else
5353       trace_ror.add("need_tracing", true);
5354     DBUG_RETURN(NULL);
5355   }
5356 
5357   if (param->order_direction == ORDER::ORDER_DESC)
5358     DBUG_RETURN(NULL);
5359 
5360   /*
5361     Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
5362     them. Also find and save clustered PK scan if there is one.
5363   */
5364   ROR_SCAN_INFO **cur_ror_scan;
5365   ROR_SCAN_INFO *cpk_scan= NULL;
5366   uint cpk_no;
5367   bool cpk_scan_used= FALSE;
5368 
5369   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5370                                                      sizeof(ROR_SCAN_INFO*)*
5371                                                      param->keys)))
5372     return NULL;
5373   cpk_no= ((param->table->file->primary_key_is_clustered()) ?
5374            param->table->s->primary_key : MAX_KEY);
5375 
5376   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
5377   {
5378     ROR_SCAN_INFO *scan;
5379     if (!tree->ror_scans_map.is_set(idx))
5380       continue;
5381     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
5382       return NULL;
5383     if (param->real_keynr[idx] == cpk_no)
5384     {
5385       cpk_scan= scan;
5386       tree->n_ror_scans--;
5387     }
5388     else
5389       *(cur_ror_scan++)= scan;
5390   }
5391 
5392   tree->ror_scans_end= cur_ror_scan;
5393   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
5394                                           tree->ror_scans,
5395                                           tree->ror_scans_end););
5396   /*
5397     Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
5398     ROR_SCAN_INFO's.
5399     Step 2: Get best ROR-intersection using an approximate algorithm.
5400   */
5401   find_intersect_order(tree->ror_scans, tree->ror_scans_end, param);
5402 
5403   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
5404                                           tree->ror_scans,
5405                                           tree->ror_scans_end););
5406 
5407   ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
5408   ROR_SCAN_INFO **intersect_scans_end;
5409   if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5410                                                      sizeof(ROR_SCAN_INFO*)*
5411                                                      tree->n_ror_scans)))
5412     return NULL;
5413   intersect_scans_end= intersect_scans;
5414 
5415   /* Create and incrementally update ROR intersection. */
5416   ROR_INTERSECT_INFO *intersect, *intersect_best;
5417   if (!(intersect= ror_intersect_init(param)) ||
5418       !(intersect_best= ror_intersect_init(param)))
5419     return NULL;
5420 
5421   /* [intersect_scans,intersect_scans_best) will hold the best intersection */
5422   ROR_SCAN_INFO **intersect_scans_best;
5423   cur_ror_scan= tree->ror_scans;
5424   intersect_scans_best= intersect_scans;
5425   /*
5426     Note: trace_isect_idx.end() is called to close this object after
5427     this while-loop.
5428   */
5429   Opt_trace_array trace_isect_idx(trace, "intersecting_indices");
5430   while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
5431   {
5432     Opt_trace_object trace_idx(trace);
5433     trace_idx.add_utf8("index",
5434                        param->table->key_info[(*cur_ror_scan)->keynr].name);
5435     /* S= S + first(R);  R= R - first(R); */
5436     if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE, &trace_idx))
5437     {
5438       trace_idx.add("cumulated_total_cost", intersect->total_cost).
5439         add("usable", false).
5440         add_alnum("cause", "does_not_reduce_cost_of_intersect");
5441       cur_ror_scan++;
5442       continue;
5443     }
5444 
5445     trace_idx.add("cumulated_total_cost", intersect->total_cost).
5446       add("usable", true).
5447       add("matching_rows_now", intersect->out_rows).
5448       add("isect_covering_with_this_index", intersect->is_covering);
5449 
5450     *(intersect_scans_end++)= *(cur_ror_scan++);
5451 
5452     if (intersect->total_cost < min_cost)
5453     {
5454       /* Local minimum found, save it */
5455       ror_intersect_cpy(intersect_best, intersect);
5456       intersect_scans_best= intersect_scans_end;
5457       min_cost = intersect->total_cost;
5458       trace_idx.add("chosen", true);
5459     }
5460     else
5461     {
5462       trace_idx.add("chosen", false).
5463         add_alnum("cause", "does_not_reduce_cost");
5464     }
5465   }
5466   // Note: trace_isect_idx trace object is closed here
5467   trace_isect_idx.end();
5468 
5469   if (intersect_scans_best == intersect_scans)
5470   {
5471     trace_ror.add("chosen", false).
5472       add_alnum("cause", "does_not_increase_selectivity");
5473     DBUG_PRINT("info", ("None of scans increase selectivity"));
5474     DBUG_RETURN(NULL);
5475   }
5476 
5477   DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
5478                                           "best ROR-intersection",
5479                                           intersect_scans,
5480                                           intersect_scans_best););
5481 
5482   uint best_num= intersect_scans_best - intersect_scans;
5483   ror_intersect_cpy(intersect, intersect_best);
5484 
5485   /*
5486     Ok, found the best ROR-intersection of non-CPK key scans.
5487     Check if we should add a CPK scan. If the obtained ROR-intersection is
5488     covering, it doesn't make sense to add CPK scan.
5489   */
5490   { // Scope for trace object
5491     Opt_trace_object trace_cpk(trace, "clustered_pk");
5492     if (cpk_scan && !intersect->is_covering)
5493     {
5494       if (ror_intersect_add(intersect, cpk_scan, TRUE, &trace_cpk) &&
5495           (intersect->total_cost < min_cost))
5496       {
5497         trace_cpk.add("clustered_pk_scan_added_to_intersect", true).
5498           add("cumulated_cost", intersect->total_cost);
5499         cpk_scan_used= TRUE;
5500         intersect_best= intersect; //just set pointer here
5501       }
5502       else
5503         trace_cpk.add("clustered_pk_added_to_intersect", false).
5504           add_alnum("cause", "cost");
5505     }
5506     else
5507     {
5508       trace_cpk.add("clustered_pk_added_to_intersect", false).
5509         add_alnum("cause", cpk_scan ?
5510                   "roworder_is_covering" : "no_clustered_pk_index");
5511     }
5512   }
5513   /* Ok, return ROR-intersect plan if we have found one */
5514   TRP_ROR_INTERSECT *trp= NULL;
5515   if (min_cost < read_time && (cpk_scan_used || best_num > 1))
5516   {
5517     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
5518       DBUG_RETURN(trp);
5519     if (!(trp->first_scan=
5520            (ROR_SCAN_INFO**)alloc_root(param->mem_root,
5521                                        sizeof(ROR_SCAN_INFO*)*best_num)))
5522       DBUG_RETURN(NULL);
5523     memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
5524     trp->last_scan=  trp->first_scan + best_num;
5525     trp->is_covering= intersect_best->is_covering;
5526     trp->read_cost= intersect_best->total_cost;
5527     /* Prevent divisons by zero */
5528     ha_rows best_rows = double2rows(intersect_best->out_rows);
5529     if (!best_rows)
5530       best_rows= 1;
5531     set_if_smaller(param->table->quick_condition_rows, best_rows);
5532     trp->records= best_rows;
5533     trp->index_scan_costs= intersect_best->index_scan_costs;
5534     trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
5535 
5536     trace_ror.add("rows", trp->records).
5537       add("cost", trp->read_cost).
5538       add("covering", trp->is_covering).
5539       add("chosen", true);
5540 
5541     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
5542                         "cost %g, records %lu",
5543                         trp->read_cost, (ulong) trp->records));
5544   }
5545   else
5546   {
5547     trace_ror.add("chosen", false).
5548       add_alnum("cause", (min_cost >= read_time) ? "cost" :
5549                 "too_few_indexes_to_merge");
5550 
5551   }
5552   DBUG_RETURN(trp);
5553 }
5554 
5555 /*
5556   Get best "range" table read plan for given SEL_TREE, also update some info
5557 
5558   SYNOPSIS
5559     get_key_scans_params()
5560       param                    Parameters from test_quick_select
5561       tree                     Make range select for this SEL_TREE
5562       index_read_must_be_used  TRUE <=> assume 'index only' option will be set
5563                                (except for clustered PK indexes)
5564       update_tbl_stats         TRUE <=> update table->quick_* with information
5565                                about range scans we've evaluated.
5566       read_time                Maximum cost. i.e. don't create read plans with
5567                                cost > read_time.
5568 
5569   DESCRIPTION
5570     Find the best "range" table read plan for given SEL_TREE.
5571     The side effects are
5572      - tree->ror_scans is updated to indicate which scans are ROR scans.
5573      - if update_tbl_stats=TRUE then table->quick_* is updated with info
5574        about every possible range scan.
5575 
5576   RETURN
5577     Best range read plan
5578     NULL if no plan found or error occurred
5579 */
5580 
get_key_scans_params(PARAM * param,SEL_TREE * tree,bool index_read_must_be_used,bool update_tbl_stats,double read_time)5581 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool update_tbl_stats,
                                       double read_time)
{
  uint idx;
  SEL_ARG **key,**end, **key_to_read= NULL;
  ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
  uint    best_mrr_flags, best_buf_size;
  TRP_RANGE* read_plan= NULL;
  DBUG_ENTER("get_key_scans_params");
  LINT_INIT(best_mrr_flags); /* protected by key_to_read */
  LINT_INIT(best_buf_size); /* protected by key_to_read */
  Opt_trace_context * const trace= &param->thd->opt_trace;
  /*
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
    is defined as "not null".
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
                                      "tree scans"););
  Opt_trace_array ota(trace, "range_scan_alternatives");

  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  /* Evaluate a possible range scan for every index with a SEL_ARG tree. */
  for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
  {
    if (*key)
    {
      ha_rows found_records;
      Cost_estimate cost;
      double found_read_time;
      uint mrr_flags, buf_size;
      uint keynr= param->real_keynr[idx];
      /*
        MAYBE_KEY/maybe_flag: the tree cannot be fully evaluated as a
        range; record the key in needed_reg — presumably so it can
        still be considered later (e.g. re-checked per record).
        NOTE(review): confirm exact needed_reg semantics at callers.
      */
      if ((*key)->type == SEL_ARG::MAYBE_KEY ||
          (*key)->maybe_flag)
        param->needed_reg->set_bit(keynr);

      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->covering_keys.is_set(keynr);

      Opt_trace_object trace_idx(trace);
      trace_idx.add_utf8("index", param->table->key_info[keynr].name);

      /* Estimate rows/cost of a range scan on this index. */
      found_records= check_quick_select(param, idx, read_index_only, *key,
                                        update_tbl_stats, &mrr_flags,
                                        &buf_size, &cost);

#ifdef OPTIMIZER_TRACE
      // check_quick_select() says don't use range if it returns HA_POS_ERROR
      if (found_records != HA_POS_ERROR &&
          param->thd->opt_trace.is_started())
      {
        Opt_trace_array trace_range(&param->thd->opt_trace, "ranges");

        const KEY &cur_key= param->table->key_info[keynr];
        const KEY_PART_INFO *key_part= cur_key.key_part;

        String range_info;
        range_info.set_charset(system_charset_info);
        append_range_all_keyparts(&trace_range, NULL,
                                  &range_info, *key, key_part);
        trace_range.end(); // NOTE: ends the tracing scope

        trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics).
          add("rowid_ordered", param->is_ror_scan).
          add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)).
          add("index_only", read_index_only).
          add("rows", found_records).
          add("cost", cost.total_cost());
      }
#endif

      /* Remember which scans are rowid-ordered (usable for ROR merging). */
      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }


      /*
        Cheaper than the best plan so far: remember it and tighten the
        cost limit used for the remaining indexes.
      */
      if (found_records != HA_POS_ERROR &&
          read_time > (found_read_time= cost.total_cost()))
      {
        trace_idx.add("chosen", true);
        read_time=    found_read_time;
        best_records= found_records;
        key_to_read=  key;
        best_mrr_flags= mrr_flags;
        best_buf_size=  buf_size;
      }
      else
        trace_idx.add("chosen", false).
          add_alnum("cause",
                    (found_records == HA_POS_ERROR) ? "unknown" : "cost");

    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  /* Build the TRP_RANGE for the cheapest usable key, if one was found. */
  if (key_to_read)
  {
    idx= key_to_read - tree->keys;
    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
                                                    best_mrr_flags)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(idx);
      read_plan->read_cost= read_time;
      read_plan->mrr_buf_size= best_buf_size;
      DBUG_PRINT("info",
                ("Returning range plan for key %s, cost %g, records %lu",
                 param->table->key_info[param->real_keynr[idx]].name,
                 read_plan->read_cost, (ulong) read_plan->records));
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}
5702 
5703 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5704 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
5705                                             bool retrieve_full_rows,
5706                                             MEM_ROOT *parent_alloc)
5707 {
5708   QUICK_INDEX_MERGE_SELECT *quick_imerge;
5709   QUICK_RANGE_SELECT *quick;
5710   /* index_merge always retrieves full rows, ignore retrieve_full_rows */
5711   if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
5712     return NULL;
5713 
5714   quick_imerge->records= records;
5715   quick_imerge->read_time= read_cost;
5716   for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
5717        range_scan++)
5718   {
5719     if (!(quick= (QUICK_RANGE_SELECT*)
5720           ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
5721         quick_imerge->push_quick_back(quick))
5722     {
5723       delete quick;
5724       delete quick_imerge;
5725       return NULL;
5726     }
5727   }
5728   return quick_imerge;
5729 }
5730 
/*
  Build a QUICK_ROR_INTERSECT_SELECT from this ROR-intersection plan.

  Each member scan (and the optional clustered-PK scan) is converted to a
  QUICK_RANGE_SELECT; all are allocated either on the caller-supplied
  parent_alloc or on the intersect object's own MEM_ROOT.
*/
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  /*
    Full rows need to be fetched only when requested AND the intersection
    is not covering (a covering intersection can serve rows from the
    indexes alone).
  */
  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    /* Child quick selects live on the parent's root if one was given. */
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (st_ror_scan_info **current= first_scan;
         current != last_scan;
         current++)
    {
      if (!(quick= get_quick_select(param, (*current)->idx,
                                    (*current)->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        /* Failure: free the container (children live on 'alloc'). */
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /*
        NOTE(review): file is reset to NULL before storing the scan as
        cpk_quick — presumably so the clustered-PK scan is used only as a
        row filter and never opens its own handler; confirm against
        QUICK_ROR_INTERSECT_SELECT's use of cpk_quick.
      */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}
5782 
5783 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)5784 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
5785                                           bool retrieve_full_rows,
5786                                           MEM_ROOT *parent_alloc)
5787 {
5788   QUICK_ROR_UNION_SELECT *quick_roru;
5789   TABLE_READ_PLAN **scan;
5790   QUICK_SELECT_I *quick;
5791   DBUG_ENTER("TRP_ROR_UNION::make_quick");
5792   /*
5793     It is impossible to construct a ROR-union that will not retrieve full
5794     rows, ignore retrieve_full_rows parameter.
5795   */
5796   if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
5797   {
5798     for (scan= first_ror; scan != last_ror; scan++)
5799     {
5800       if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
5801           quick_roru->push_quick_back(quick))
5802         DBUG_RETURN(NULL);
5803     }
5804     quick_roru->records= records;
5805     quick_roru->read_time= read_cost;
5806   }
5807   DBUG_RETURN(quick_roru);
5808 }
5809 
5810 
5811 /**
5812    If EXPLAIN EXTENDED, add a warning that the index cannot be
5813    used for range access due to either type conversion or different
5814    collations on the field used for comparison
5815 
5816    @param param              PARAM from SQL_SELECT::test_quick_select
5817    @param key_num            Key number
5818    @param field              Field in the predicate
5819  */
5820 static void
if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM * param,const uint key_num,const Field * field)5821 if_extended_explain_warn_index_not_applicable(const RANGE_OPT_PARAM *param,
5822                                               const uint key_num,
5823                                               const Field *field)
5824 {
5825   if (param->using_real_indexes &&
5826       param->thd->lex->describe & DESCRIBE_EXTENDED)
5827     push_warning_printf(
5828             param->thd,
5829             Sql_condition::WARN_LEVEL_WARN,
5830             ER_WARN_INDEX_NOT_APPLICABLE,
5831             ER(ER_WARN_INDEX_NOT_APPLICABLE),
5832             "range",
5833             field->table->key_info[param->real_keynr[key_num]].name,
5834             field->field_name);
5835 }
5836 
5837 
5838 /*
5839   Build a SEL_TREE for <> or NOT BETWEEN predicate
5840 
5841   SYNOPSIS
5842     get_ne_mm_tree()
5843       param       PARAM from SQL_SELECT::test_quick_select
5844       cond_func   item for the predicate
5845       field       field in the predicate
5846       lt_value    constant that the field should be smaller than
5847       gt_value    constant that the field should be greater than
5848       cmp_type    compare type for the field
5849 
5850   RETURN
5851     #  Pointer to the built tree
5852     0  on error
5853 */
get_ne_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item * lt_value,Item * gt_value,Item_result cmp_type)5854 static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
5855                                 Field *field,
5856                                 Item *lt_value, Item *gt_value,
5857                                 Item_result cmp_type)
5858 {
5859   SEL_TREE *tree;
5860   tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
5861                      lt_value, cmp_type);
5862   if (tree)
5863   {
5864     tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
5865 					    Item_func::GT_FUNC,
5866 					    gt_value, cmp_type));
5867   }
5868   return tree;
5869 }
5870 
5871 
5872 /*
5873   Build a SEL_TREE for a simple predicate
5874 
5875   SYNOPSIS
5876     get_func_mm_tree()
5877       param       PARAM from SQL_SELECT::test_quick_select
5878       cond_func   item for the predicate
5879       field       field in the predicate
5880       value       constant in the predicate
5881       cmp_type    compare type for the field
5882       inv         TRUE <> NOT cond_func is considered
5883                   (makes sense only when cond_func is BETWEEN or IN)
5884 
5885   RETURN
5886     Pointer to the built tree
5887 */
5888 
static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
                                  Field *field, Item *value,
                                  Item_result cmp_type, bool inv)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

  switch (cond_func->functype()) {

  case Item_func::XOR_FUNC:
    DBUG_RETURN(NULL); // Always true (don't use range access on XOR).
    break;             // See WL#5800

  case Item_func::NE_FUNC:
    /* field <> value  ==>  (field < value) OR (field > value) */
    tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
    break;

  case Item_func::BETWEEN:
  {
    /*
      value == NULL means 'field' is the first BETWEEN argument
      (field [NOT] BETWEEN low AND high); otherwise 'value' encodes
      which bound argument 'field' is (see get_full_func_mm_tree()).
    */
    if (!value)
    {
      if (inv)
      {
        /* NOT BETWEEN: (field < low) OR (field > high) */
        tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1],
                             cond_func->arguments()[2], cmp_type);
      }
      else
      {
        /* BETWEEN: (field >= low) AND (field <= high) */
        tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
		           cond_func->arguments()[1],cmp_type);
        if (tree)
        {
          tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
					           Item_func::LE_FUNC,
					           cond_func->arguments()[2],
                                                   cmp_type));
        }
      }
    }
    else
      /*
        'field' is a bound of the BETWEEN: pick the comparison that the
        constant first argument implies for it (inverted for NOT BETWEEN).
      */
      tree= get_mm_parts(param, cond_func, field,
                         (inv ?
                          (value == (Item*)1 ? Item_func::GT_FUNC :
                                               Item_func::LT_FUNC):
                          (value == (Item*)1 ? Item_func::LE_FUNC :
                                               Item_func::GE_FUNC)),
                         cond_func->arguments()[0], cmp_type);
    break;
  }
  case Item_func::IN_FUNC:
  {
    Item_func_in *func=(Item_func_in*) cond_func;

    /*
      Array for IN() is constructed when all values have the same result
      type. Tree won't be built for values with different result types,
      so we check it here to avoid unnecessary work.
    */
    if (!func->arg_types_compatible)
      break;

    if (inv)
    {
      if (func->array && func->array->result_type() != ROW_RESULT)
      {
        /*
          We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
          where c{i} are constants. Our goal is to produce a SEL_TREE that
          represents intervals:

          ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)

          where $MIN is either "-inf" or NULL.

          The most straightforward way to produce it is to convert NOT IN
          into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
          analyzer to build SEL_TREE from that. The problem is that the
          range analyzer will use O(N^2) memory (which is probably a bug),
          and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
          will run out of memory.

          Another problem with big lists like (*) is that a big list is
          unlikely to produce a good "range" access, while considering that
          range access will require expensive CPU calculations (and for
          MyISAM even index accesses). In short, big NOT IN lists are rarely
          worth analyzing.

          Considering the above, we'll handle NOT IN as follows:
          * if the number of entries in the NOT IN list is less than
            NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
          * Otherwise, don't produce a SEL_TREE.
        */
#define NOT_IN_IGNORE_THRESHOLD 1000
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        /*
          Create one Item_type constant object. We'll need it as
          get_mm_parts only accepts constant values wrapped in Item_Type
          objects.
          We create the Item on param->mem_root which points to
          per-statement mem_root (while thd->mem_root is currently pointing
          to mem_root local to range optimizer).
        */
        Item *value_item= func->array->create_item();
        param->thd->mem_root= tmp_root;

        if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
          break;

        /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
        uint i=0;
        /*
          Skip over leading list values that yield IMPOSSIBLE trees
          (e.g. constants outside the field's domain).
        */
        do
        {
          func->array->value_to_item(i, value_item);
          tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                             value_item, cmp_type);
          if (!tree)
            break;
          i++;
        } while (i < func->array->count && tree->type == SEL_TREE::IMPOSSIBLE);

        if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
        {
          /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
          tree= NULL;
          break;
        }
        SEL_TREE *tree2;
        for (; i < func->array->count; i++)
        {
          /* compare_elems() lets us skip duplicated list values. */
          if (func->array->compare_elems(i, i-1))
          {
            /* Get a SEL_TREE for "-inf < X < c_i" interval */
            func->array->value_to_item(i, value_item);
            tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                                value_item, cmp_type);
            if (!tree2)
            {
              tree= NULL;
              break;
            }

            /* Change all intervals to be "c_{i-1} < X < c_i" */
            for (uint idx= 0; idx < param->keys; idx++)
            {
              SEL_ARG *new_interval, *last_val;
              if (((new_interval= tree2->keys[idx])) &&
                  (tree->keys[idx]) &&
                  ((last_val= tree->keys[idx]->last())))
              {
                new_interval->min_value= last_val->max_value;
                new_interval->min_flag= NEAR_MIN;

                /*
                  If the interval is over a partial keypart, the
                  interval must be "c_{i-1} <= X < c_i" instead of
                  "c_{i-1} < X < c_i". Reason:

                  Consider a table with a column "my_col VARCHAR(3)",
                  and an index with definition
                  "INDEX my_idx my_col(1)". If the table contains rows
                  with my_col values "f" and "foo", the index will not
                  distinguish the two rows.

                  Note that tree_or() below will effectively merge
                  this range with the range created for c_{i-1} and
                  we'll eventually end up with only one range:
                  "NULL < X".

                  Partitioning indexes are never partial.
                */
                if (param->using_real_indexes)
                {
                  const KEY key=
                    param->table->key_info[param->real_keynr[idx]];
                  const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                  if (kpi->key_part_flag & HA_PART_KEY_SEG)
                    new_interval->min_flag= 0;
                }
              }
            }
            /*
              The following doesn't try to allocate memory so no need to
              check for NULL.
            */
            tree= tree_or(param, tree, tree2);
          }
        }

        if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
        {
          /*
            Get the SEL_TREE for the last "c_last < X < +inf" interval
            (value_item contains c_last already)
          */
          tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC,
                              value_item, cmp_type);
          tree= tree_or(param, tree, tree2);
        }
      }
      else
      {
        /*
          No constant array (e.g. row values): fall back to ANDing the
          (field <> c_i) trees for each list element.
        */
        tree= get_ne_mm_tree(param, cond_func, field,
                             func->arguments()[1], func->arguments()[1],
                             cmp_type);
        if (tree)
        {
          Item **arg, **end;
          for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
               arg < end ; arg++)
          {
            tree=  tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
                                                        *arg, *arg, cmp_type));
          }
        }
      }
    }
    else
    {
      /* Plain IN: OR together (field = c_i) for every list element. */
      tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC,
                         func->arguments()[1], cmp_type);
      if (tree)
      {
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
                                                  Item_func::EQ_FUNC,
                                                  *arg, cmp_type));
        }
      }
    }
    break;
  }
  default:
  {
    /*
       Here the function for the following predicates are processed:
       <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL and GIS functions.
       If the predicate is of the form (value op field) it is handled
       as the equivalent predicate (field rev_op value), e.g.
       2 <= a is handled as a >= 2.
    */
    Item_func::Functype func_type=
      (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
    tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
  }
  }

  DBUG_RETURN(tree);
}
6143 
6144 
6145 /*
6146   Build conjunction of all SEL_TREEs for a simple predicate applying equalities
6147 
6148   SYNOPSIS
6149     get_full_func_mm_tree()
6150       param       PARAM from SQL_SELECT::test_quick_select
6151       cond_func   item for the predicate
6152       field_item  field in the predicate
6153       value       constant in the predicate (or a field already read from
6154                   a table in the case of dynamic range access)
6155                   (for BETWEEN it contains the number of the field argument,
6156                    for IN it's always 0)
6157       inv         TRUE <> NOT cond_func is considered
6158                   (makes sense only when cond_func is BETWEEN or IN)
6159 
6160   DESCRIPTION
6161     For a simple SARGable predicate of the form (f op c), where f is a field and
6162     c is a constant, the function builds a conjunction of all SEL_TREES that can
6163     be obtained by the substitution of f for all different fields equal to f.
6164 
6165   NOTES
6166     If the WHERE condition contains a predicate (fi op c),
6167     then not only the SEL_TREE for this predicate is built, but
6168     the trees for the results of substitution of fi for
6169     each fj belonging to the same multiple equality as fi
6170     are built as well.
6171     E.g. for WHERE t1.a=t2.a AND t2.a > 10
6172     a SEL_TREE for t2.a > 10 will be built for quick select from t2
6173     and
6174     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
6175 
6176     A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
6177     in a similar way: we build a conjunction of trees for the results
6178     of all substitutions of fi for equal fj.
6179     Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
6180     differently. It is considered as a conjunction of two SARGable
6181     predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
6182     is called for each of them separately producing trees for
6183        AND j (f1j <=c ) and AND j (f2j <= c)
6184     After this these two trees are united in one conjunctive tree.
6185     It's easy to see that the same tree is obtained for
6186        AND j,k (f1j <=c AND f2k<=c)
6187     which is equivalent to
6188        AND j,k (c BETWEEN f1j AND f2k).
6189     The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
6190     which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
6191     function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
6192     producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two
6193     trees are united in one OR-tree. The expression
6194       (AND j (f1j > c) OR AND j (f2j < c)
6195     is equivalent to the expression
6196       AND j,k (f1j > c OR f2k < c)
6197     which is just a translation of
6198       AND j,k (c NOT BETWEEN f1j AND f2k)
6199 
6200     In the cases when one of the items f1, f2 is a constant c1 we do not create
6201     a tree for it at all. It works for BETWEEN predicates but does not
6202     work for NOT BETWEEN predicates as we have to evaluate the expression
6203     with it. If it is TRUE then the other tree can be completely ignored.
6204     We do not do it now and no trees are built in these cases for
6205     NOT BETWEEN predicates.
6206 
6207     As to IN predicates only ones of the form (f IN (c1,...,cn)),
6208     where f1 is a field and c1,...,cn are constant, are considered as
6209     SARGable. We never try to narrow the index scan using predicates of
6210     the form (c IN (c1,...,f,...,cn)).
6211 
6212   RETURN
6213     Pointer to the tree representing the built conjunction of SEL_TREEs
6214 */
6215 
get_full_func_mm_tree(RANGE_OPT_PARAM * param,Item_func * cond_func,Item_field * field_item,Item * value,bool inv)6216 static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
6217                                        Item_func *cond_func,
6218                                        Item_field *field_item, Item *value,
6219                                        bool inv)
6220 {
6221   SEL_TREE *tree= 0;
6222   SEL_TREE *ftree= 0;
6223   table_map ref_tables= 0;
6224   table_map param_comp= ~(param->prev_tables | param->read_tables |
6225 		          param->current_table);
6226   DBUG_ENTER("get_full_func_mm_tree");
6227 
6228   for (uint i= 0; i < cond_func->arg_count; i++)
6229   {
6230     Item *arg= cond_func->arguments()[i]->real_item();
6231     if (arg != field_item)
6232       ref_tables|= arg->used_tables();
6233   }
6234   Field *field= field_item->field;
6235   Item_result cmp_type= field->cmp_type();
6236   if (!((ref_tables | field->table->map) & param_comp))
6237     ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv);
6238   Item_equal *item_equal= field_item->item_equal;
6239   if (item_equal)
6240   {
6241     Item_equal_iterator it(*item_equal);
6242     Item_field *item;
6243     while ((item= it++))
6244     {
6245       Field *f= item->field;
6246       if (field->eq(f))
6247         continue;
6248       if (!((ref_tables | f->table->map) & param_comp))
6249       {
6250         tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv);
6251         ftree= !ftree ? tree : tree_and(param, ftree, tree);
6252       }
6253     }
6254   }
6255   DBUG_RETURN(ftree);
6256 }
6257 
6258 /**
6259   The Range Analysis Module, which finds range access alternatives
6260   applicable to single or multi-index (UNION) access. The function
6261   does not calculate or care about the cost of the different
6262   alternatives.
6263 
6264   get_mm_tree() employs a relaxed boolean algebra where the solution
6265   may be bigger than what the rules of boolean algebra accept. In
6266   other words, get_mm_tree() may return range access plans that will
6267   read more rows than the input conditions dictate. In its simplest
6268   form, consider a condition on two fields indexed by two different
6269   indexes:
6270 
6271      "WHERE fld1 > 'x' AND fld2 > 'y'"
6272 
6273   In this case, there are two single-index range access alternatives.
6274   No matter which access path is chosen, rows that are not in the
6275   result set may be read.
6276 
6277   In the case above, get_mm_tree() will create range access
6278   alternatives for both indexes, so boolean algebra is still correct.
6279   In other cases, however, the conditions are too complex to be used
6280   without relaxing the rules. This typically happens when ORing a
6281   conjunction to a multi-index disjunctions (@see e.g.
6282   imerge_list_or_tree()). When this happens, the range optimizer may
6283   choose to ignore conjunctions (any condition connected with AND). The
6284   effect of this is that the result includes a "bigger" solution than
6285   neccessary. This is OK since all conditions will be used as filters
6286   after row retrieval.
6287 
6288   @see SEL_TREE::keys and SEL_TREE::merges for details of how single
6289   and multi-index range access alternatives are stored.
6290 */
get_mm_tree(RANGE_OPT_PARAM * param,Item * cond)6291 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,Item *cond)
6292 {
6293   SEL_TREE *tree=0;
6294   SEL_TREE *ftree= 0;
6295   Item_field *field_item= 0;
6296   bool inv= FALSE;
6297   Item *value= 0;
6298   DBUG_ENTER("get_mm_tree");
6299 
6300   if (cond->type() == Item::COND_ITEM)
6301   {
6302     List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
6303 
6304     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
6305     {
6306       tree= NULL;
6307       Item *item;
6308       while ((item=li++))
6309       {
6310         SEL_TREE *new_tree= get_mm_tree(param,item);
6311         if (param->statement_should_be_aborted())
6312           DBUG_RETURN(NULL);
6313         tree= tree_and(param,tree,new_tree);
6314         dbug_print_tree("after_and", tree, param);
6315         if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
6316           break;
6317       }
6318     }
6319     else
6320     {                                           // Item OR
6321       tree= get_mm_tree(param,li++);
6322       if (param->statement_should_be_aborted())
6323         DBUG_RETURN(NULL);
6324       if (tree)
6325       {
6326         Item *item;
6327         while ((item=li++))
6328         {
6329           SEL_TREE *new_tree=get_mm_tree(param,item);
6330           if (new_tree == NULL || param->statement_should_be_aborted())
6331             DBUG_RETURN(NULL);
6332           tree= tree_or(param,tree,new_tree);
6333           dbug_print_tree("after_or", tree, param);
6334           if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
6335             break;
6336         }
6337       }
6338     }
6339     dbug_print_tree("tree_returned", tree, param);
6340     DBUG_RETURN(tree);
6341   }
6342   /*
6343     Here when simple cond
6344     There are limits on what kinds of const items we can evaluate.
6345     At this stage a subquery in 'cond' might not be fully transformed yet
6346     (example: semijoin) thus cannot be evaluated.
6347   */
6348   if (cond->const_item() && !cond->is_expensive() && !cond->has_subquery())
6349   {
6350     /*
6351       During the cond->val_int() evaluation we can come across a subselect
6352       item which may allocate memory on the thd->mem_root and assumes
6353       all the memory allocated has the same life span as the subselect
6354       item itself. So we have to restore the thread's mem_root here.
6355     */
6356     MEM_ROOT *tmp_root= param->mem_root;
6357     param->thd->mem_root= param->old_root;
6358     tree= cond->val_int() ? new(tmp_root) SEL_TREE(SEL_TREE::ALWAYS) :
6359                             new(tmp_root) SEL_TREE(SEL_TREE::IMPOSSIBLE);
6360     param->thd->mem_root= tmp_root;
6361     dbug_print_tree("tree_returned", tree, param);
6362     DBUG_RETURN(tree);
6363   }
6364 
6365   table_map ref_tables= 0;
6366   table_map param_comp= ~(param->prev_tables | param->read_tables |
6367 		          param->current_table);
6368   if (cond->type() != Item::FUNC_ITEM)
6369   {						// Should be a field
6370     ref_tables= cond->used_tables();
6371     if ((ref_tables & param->current_table) ||
6372 	(ref_tables & ~(param->prev_tables | param->read_tables)))
6373       DBUG_RETURN(0);
6374     DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE));
6375   }
6376 
6377   Item_func *cond_func= (Item_func*) cond;
6378   if (cond_func->functype() == Item_func::BETWEEN ||
6379       cond_func->functype() == Item_func::IN_FUNC)
6380     inv= ((Item_func_opt_neg *) cond_func)->negated;
6381   else
6382   {
6383     /*
6384       During the cond_func->select_optimize() evaluation we can come across a
6385       subselect item which may allocate memory on the thd->mem_root and assumes
6386       all the memory allocated has the same life span as the subselect item
6387       itself. So we have to restore the thread's mem_root here.
6388     */
6389     MEM_ROOT *tmp_root= param->mem_root;
6390     param->thd->mem_root= param->old_root;
6391     Item_func::optimize_type opt_type= cond_func->select_optimize();
6392     param->thd->mem_root= tmp_root;
6393     if (opt_type == Item_func::OPTIMIZE_NONE)
6394       DBUG_RETURN(NULL);
6395   }
6396 
6397   param->cond= cond;
6398 
6399   switch (cond_func->functype()) {
6400   case Item_func::BETWEEN:
6401     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6402     {
6403       field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6404       ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6405     }
6406 
6407     /*
6408       Concerning the code below see the NOTES section in
6409       the comments for the function get_full_func_mm_tree()
6410     */
6411     for (uint i= 1 ; i < cond_func->arg_count ; i++)
6412     {
6413       if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
6414       {
6415         field_item= (Item_field*) (cond_func->arguments()[i]->real_item());
6416         SEL_TREE *tmp= get_full_func_mm_tree(param, cond_func,
6417                                     field_item, (Item*)(intptr)i, inv);
6418         if (inv)
6419         {
6420           tree= !tree ? tmp : tree_or(param, tree, tmp);
6421           if (tree == NULL)
6422             break;
6423         }
6424         else
6425           tree= tree_and(param, tree, tmp);
6426       }
6427       else if (inv)
6428       {
6429         tree= 0;
6430         break;
6431       }
6432     }
6433 
6434     ftree = tree_and(param, ftree, tree);
6435     break;
6436   case Item_func::IN_FUNC:
6437   {
6438     Item_func_in *func=(Item_func_in*) cond_func;
6439     if (func->key_item()->real_item()->type() != Item::FIELD_ITEM)
6440       DBUG_RETURN(0);
6441     field_item= (Item_field*) (func->key_item()->real_item());
6442     ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv);
6443     break;
6444   }
6445   case Item_func::MULT_EQUAL_FUNC:
6446   {
6447     Item_equal *item_equal= (Item_equal *) cond;
6448     if (!(value= item_equal->get_const()))
6449       DBUG_RETURN(0);
6450     Item_equal_iterator it(*item_equal);
6451     ref_tables= value->used_tables();
6452     while ((field_item= it++))
6453     {
6454       Field *field= field_item->field;
6455       Item_result cmp_type= field->cmp_type();
6456       if (!((ref_tables | field->table->map) & param_comp))
6457       {
6458         tree= get_mm_parts(param, item_equal, field, Item_func::EQ_FUNC,
6459 		           value,cmp_type);
6460         ftree= !ftree ? tree : tree_and(param, ftree, tree);
6461       }
6462     }
6463 
6464     dbug_print_tree("tree_returned", ftree, param);
6465     DBUG_RETURN(ftree);
6466   }
6467   default:
6468 
6469     DBUG_ASSERT (!ftree);
6470     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
6471     {
6472       field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
6473       value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : NULL;
6474       ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6475     }
6476     /*
6477       Even if get_full_func_mm_tree() was executed above and did not
6478       return a range predicate it may still be possible to create one
6479       by reversing the order of the operands. Note that this only
6480       applies to predicates where both operands are fields. Example: A
6481       query of the form
6482 
6483          WHERE t1.a OP t2.b
6484 
6485       In this case, arguments()[0] == t1.a and arguments()[1] == t2.b.
6486       When creating range predicates for t2, get_full_func_mm_tree()
6487       above will return NULL because 'field' belongs to t1 and only
6488       predicates that applies to t2 are of interest. In this case a
6489       call to get_full_func_mm_tree() with reversed operands (see
6490       below) may succeed.
6491      */
6492     if (!ftree && cond_func->have_rev_func() &&
6493         cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM)
6494     {
6495       field_item= (Item_field*) (cond_func->arguments()[1]->real_item());
6496       value= cond_func->arguments()[0];
6497       ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
6498     }
6499   }
6500 
6501   dbug_print_tree("tree_returned", ftree, param);
6502   DBUG_RETURN(ftree);
6503 }
6504 
6505 /**
6506   Test whether a comparison operator is a spatial comparison
6507   operator, i.e. Item_func::SP_*.
6508 
6509   Used to check if range access using operator 'op_type' is applicable
6510   for a non-spatial index.
6511 
6512   @param   op_type  The comparison operator.
6513   @return  true if 'op_type' is a spatial comparison operator, false otherwise.
6514 
6515 */
is_spatial_operator(Item_func::Functype op_type)6516 bool is_spatial_operator(Item_func::Functype op_type)
6517 {
6518   switch (op_type)
6519   {
6520   case Item_func::SP_EQUALS_FUNC:
6521   case Item_func::SP_DISJOINT_FUNC:
6522   case Item_func::SP_INTERSECTS_FUNC:
6523   case Item_func::SP_TOUCHES_FUNC:
6524   case Item_func::SP_CROSSES_FUNC:
6525   case Item_func::SP_WITHIN_FUNC:
6526   case Item_func::SP_CONTAINS_FUNC:
6527   case Item_func::SP_OVERLAPS_FUNC:
6528   case Item_func::SP_STARTPOINT:
6529   case Item_func::SP_ENDPOINT:
6530   case Item_func::SP_EXTERIORRING:
6531   case Item_func::SP_POINTN:
6532   case Item_func::SP_GEOMETRYN:
6533   case Item_func::SP_INTERIORRINGN:
6534     return true;
6535   default:
6536     return false;
6537   }
6538 }
6539 
6540 static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM * param,Item_func * cond_func,Field * field,Item_func::Functype type,Item * value,Item_result cmp_type)6541 get_mm_parts(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field,
6542 	     Item_func::Functype type,
6543 	     Item *value, Item_result cmp_type)
6544 {
6545   DBUG_ENTER("get_mm_parts");
6546   if (field->table != param->table)
6547     DBUG_RETURN(0);
6548 
6549   KEY_PART *key_part = param->key_parts;
6550   KEY_PART *end = param->key_parts_end;
6551   SEL_TREE *tree=0;
6552   if (value &&
6553       value->used_tables() & ~(param->prev_tables | param->read_tables))
6554     DBUG_RETURN(0);
6555   for (; key_part != end ; key_part++)
6556   {
6557     if (field->eq(key_part->field))
6558     {
6559       /*
6560         Cannot do range access for spatial operators when a
6561         non-spatial index is used.
6562       */
6563       if (key_part->image_type != Field::itMBR &&
6564           is_spatial_operator(cond_func->functype()))
6565         continue;
6566 
6567       SEL_ARG *sel_arg=0;
6568       if (!tree && !(tree=new SEL_TREE()))
6569 	DBUG_RETURN(0);				// OOM
6570       if (!value || !(value->used_tables() & ~param->read_tables))
6571       {
6572 	sel_arg=get_mm_leaf(param,cond_func,
6573 			    key_part->field,key_part,type,value);
6574 	if (!sel_arg)
6575 	  continue;
6576 	if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
6577 	{
6578 	  tree->type=SEL_TREE::IMPOSSIBLE;
6579 	  DBUG_RETURN(tree);
6580 	}
6581       }
6582       else
6583       {
6584 	// This key may be used later
6585 	if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY)))
6586 	  DBUG_RETURN(0);			// OOM
6587       }
6588       sel_arg->part=(uchar) key_part->part;
6589       tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
6590       tree->keys_map.set_bit(key_part->key);
6591     }
6592   }
6593 
6594   if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
6595     tree= NULL;
6596   DBUG_RETURN(tree);
6597 }
6598 
6599 /**
6600   Saves 'value' in 'field' and handles potential type conversion
6601   problems.
6602 
6603   @param tree [out]                 The SEL_ARG leaf under construction. If
6604                                     an always false predicate is found it is
6605                                     modified to point to a SEL_ARG with
6606                                     type == SEL_ARG::IMPOSSIBLE
6607   @param value                      The Item that contains a value that shall
6608                                     be stored in 'field'.
6609   @param comp_op                    Comparison operator: >, >=, <=> etc.
6610   @param field                      The field that 'value' is stored into.
6611   @param impossible_cond_cause[out] Set to a descriptive string if an
6612                                     impossible condition is found.
6613   @param memroot                    Memroot for creation of new SEL_ARG.
6614 
6615   @retval false  if saving went fine and it makes sense to continue
6616                  optimizing for this predicate.
6617   @retval true   if always true/false predicate was found, in which
6618                  case 'tree' has been modified to reflect this: NULL
6619                  pointer if always true, SEL_ARG with type IMPOSSIBLE
6620                  if always false.
6621 */
save_value_and_handle_conversion(SEL_ARG ** tree,Item * value,const Item_func::Functype comp_op,Field * field,const char ** impossible_cond_cause,MEM_ROOT * memroot)6622 static bool save_value_and_handle_conversion(SEL_ARG **tree,
6623                                              Item *value,
6624                                              const Item_func::Functype comp_op,
6625                                              Field *field,
6626                                              const char **impossible_cond_cause,
6627                                              MEM_ROOT *memroot)
6628 {
6629   // A SEL_ARG should not have been created for this predicate yet.
6630   DBUG_ASSERT(*tree == NULL);
6631 
6632   if (!value->can_be_evaluated_now())
6633   {
6634     /*
6635       We cannot evaluate the value yet (i.e. required tables are not yet
6636       locked.)
6637       This is the case of prune_partitions() called during JOIN::prepare().
6638     */
6639     return true;
6640   }
6641 
6642   // For comparison purposes allow invalid dates like 2000-01-32
6643   const sql_mode_t orig_sql_mode= field->table->in_use->variables.sql_mode;
6644   field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
6645 
6646   /*
6647     We want to change "field > value" to "field OP V"
6648     where:
6649     * V is what is in "field" after we stored "value" in it via
6650     save_in_field_no_warning() (such store operation may have done
6651     rounding...)
6652     * OP is > or >=, depending on what's correct.
6653     For example, if c is an INT column,
6654     "c > 2.9" is changed to "c OP 3"
6655     where OP is ">=" (">" would not be correct, as 3 > 2.9, a comparison
6656     done with stored_field_cmp_to_item()). And
6657     "c > 3.1" is changed to "c OP 3" where OP is ">" (3 < 3.1...).
6658   */
6659 
6660   // Note that value may be a stored function call, executed here.
6661   const type_conversion_status err= value->save_in_field_no_warnings(field, 1);
6662   field->table->in_use->variables.sql_mode= orig_sql_mode;
6663 
6664   switch (err) {
6665   case TYPE_OK:
6666   case TYPE_NOTE_TRUNCATED:
6667     return false;
6668   case TYPE_ERR_BAD_VALUE:
6669     /*
6670       In the case of incompatible values, MySQL's SQL dialect has some
6671       strange interpretations. For example,
6672 
6673           "int_col > 'foo'" is interpreted as "int_col > 0"
6674 
6675       instead of always false. Because of this, we assume that the
6676       range predicate is always true instead of always false and let
6677       evaluate_join_record() decide the outcome.
6678     */
6679     return true;
6680   case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
6681     // Checking NULL value on a field that cannot contain NULL.
6682     *impossible_cond_cause= "null_field_in_non_null_column";
6683     goto impossible_cond;
6684   case TYPE_WARN_OUT_OF_RANGE:
6685     /*
6686       value to store was either higher than field::max_value or lower
6687       than field::min_value. The field's max/min value has been stored
6688       instead.
6689      */
6690     if (comp_op == Item_func::EQUAL_FUNC || comp_op == Item_func::EQ_FUNC)
6691     {
6692       /*
6693         Independent of data type, "out_of_range_value =/<=> field" is
6694         always false.
6695       */
6696       *impossible_cond_cause= "value_out_of_range";
6697       goto impossible_cond;
6698     }
6699 
6700     // If the field is numeric, we can interpret the out of range value.
6701     if ((field->type() != FIELD_TYPE_BIT) &&
6702         (field->result_type() == REAL_RESULT ||
6703          field->result_type() == INT_RESULT ||
6704          field->result_type() == DECIMAL_RESULT))
6705     {
6706       /*
6707         value to store was higher than field::max_value if
6708            a) field has a value greater than 0, or
6709            b) if field is unsigned and has a negative value (which, when
6710               cast to unsigned, means some value higher than LONGLONG_MAX).
6711       */
6712       if ((field->val_int() > 0) ||                              // a)
6713           (static_cast<Field_num*>(field)->unsigned_flag &&
6714            field->val_int() < 0))                                // b)
6715       {
6716         if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
6717         {
6718           /*
6719             '<' or '<=' compared to a value higher than the field
6720             can store is always true.
6721           */
6722           return true;
6723         }
6724         if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
6725         {
6726           /*
6727             '>' or '>=' compared to a value higher than the field can
6728             store is always false.
6729           */
6730           *impossible_cond_cause= "value_out_of_range";
6731           goto impossible_cond;
6732         }
6733       }
6734       else // value is lower than field::min_value
6735       {
6736         if (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC)
6737         {
6738           /*
6739             '>' or '>=' compared to a value lower than the field
6740             can store is always true.
6741           */
6742           return true;
6743         }
6744         if (comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC)
6745         {
6746           /*
6747             '<' or '=' compared to a value lower than the field can
6748             store is always false.
6749           */
6750           *impossible_cond_cause= "value_out_of_range";
6751           goto impossible_cond;
6752         }
6753       }
6754     }
6755     /*
6756       Value is out of range on a datatype where it can't be decided if
6757       it was underflow or overflow. It is therefore not possible to
6758       determine whether or not the condition is impossible or always
6759       true and we have to assume always true.
6760     */
6761     return true;
6762   case TYPE_NOTE_TIME_TRUNCATED:
6763     if (field->type() == FIELD_TYPE_DATE &&
6764         (comp_op == Item_func::GT_FUNC || comp_op == Item_func::GE_FUNC ||
6765          comp_op == Item_func::LT_FUNC || comp_op == Item_func::LE_FUNC))
6766     {
6767       /*
6768         We were saving DATETIME into a DATE column, the conversion went ok
6769         but a non-zero time part was cut off.
6770 
6771         In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
6772         values. Index over a DATE column uses DATE comparison. Changing
6773         from one comparison to the other is possible:
6774 
6775         datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
6776         datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'
6777 
6778         datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
6779         datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'
6780 
6781         but we'll need to convert '>' to '>=' and '<' to '<='. This will
6782         be done together with other types at the end of get_mm_leaf()
6783         (grep for stored_field_cmp_to_item)
6784       */
6785       return false;
6786     }
6787     if (comp_op == Item_func::EQ_FUNC || comp_op == Item_func::EQUAL_FUNC)
6788     {
6789       // Equality comparison is always false when time info has been truncated.
6790       goto impossible_cond;
6791     }
6792     // Fall through
6793   default:
6794     return true;
6795   }
6796 
6797   DBUG_ASSERT(FALSE); // Should never get here.
6798 
6799 impossible_cond:
6800   *tree= new (memroot) SEL_ARG(field, 0, 0);
6801   (*tree)->type= SEL_ARG::IMPOSSIBLE;
6802   return true;
6803 }
6804 
/**
  Build a SEL_ARG leaf (one interval, or a "maybe"/impossible marker) for
  the predicate "field <type> value" over a single key part.

  @param param     Range analysis context. param->old_root is the thread's
                   runtime mem_root, temporarily restored while 'value' is
                   evaluated; param->mem_root is used for allocations.
  @param conf_func The comparison function item (used for collation checks).
  @param field     The field the predicate applies to.
  @param key_part  Description of the key part being matched.
  @param type      Comparison operator (EQ_FUNC, LT_FUNC, LIKE_FUNC, ...).
  @param value     Value to compare with; NULL means IS [NOT] NULL.

  @return SEL_ARG for the interval, &null_element for predicates that are
          deemed never true here (see the NULL-comparison paths below), or
          NULL when no range predicate could be created.
*/
static SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM *param, Item *conf_func, Field *field,
            KEY_PART *key_part, Item_func::Functype type,Item *value)
{
  uint maybe_null=(uint) field->real_maybe_null();
  bool optimize_range;
  SEL_ARG *tree= 0;
  MEM_ROOT *alloc= param->mem_root;
  uchar *str;
  const char *impossible_cond_cause= NULL;
  DBUG_ENTER("get_mm_leaf");

  /*
    We need to restore the runtime mem_root of the thread in this
    function because it evaluates the value of its argument, while
    the argument can be any, e.g. a subselect. The subselect
    items, in turn, assume that all the memory allocated during
    the evaluation has the same life span as the item itself.
    TODO: opt_range.cc should not reset thd->mem_root at all.
  */
  param->thd->mem_root= param->old_root;
  if (!value)					// IS NULL or IS NOT NULL
  {
    if (field->table->maybe_null)		// Can't use a key on this
      goto end;
    if (!maybe_null)				// Not null field
    {
      if (type == Item_func::ISNULL_FUNC)
        tree= &null_element;
      goto end;
    }
    uchar *null_string=
      static_cast<uchar*>(alloc_root(alloc, key_part->store_length + 1));
    if (!null_string)
      goto end;                                 // out of memory

    TRASH(null_string, key_part->store_length + 1);
    memcpy(null_string, is_null_string, sizeof(is_null_string));

    if (!(tree= new (alloc) SEL_ARG(field, null_string, null_string)))
      goto end;                                 // out of memory
    if (type == Item_func::ISNOTNULL_FUNC)
    {
      tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
      tree->max_flag=NO_MAX_RANGE;
    }
    goto end;
  }

  /*
    1. Usually we can't use an index if the column collation
       differ from the operation collation.

    2. However, we can reuse a case insensitive index for
       the binary searches:

       WHERE latin1_swedish_ci_column = 'a' COLLATE latin1_bin;

       WHERE latin1_swedish_ci_column = BINARY 'a '
  */
  if ((field->result_type() == STRING_RESULT &&
       field->match_collation_to_optimize_range() &&
       value->result_type() == STRING_RESULT &&
       key_part->image_type == Field::itRAW &&
       field->charset() != conf_func->compare_collation() &&
       !(conf_func->compare_collation()->state & MY_CS_BINSORT &&
         (type == Item_func::EQUAL_FUNC || type == Item_func::EQ_FUNC))))
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  /*
    Temporal values: Cannot use range access if:
      1) 'temporal_value = indexed_varchar_column' because there are
         many ways to represent the same date as a string. A few
         examples: "01-01-2001", "1-1-2001", "2001-01-01",
         "2001#01#01". The same problem applies to time. Thus, we
         cannot create a useful range predicate for temporal values
         into VARCHAR column indexes. @see add_key_field()
      2) 'temporal_value_with_date_part = indexed_time' because:
         - without index, a TIME column with value '48:00:00' is
           equal to a DATETIME column with value
           'CURDATE() + 2 days'
         - with range access into the TIME column, CURDATE() + 2
           days becomes "00:00:00" (Field_timef::store_internal()
           simply extracts the time part from the datetime) which
           is a lookup key which does not match "48:00:00"; so
           ref access is not able to give the same result.
           On the other hand, we can do ref access for
           IndexedDatetimeComparedToTime because
           Field_temporal_with_date::store_time() will convert
           48:00:00 to CURDATE() + 2 days which is the correct
           lookup key.
   */
  if ((!field->is_temporal() && value->is_temporal()) ||   // 1)
      field_time_cmp_date(field, value))                   // 2)
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  if (key_part->image_type == Field::itMBR)
  {
    // @todo: use is_spatial_operator() instead?
    switch (type) {
    case Item_func::SP_EQUALS_FUNC:
    case Item_func::SP_DISJOINT_FUNC:
    case Item_func::SP_INTERSECTS_FUNC:
    case Item_func::SP_TOUCHES_FUNC:
    case Item_func::SP_CROSSES_FUNC:
    case Item_func::SP_WITHIN_FUNC:
    case Item_func::SP_CONTAINS_FUNC:
    case Item_func::SP_OVERLAPS_FUNC:
      break;
    default:
      /*
        We cannot involve spatial indexes for queries that
        don't use MBREQUALS(), MBRDISJOINT(), etc. functions.
      */
      goto end;
    }
  }

  if (param->using_real_indexes)
    optimize_range= field->optimize_range(param->real_keynr[key_part->key],
                                          key_part->part);
  else
    optimize_range= TRUE;

  if (type == Item_func::LIKE_FUNC)
  {
    bool like_error;
    char buff1[MAX_FIELD_WIDTH];
    uchar *min_str,*max_str;
    String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
    size_t length, offset, min_length, max_length;
    uint field_length= field->pack_length()+maybe_null;

    if (!optimize_range)
      goto end;
    if (!(res= value->val_str(&tmp)))
    {
      tree= &null_element;
      goto end;
    }

    /*
      TODO:
      Check if this was a function. This should have be optimized away
      in the sql_select.cc
    */
    if (res != &tmp)
    {
      tmp.copy(*res);				// Get own copy
      res= &tmp;
    }
    if (field->cmp_type() != STRING_RESULT)
      goto end;                                 // Can only optimize strings

    offset=maybe_null;
    length=key_part->store_length;

    if (length != key_part->length  + maybe_null)
    {
      /* key packed with length prefix */
      offset+= HA_KEY_BLOB_LENGTH;
      field_length= length - HA_KEY_BLOB_LENGTH;
    }
    else
    {
      if (unlikely(length < field_length))
      {
	/*
	  This can only happen in a table created with UNIREG where one key
	  overlaps many fields
	*/
	length= field_length;
      }
      else
	field_length= length;
    }
    length+=offset;
    // One buffer holds both the min and the max key image.
    if (!(min_str= (uchar*) alloc_root(alloc, length*2)))
      goto end;

    max_str=min_str+length;
    if (maybe_null)
      max_str[0]= min_str[0]=0;

    field_length-= maybe_null;
    like_error= my_like_range(field->charset(),
			      res->ptr(), res->length(),
			      ((Item_func_like*)(param->cond))->escape,
			      wild_one, wild_many,
			      field_length,
			      (char*) min_str+offset, (char*) max_str+offset,
			      &min_length, &max_length);
    if (like_error)				// Can't optimize with LIKE
      goto end;

    if (offset != maybe_null)			// BLOB or VARCHAR
    {
      int2store(min_str+maybe_null,min_length);
      int2store(max_str+maybe_null,max_length);
    }
    tree= new (alloc) SEL_ARG(field, min_str, max_str);
    goto end;
  }

  if (!optimize_range &&
      type != Item_func::EQ_FUNC &&
      type != Item_func::EQUAL_FUNC)
    goto end;                                   // Can't optimize this

  /*
    We can't always use indexes when comparing a string index to a number
    cmp_type() is checked to allow compare of dates to numbers
  */
  if (field->result_type() == STRING_RESULT &&
      value->result_type() != STRING_RESULT &&
      field->cmp_type() != value->result_type())
  {
    if_extended_explain_warn_index_not_applicable(param, key_part->key, field);
    goto end;
  }

  // Stores 'value' into 'field'; may set 'tree' to IMPOSSIBLE.
  if (save_value_and_handle_conversion(&tree, value, type, field,
                                       &impossible_cond_cause, alloc))
    goto end;

  /*
    Any sargable predicate except "<=>" involving NULL as a constant is always
    FALSE
  */
  if (type != Item_func::EQUAL_FUNC && field->is_real_null())
  {
    impossible_cond_cause= "comparison_with_null_always_false";
    tree= &null_element;
    goto end;
  }

  str= (uchar*) alloc_root(alloc, key_part->store_length+1);
  if (!str)
    goto end;
  if (maybe_null)
    *str= (uchar) field->is_real_null();        // Set to 1 if null
  field->get_key_image(str+maybe_null, key_part->length,
                       key_part->image_type);
  if (!(tree= new (alloc) SEL_ARG(field, str, str)))
    goto end;                                   // out of memory

  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and condition
    testing, and we also get correct comparison of unsigned integers with
    negative integers (which otherwise fails because at query execution time
    negative integers are cast to unsigned if compared with unsigned).
   */
  if (field->result_type() == INT_RESULT &&
      value->result_type() == INT_RESULT &&
      ((field->type() == FIELD_TYPE_BIT ||
       ((Field_num *) field)->unsigned_flag) &&
       !((Item_int*) value)->unsigned_flag))
  {
    longlong item_val= value->val_int();
    if (item_val < 0)
    {
      if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
      {
        impossible_cond_cause= "unsigned_int_cannot_be_negative";
        tree->type= SEL_ARG::IMPOSSIBLE;
        goto end;
      }
      if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
      {
        tree= 0;
        goto end;
      }
    }
  }

  /* Adjust interval openness/flags according to the operator. */
  switch (type) {
  case Item_func::LT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        stored_field_cmp_to_item(param->thd, field, value) == 0)
      tree->max_flag=NEAR_MAX;
    /* fall through */
  case Item_func::LE_FUNC:
    if (!maybe_null)
      tree->min_flag=NO_MIN_RANGE;		/* From start */
    else
    {						// > NULL
      if (!(tree->min_value=
            static_cast<uchar*>(alloc_root(alloc, key_part->store_length+1))))
        goto end;
      TRASH(tree->min_value, key_part->store_length + 1);
      memcpy(tree->min_value, is_null_string, sizeof(is_null_string));
      tree->min_flag=NEAR_MIN;
    }
    break;
  case Item_func::GT_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) <= 0))
      tree->min_flag=NEAR_MIN;
    tree->max_flag= NO_MAX_RANGE;
    break;
  case Item_func::GE_FUNC:
    /* Don't use open ranges for partial key_segments */
    if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
        (stored_field_cmp_to_item(param->thd, field, value) < 0))
      tree->min_flag= NEAR_MIN;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_EQUALS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_DISJOINT_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_INTERSECTS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_TOUCHES_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  case Item_func::SP_CROSSES_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_WITHIN_FUNC:
    /*
      Adjust the min_flag as MyISAM implements this function
      in reverse order.
    */
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  case Item_func::SP_CONTAINS_FUNC:
    /*
      Adjust the min_flag as MyISAM implements this function
      in reverse order.
    */
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
  case Item_func::SP_OVERLAPS_FUNC:
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;

  default:
    break;
  }

end:
  // Trace the reason whenever the predicate was proved impossible.
  if (impossible_cond_cause != NULL)
  {
    Opt_trace_object wrapper (&param->thd->opt_trace);
    Opt_trace_object (&param->thd->opt_trace, "impossible_condition",
                      Opt_trace_context::RANGE_OPTIMIZER).
      add_alnum("cause", impossible_cond_cause);
  }
  // Put back the range-analysis mem_root (see comment at top of function).
  param->thd->mem_root= alloc;
  DBUG_RETURN(tree);
}
7183 
7184 
7185 /******************************************************************************
7186 ** Tree manipulation functions
7187 ** If tree is 0 it means that the condition can't be tested. It refers
7188 ** to a non existent table or to a field in current table with isn't a key.
7189 ** The different tree flags:
7190 ** IMPOSSIBLE:	 Condition is never TRUE
7191 ** ALWAYS:	 Condition is always TRUE
7192 ** MAYBE:	 Condition may exists when tables are read
7193 ** MAYBE_KEY:	 Condition refers to a key that may be used in join loop
7194 ** KEY_RANGE:	 Condition uses a key
7195 ******************************************************************************/
7196 
7197 /*
7198   Add a new key test to a key when scanning through all keys
7199   This will never be called for same key parts.
7200 */
7201 
7202 static SEL_ARG *
sel_add(SEL_ARG * key1,SEL_ARG * key2)7203 sel_add(SEL_ARG *key1,SEL_ARG *key2)
7204 {
7205   SEL_ARG *root,**key_link;
7206 
7207   if (!key1)
7208     return key2;
7209   if (!key2)
7210     return key1;
7211 
7212   key_link= &root;
7213   while (key1 && key2)
7214   {
7215     if (key1->part < key2->part)
7216     {
7217       *key_link= key1;
7218       key_link= &key1->next_key_part;
7219       key1=key1->next_key_part;
7220     }
7221     else
7222     {
7223       *key_link= key2;
7224       key_link= &key2->next_key_part;
7225       key2=key2->next_key_part;
7226     }
7227   }
7228   *key_link=key1 ? key1 : key2;
7229   return root;
7230 }
7231 
/*
  Flags telling key_and()/key_or() that the SEL_ARG graph of the
  corresponding argument may need to be cloned before modification.
*/
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Exchange the KEY1/KEY2 "maybe clone" bits. Fully parenthesized
  (both the argument and the whole expansion) so the macro binds
  correctly inside any surrounding expression and with non-atomic
  arguments.
*/
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))
7235 
7236 
/*
  Compute the AND (intersection) of two SEL_TREEs.

  The result is normally built in place in tree1 (tree2 may be returned
  when it is the stronger/only tree). NULL arguments mean "no
  restriction" and yield the other tree unchanged.
*/
static SEL_TREE *
tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  /* AND with "no restriction" is the other operand. */
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  /* IMPOSSIBLE dominates; ALWAYS is neutral. */
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  /* A MAYBE tree only weakens the other tree from KEY to KEY_SMALLER. */
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  dbug_print_tree("tree1", tree1, param);
  dbug_print_tree("tree2", tree2, param);

  /* Bitmap of keys that still have a SEL_ARG after the AND. */
  key_map  result_keys;

  /* Join the trees key per key */
  SEL_ARG **key1,**key2,**end;
  for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
       key1 != end ; key1++,key2++)
  {
    uint flag=0;
    if (*key1 || *key2)
    {
      /* Non-simple graphs may be shared, so key_and() must clone them. */
      if (*key1 && !(*key1)->simple_key())
	flag|=CLONE_KEY1_MAYBE;
      if (*key2 && !(*key2)->simple_key())
	flag|=CLONE_KEY2_MAYBE;
      *key1=key_and(param, *key1, *key2, flag);
      /* One impossible key range makes the whole conjunction impossible. */
      if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
      {
	tree1->type= SEL_TREE::IMPOSSIBLE;
        DBUG_RETURN(tree1);
      }
      result_keys.set_bit(key1 - tree1->keys);
#ifndef DBUG_OFF
        /* Sanity check of use_count, skipped when SEL_ARG budget exhausted. */
        if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
          (*key1)->test_use_count(*key1);
#endif
    }
  }
  tree1->keys_map= result_keys;

  /* ok, both trees are index_merge trees */
  imerge_list_and_list(&tree1->merges, &tree2->merges);
  DBUG_RETURN(tree1);
}
7297 
7298 
7299 /*
7300   Check if two SEL_TREES can be combined into one (i.e. a single key range
7301   read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
7302   using index_merge.
7303 */
7304 
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
                           RANGE_OPT_PARAM* param)
{
  /* Keys that carry range predicates in both trees. */
  key_map common_keys= tree1->keys_map;
  DBUG_ENTER("sel_trees_can_be_ored");
  common_keys.intersect(tree2->keys_map);

  dbug_print_tree("tree1", tree1, param);
  dbug_print_tree("tree2", tree2, param);

  /* No key constrained by both trees => cannot OR without index merge. */
  if (common_keys.is_clear_all())
    DBUG_RETURN(FALSE);

  /* trees have a common key, check if they refer to same key part */
  SEL_ARG **key1,**key2;
  for (uint key_no=0; key_no < param->keys; key_no++)
  {
    if (common_keys.is_set(key_no))
    {
      key1= tree1->keys + key_no;
      key2= tree2->keys + key_no;
      /*
        One common key whose SEL_ARG trees start on the same keypart is
        enough: the trees can then be OR'ed key by key.
      */
      if ((*key1)->part == (*key2)->part)
        DBUG_RETURN(TRUE);
    }
  }
  DBUG_RETURN(FALSE);
}
7332 
7333 
7334 /*
7335   Remove the trees that are not suitable for record retrieval.
7336   SYNOPSIS
7337     param  Range analysis parameter
7338     tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
7339 
7340   DESCRIPTION
7341     This function walks through tree->keys[] and removes the SEL_ARG* trees
7342     that are not "maybe" trees (*) and cannot be used to construct quick range
7343     selects.
7344     (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
7345           these types here as well.
7346 
7347     A SEL_ARG* tree cannot be used to construct quick select if it has
7348     tree->part != 0. (e.g. it could represent "keypart2 < const").
7349 
7350     WHY THIS FUNCTION IS NEEDED
7351 
7352     Normally we allow construction of SEL_TREE objects that have SEL_ARG
7353     trees that do not allow quick range select construction. For example for
7354     " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
7355     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
7356     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
7357                                                from this
7358     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
7359                                    tree.
7360 
7361     There is an exception though: when we construct index_merge SEL_TREE,
7362     any SEL_ARG* tree that cannot be used to construct quick range select can
7363     be removed, because current range analysis code doesn't provide any way
7364     that tree could be later combined with another tree.
7365     Consider an example: we should not construct
7366     st1 = SEL_TREE {
7367       merges = SEL_IMERGE {
7368                             SEL_TREE(t.key1part1 = 1),
7369                             SEL_TREE(t.key2part2 = 2)   -- (*)
7370                           }
7371                    };
7372     because
7373      - (*) cannot be used to construct quick range select,
7374      - There is no execution path that would cause (*) to be converted to
7375        a tree that could be used.
7376 
7377     The latter is easy to verify: first, notice that the only way to convert
7378     (*) into a usable tree is to call tree_and(something, (*)).
7379 
    Second, look at what the tree_and/tree_or functions would do when passed a
    SEL_TREE that has the structure like st1 tree has, and conclude that
    tree_and(something, (*)) will not be called.
7383 
7384   RETURN
7385     0  Ok, some suitable trees left
7386     1  No tree->keys[] left.
7387 */
7388 
remove_nonrange_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree)7389 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
7390 {
7391   bool res= FALSE;
7392   for (uint i=0; i < param->keys; i++)
7393   {
7394     if (tree->keys[i])
7395     {
7396       if (tree->keys[i]->part)
7397       {
7398         tree->keys[i]= NULL;
7399         tree->keys_map.clear_bit(i);
7400       }
7401       else
7402         res= TRUE;
7403     }
7404   }
7405   return !res;
7406 }
7407 
7408 
7409 static SEL_TREE *
tree_or(RANGE_OPT_PARAM * param,SEL_TREE * tree1,SEL_TREE * tree2)7410 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
7411 {
7412   DBUG_ENTER("tree_or");
7413   if (!tree1 || !tree2)
7414     DBUG_RETURN(0);
7415   if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
7416     DBUG_RETURN(tree2);
7417   if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
7418     DBUG_RETURN(tree1);
7419   if (tree1->type == SEL_TREE::MAYBE)
7420     DBUG_RETURN(tree1);				// Can't use this
7421   if (tree2->type == SEL_TREE::MAYBE)
7422     DBUG_RETURN(tree2);
7423 
7424   /*
7425     It is possible that a tree contains both
7426     a) simple range predicates (in tree->keys[]) and
7427     b) index merge range predicates (in tree->merges)
7428 
7429     If a tree has both, they represent equally *valid* range
7430     predicate alternatives; both will return all relevant rows from
7431     the table but one may return more unnecessary rows than the
7432     other (additional rows will be filtered later). However, doing
7433     an OR operation on trees with both types of predicates is too
7434     complex at the time. We therefore remove the index merge
7435     predicates (if we have both types) before OR'ing the trees.
7436 
7437     TODO: enable tree_or() for trees with both simple and index
7438     merge range predicates.
7439   */
7440   if (!tree1->merges.is_empty())
7441   {
7442     for (uint i= 0; i < param->keys; i++)
7443       if (tree1->keys[i] != NULL && tree2->keys[i] != &null_element)
7444       {
7445         tree1->merges.empty();
7446         break;
7447       }
7448   }
7449   if (!tree2->merges.is_empty())
7450   {
7451     for (uint i= 0; i< param->keys; i++)
7452       if (tree2->keys[i] != NULL && tree2->keys[i] != &null_element)
7453       {
7454         tree2->merges.empty();
7455         break;
7456       }
7457   }
7458 
7459   SEL_TREE *result= 0;
7460   key_map  result_keys;
7461   if (sel_trees_can_be_ored(tree1, tree2, param))
7462   {
7463     /* Join the trees key per key */
7464     SEL_ARG **key1,**key2,**end;
7465     for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
7466          key1 != end ; key1++,key2++)
7467     {
7468       *key1=key_or(param, *key1, *key2);
7469       if (*key1)
7470       {
7471         result=tree1;				// Added to tree1
7472         result_keys.set_bit(key1 - tree1->keys);
7473 #ifndef DBUG_OFF
7474         if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
7475           (*key1)->test_use_count(*key1);
7476 #endif
7477       }
7478     }
7479     if (result)
7480       result->keys_map= result_keys;
7481   }
7482   else
7483   {
7484     /* ok, two trees have KEY type but cannot be used without index merge */
7485     if (tree1->merges.is_empty() && tree2->merges.is_empty())
7486     {
7487       if (param->remove_jump_scans)
7488       {
7489         bool no_trees= remove_nonrange_trees(param, tree1);
7490         no_trees= no_trees || remove_nonrange_trees(param, tree2);
7491         if (no_trees)
7492           DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7493       }
7494       SEL_IMERGE *merge;
7495       /* both trees are "range" trees, produce new index merge structure */
7496       if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
7497           (result->merges.push_back(merge)) ||
7498           (merge->or_sel_tree(param, tree1)) ||
7499           (merge->or_sel_tree(param, tree2)))
7500         result= NULL;
7501       else
7502         result->type= tree1->type;
7503     }
7504     else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
7505     {
7506       if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
7507         result= new SEL_TREE(SEL_TREE::ALWAYS);
7508       else
7509         result= tree1;
7510     }
7511     else
7512     {
7513       /* one tree is index merge tree and another is range tree */
7514       if (tree1->merges.is_empty())
7515         swap_variables(SEL_TREE*, tree1, tree2);
7516 
7517       if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
7518          DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
7519       /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
7520       if (imerge_list_or_tree(param, &tree1->merges, tree2))
7521         result= new SEL_TREE(SEL_TREE::ALWAYS);
7522       else
7523         result= tree1;
7524     }
7525   }
7526   DBUG_RETURN(result);
7527 }
7528 
7529 
/* AND key trees where key1->part < key2->part */
7531 
static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
             uint clone_flag)
{
  SEL_ARG *next;
  /* Remember key1's reference count; propagated to subtrees below. */
  ulong use_count=key1->use_count;

  if (key1->elements != 1)
  {
    /* key2 becomes shared by every range element in key1's RB-tree. */
    key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    // See todo for left/right pointers
    DBUG_ASSERT(!key1->left);
    DBUG_ASSERT(!key1->right);
    key1->next= key1->prev= 0;
  }
  /*
    Attach key2 below every range in key1, ANDing it with any existing
    next_key_part; walk the ranges in key order.
  */
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
	/* This range can never match anything: remove it from key1. */
	key1=key1->tree_delete(next);
	continue;
      }
      next->next_key_part=tmp;
      if (use_count)
	next->increment_use_count(use_count);
      /* Stop early once the SEL_ARG allocation budget is exceeded. */
      if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
        break;
    }
    else
      next->next_key_part=key2;
  }
  if (!key1)
    return &null_element;			// Impossible ranges
  key1->use_count++;
  return key1;
}
7575 
7576 
7577 /*
7578   Produce a SEL_ARG graph that represents "key1 AND key2"
7579 
7580   SYNOPSIS
7581     key_and()
7582       param   Range analysis context (needed to track if we have allocated
7583               too many SEL_ARGs)
7584       key1    First argument, root of its RB-tree
7585       key2    Second argument, root of its RB-tree
7586 
7587   RETURN
7588     RB-tree root of the resulting SEL_ARG graph.
7589     NULL if the result of AND operation is an empty interval {0}.
7590 */
7591 
static SEL_ARG *
key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
  /* A missing operand places no restriction: return the other one. */
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    /* Different keyparts: normalize so key1 has the smaller part number. */
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part
    key1->use_count--;
    /*
     Clone key1 if the use_count is greater than 0 otherwise use the
     "clone_flag" to determine if a key needs to be cloned.
     "clone_flag" is set to true if the conditions which need to be
     ANDed (in tree_and) are not simple (has many OR conditions within).
   */
    if (key1->use_count > 0 || (clone_flag & CLONE_KEY2_MAYBE))
      if (!(key1= key1->clone_tree(param)))
	return 0;				// OOM
    return and_all_keys(param, key1, key2, clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {						// Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the keys is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      /* key1 is shared: clone it before modifying it below. */
      key1->use_count--;
      if (!(key1=key1->clone_tree(param)))
	return 0;				// OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {						// Both are maybe key
      key1->next_key_part=key_and(param, key1->next_key_part,
                                  key2->next_key_part, clone_flag);
      if (key1->next_key_part &&
	  key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
	return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
	key1->use_count--;			// Incremented in and_all_keys
	return and_all_keys(param, key1, key2, clone_flag);
      }
      key2->use_count--;			// Key2 doesn't have a tree
    }
    return key1;
  }

  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    /* TODO: why not leave one of the trees? */
    key1->free_tree();
    key2->free_tree();
    return 0;					// Can't optimize this
  }

  key1->use_count--;
  key2->use_count--;
  /*
    Same keypart: intersect the two ordered range lists pairwise,
    building a new RB-tree of the overlapping sub-ranges.
  */
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;

  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      /* e1 starts first: advance within key1 to a range overlapping e2. */
      if (get_range(&e1,&e2,key1))
	continue;
    }
    else if (get_range(&e2,&e1,key2))
      continue;
    /* e1 and e2 overlap: recursively AND the following keyparts. */
    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
                          clone_flag);
    e1->increment_use_count(1);
    e2->increment_use_count(1);
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      /* The intersection of e1 and e2 becomes one result range. */
      SEL_ARG *new_arg= e1->clone_and(e2);
      if (!new_arg)
	return &null_element;			// End of memory
      new_arg->next_key_part=next;
      if (!new_tree)
      {
	new_tree=new_arg;
      }
      else
	new_tree=new_tree->insert(new_arg);
    }
    /* Advance the range that ends first. */
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;				// e1 can't overlap next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;			// Impossible range
  return new_tree;
}
7710 
7711 
/*
  Position *e1 (a range in root1) so that it overlaps *e2, advancing one
  of the iterators when needed.

  RETURN
    0  *e1 and *e2 overlap; both pointers are usable
    1  no overlap found for the current pair; either *e1 was exhausted
       (became NULL) or *e2 was advanced — the caller should loop again
*/
static bool
get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
{
  (*e1)=root1->find_range(*e2);			// first e1->min < e2->min
  if ((*e1)->cmp_max_to_min(*e2) < 0)
  {
    /* *e1 ends before *e2 starts: try the following range in root1. */
    if (!((*e1)=(*e1)->next))
      return 1;
    if ((*e1)->cmp_min_to_max(*e2) > 0)
    {
      /* That range starts after *e2 ends: advance *e2 instead. */
      (*e2)=(*e2)->next;
      return 1;
    }
  }
  return 0;
}
7728 
7729 
7730 /**
7731    Combine two range expression under a common OR. On a logical level, the
7732    transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
7733 
7734    Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
   the format is reminiscent of DNF, since an expression such as the following
7736 
7737    ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
7738 
7739    where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
7740    and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
7741    SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
7742    the first range and ranges must not overlap. It follows that they are also
7743    ordered by maximum endpoints. Thus
7744 
7745    ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
7746 
   is a valid SEL_ARG expression for a key of at least 2 keyparts.
7748 
7749    For simplicity, we will assume that expr2 is a single range predicate,
7750    i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
7751    disjunction of several predicates by subsequently call key_or for each
7752    disjunct.
7753 
7754    The algorithm iterates over each disjunct of expr1, and for each disjunct
7755    where the first keypart's range overlaps with the first keypart's range in
7756    expr2:
7757 
7758    If the predicates are equal for the rest of the keyparts, or if there are
7759    no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
7760    node in expr2 is deallocated. If more ranges became connected in expr1, the
   surplus is also deallocated. If they differ, two ranges are created.
7762 
7763    - The range leading up to the overlap. Empty if endpoints are equal.
7764 
7765    - The overlapping sub-range. May be the entire range if they are equal.
7766 
7767    Finally, there may be one more range if expr2's first keypart's range has a
7768    greater maximum endpoint than the last range in expr1.
7769 
7770    For the overlapping sub-range, we recursively call key_or. Thus in order to
7771    compute key_or of
7772 
7773      (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
7774 
7775      (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
7776 
7777    We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
7778    first one, we simply hook on the condition for the second keypart from (1)
7779    : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
7780    < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
7781    the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
7782 
7783    ( 1  <  kp1 <= 2 AND 1 < kp2 < 10 ) OR
7784    ( 2  <  kp1 < 10 AND 1 < kp2 < 20 ) OR
7785    ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
7786 
7787    @param param    PARAM from SQL_SELECT::test_quick_select
7788    @param key1     Root of RB-tree of SEL_ARGs to be ORed with key2
7789    @param key2     Root of RB-tree of SEL_ARGs to be ORed with key1
7790 */
7791 static SEL_ARG *
key_or(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2)7792 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2)
7793 {
7794   if (key1 == NULL || key1->type == SEL_ARG::ALWAYS)
7795   {
7796     if (key2)
7797     {
7798       key2->use_count--;
7799       key2->free_tree();
7800     }
7801     return key1;
7802   }
7803   if (key2 == NULL || key2->type == SEL_ARG::ALWAYS)
7804     // Case is symmetric to the one above, just flip parameters.
7805     return key_or(param, key2, key1);
7806 
7807   key1->use_count--;
7808   key2->use_count--;
7809 
7810   if (key1->part != key2->part ||
7811       (key1->min_flag | key2->min_flag) & GEOM_FLAG)
7812   {
7813     key1->free_tree();
7814     key2->free_tree();
7815     return 0;                                   // Can't optimize this
7816   }
7817 
7818   // If one of the key is MAYBE_KEY then the found region may be bigger
7819   if (key1->type == SEL_ARG::MAYBE_KEY)
7820   {
7821     key2->free_tree();
7822     key1->use_count++;
7823     return key1;
7824   }
7825   if (key2->type == SEL_ARG::MAYBE_KEY)
7826   {
7827     key1->free_tree();
7828     key2->use_count++;
7829     return key2;
7830   }
7831 
7832   if (key1->use_count > 0)
7833   {
7834     if (key2->use_count == 0 || key1->elements > key2->elements)
7835     {
7836       swap_variables(SEL_ARG *,key1,key2);
7837     }
7838     if (key1->use_count > 0 && (key1= key1->clone_tree(param)) == NULL)
7839       return 0;                                 // OOM
7840   }
7841 
7842   // Add tree at key2 to tree at key1
7843   const bool key2_shared= (key2->use_count != 0);
7844   key1->maybe_flag|= key2->maybe_flag;
7845 
7846   /*
7847     Notation for illustrations used in the rest of this function:
7848 
7849       Range: [--------]
7850              ^        ^
7851              start    stop
7852 
7853       Two overlapping ranges:
7854         [-----]               [----]            [--]
7855             [---]     or    [---]       or   [-------]
7856 
7857       Ambiguity: ***
7858         The range starts or stops somewhere in the "***" range.
7859         Example: a starts before b and may end before/the same place/after b
7860         a: [----***]
7861         b:   [---]
7862 
7863       Adjacent ranges:
7864         Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
7865         a: ----]
7866         b:      [----
7867    */
7868 
7869   SEL_ARG *cur_key2= key2->first();
7870   while (cur_key2)
7871   {
7872     /*
7873       key1 consists of one or more ranges. cur_key1 is the
7874       range currently being handled.
7875 
7876       initialize cur_key1 to the latest range in key1 that starts the
7877       same place or before the range in cur_key2 starts
7878 
7879       cur_key2:            [------]
7880       key1:      [---] [-----] [----]
7881                        ^
7882                        cur_key1
7883     */
7884     SEL_ARG *cur_key1= key1->find_range(cur_key2);
7885 
7886     /*
7887       Used to describe how two key values are positioned compared to
7888       each other. Consider key_value_a.<cmp_func>(key_value_b):
7889 
7890         -2: key_value_a is smaller than key_value_b, and they are adjacent
7891         -1: key_value_a is smaller than key_value_b (not adjacent)
7892          0: the key values are equal
7893          1: key_value_a is bigger than key_value_b (not adjacent)
7894          2: key_value_a is bigger than key_value_b, and they are adjacent
7895 
7896       Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"
7897 
7898       cur_key2:          [--------           (10 <= x ...  )
7899       cur_key1:    -----]                    (  ... x <  10) => cmp==-2
7900       cur_key1:    ----]                     (  ... x <   9) => cmp==-1
7901       cur_key1:    ------]                   (  ... x <= 10) => cmp== 0
7902       cur_key1:    --------]                 (  ... x <= 12) => cmp== 1
7903       (cmp == 2 does not make sense for cmp_max_to_min())
7904      */
7905     int cmp= 0;
7906 
7907     if (!cur_key1)
7908     {
7909       /*
7910         The range in cur_key2 starts before the first range in key1. Use
7911         the first range in key1 as cur_key1.
7912 
7913         cur_key2: [--------]
7914         key1:            [****--] [----]   [-------]
7915                          ^
7916                          cur_key1
7917       */
7918       cur_key1= key1->first();
7919       cmp= -1;
7920     }
7921     else if ((cmp= cur_key1->cmp_max_to_min(cur_key2)) < 0)
7922     {
7923       /*
7924         This is the case:
7925         cur_key2:           [-------]
7926         cur_key1:   [----**]
7927        */
7928       SEL_ARG *next_key1= cur_key1->next;
7929       if (cmp == -2 &&
7930           eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
7931       {
7932         /*
7933           Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
7934 
7935           This is the case:
7936           cur_key2:           [-------]
7937           cur_key1:     [----]
7938 
7939           Result:
7940           cur_key2:     [-------------]     => inserted into key1 below
7941           cur_key1:                         => deleted
7942         */
7943         SEL_ARG *next_key2= cur_key2->next;
7944         if (key2_shared)
7945         {
7946           if (!(cur_key2= new SEL_ARG(*cur_key2)))
7947             return 0;           // out of memory
7948           cur_key2->increment_use_count(key1->use_count+1);
7949           cur_key2->next= next_key2;                 // New copy of cur_key2
7950         }
7951 
7952         if (cur_key2->copy_min(cur_key1))
7953         {
7954           // cur_key2 is full range: [-inf <= cur_key2 <= +inf]
7955           key1->free_tree();
7956           key2->free_tree();
7957           key1->type= SEL_ARG::ALWAYS;
7958           key2->type= SEL_ARG::ALWAYS;
7959           if (key1->maybe_flag)
7960             return new SEL_ARG(SEL_ARG::MAYBE_KEY);
7961           return 0;
7962         }
7963 
7964         if (!(key1= key1->tree_delete(cur_key1)))
7965         {
7966           /*
7967             cur_key1 was the last range in key1; move the cur_key2
7968             range that was merged above to key1
7969           */
7970           key1= cur_key2;
7971           key1->make_root();
7972           cur_key2= next_key2;
7973           break;
7974         }
7975       }
7976       // Move to next range in key1. Now cur_key1.min > cur_key2.min
7977       if (!(cur_key1= next_key1))
7978         break;         // No more ranges in key1. Copy rest of key2
7979     }
7980 
7981     if (cmp < 0)
7982     {
7983       /*
7984         This is the case:
7985         cur_key2:   [--***]
7986         cur_key1:       [----]
7987       */
7988       int cur_key1_cmp;
7989       if ((cur_key1_cmp= cur_key1->cmp_min_to_max(cur_key2)) > 0)
7990       {
7991         /*
7992           This is the case:
7993           cur_key2:  [------**]
7994           cur_key1:            [----]
7995         */
7996         if (cur_key1_cmp == 2 &&
7997             eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
7998         {
7999           /*
8000             Adjacent ranges with equal next_key_part. Merge like this:
8001 
8002             This is the case:
8003             cur_key2:    [------]
8004             cur_key1:            [-----]
8005 
8006             Result:
8007             cur_key2:    [------]
8008             cur_key1:    [-------------]
8009 
8010             Then move on to next key2 range.
8011           */
8012           cur_key1->copy_min_to_min(cur_key2);
8013           key1->merge_flags(cur_key2); //should be cur_key1->merge...() ?
8014           if (cur_key1->min_flag & NO_MIN_RANGE &&
8015               cur_key1->max_flag & NO_MAX_RANGE)
8016           {
8017             if (key1->maybe_flag)
8018               return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8019             return 0;
8020           }
8021           cur_key2->increment_use_count(-1);        // Free not used tree
8022           cur_key2=cur_key2->next;
8023           continue;
8024         }
8025         else
8026         {
8027           /*
8028             cur_key2 not adjacent to cur_key1 or has different next_key_part.
8029             Insert into key1 and move to next range in key2
8030 
8031             This is the case:
8032             cur_key2:   [------**]
8033             cur_key1:             [----]
8034 
8035             Result:
8036             key1:       [------**][----]
8037                         ^         ^
8038                         insert    cur_key1
8039           */
8040           SEL_ARG *next_key2= cur_key2->next;
8041           if (key2_shared)
8042           {
8043             SEL_ARG *cpy= new SEL_ARG(*cur_key2);   // Must make copy
8044             if (!cpy)
8045               return 0;                         // OOM
8046             key1= key1->insert(cpy);
8047             cur_key2->increment_use_count(key1->use_count+1);
8048           }
8049           else
8050             key1= key1->insert(cur_key2); // Will destroy key2_root
8051           cur_key2= next_key2;
8052           continue;
8053         }
8054       }
8055     }
8056 
8057     /*
8058       The ranges in cur_key1 and cur_key2 are overlapping:
8059 
8060       cur_key2:       [----------]
8061       cur_key1:    [*****-----*****]
8062 
8063       Corollary: cur_key1.min <= cur_key2.max
8064     */
8065     if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part))
8066     {
8067       // Merge overlapping ranges with equal next_key_part
8068       if (cur_key1->is_same(cur_key2))
8069       {
8070         /*
8071           cur_key1 covers exactly the same range as cur_key2
8072           Use the relevant range in key1.
8073         */
8074         cur_key1->merge_flags(cur_key2);        // Copy maybe flags
8075         cur_key2->increment_use_count(-1);      // Free not used tree
8076       }
8077       else
8078       {
8079         SEL_ARG *last= cur_key1;
8080         SEL_ARG *first= cur_key1;
8081 
8082         /*
8083           Find the last range in key1 that overlaps cur_key2 and
8084           where all ranges first...last have the same next_key_part as
8085           cur_key2.
8086 
8087           cur_key2:  [****----------------------*******]
8088           key1:         [--]  [----] [---]  [-----] [xxxx]
8089                         ^                   ^       ^
8090                         first               last    different next_key_part
8091 
8092           Since cur_key2 covers them, the ranges between first and last
8093           are merged into one range by deleting first...last-1 from
8094           the key1 tree. In the figure, this applies to first and the
8095           two consecutive ranges. The range of last is then extended:
8096             * last.min: Set to min(cur_key2.min, first.min)
8097             * last.max: If there is a last->next that overlaps cur_key2
8098                         (i.e., last->next has a different next_key_part):
8099                                         Set adjacent to last->next.min
8100                         Otherwise:      Set to max(cur_key2.max, last.max)
8101 
8102           Result:
8103           cur_key2:  [****----------------------*******]
8104                         [--]  [----] [---]                 => deleted from key1
8105           key1:      [**------------------------***][xxxx]
8106                      ^                              ^
8107                      cur_key1=last                  different next_key_part
8108         */
8109         while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
8110                eq_tree(last->next->next_key_part, cur_key2->next_key_part))
8111         {
8112           /*
8113             last->next is covered by cur_key2 and has same next_key_part.
8114             last can be deleted
8115           */
8116           SEL_ARG *save=last;
8117           last=last->next;
8118           key1= key1->tree_delete(save);
8119         }
8120         // Redirect cur_key1 to last which will cover the entire range
8121         cur_key1= last;
8122 
8123         /*
8124           Extend last to cover the entire range of
8125           [min(first.min_value,cur_key2.min_value)...last.max_value].
8126           If this forms a full range (the range covers all possible
8127           values) we return no SEL_ARG RB-tree.
8128         */
8129         bool full_range= last->copy_min(first);
8130         if (!full_range)
8131           full_range= last->copy_min(cur_key2);
8132 
8133         if (!full_range)
8134         {
8135           if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0)
8136           {
8137             /*
8138               This is the case:
8139               cur_key2:   [-------------]
8140               key1:     [***------]  [xxxx]
8141                         ^            ^
8142                         last         different next_key_part
8143 
8144               Extend range of last up to last->next:
8145               cur_key2:   [-------------]
8146               key1:     [***--------][xxxx]
8147             */
8148             last->copy_min_to_max(last->next);
8149           }
8150           else
8151             /*
8152               This is the case:
8153               cur_key2:   [--------*****]
8154               key1:     [***---------]    [xxxx]
8155                         ^                 ^
8156                         last              different next_key_part
8157 
8158               Extend range of last up to max(last.max, cur_key2.max):
8159               cur_key2:   [--------*****]
8160               key1:     [***----------**] [xxxx]
8161              */
8162             full_range= last->copy_max(cur_key2);
8163         }
8164         if (full_range)
8165         {                                       // Full range
8166           key1->free_tree();
8167           key1->type= SEL_ARG::ALWAYS;
8168           key2->type= SEL_ARG::ALWAYS;
8169           for (; cur_key2 ; cur_key2= cur_key2->next)
8170             cur_key2->increment_use_count(-1);  // Free not used tree
8171           if (key1->maybe_flag)
8172             return new SEL_ARG(SEL_ARG::MAYBE_KEY);
8173           return 0;
8174         }
8175       }
8176     }
8177 
8178     if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0)
8179     {
8180       /*
8181         This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
8182         cur_key2:                [-------]
8183         cur_key1:         [----------*******]
8184       */
8185 
8186       if (!cur_key1->next_key_part)
8187       {
8188         /*
8189           cur_key1->next_key_part is empty: cut the range that
8190           is covered by cur_key1 from cur_key2.
8191           Reason: (cur_key2->next_key_part OR
8192           cur_key1->next_key_part) will be empty and therefore
8193           equal to cur_key1->next_key_part. Thus, this part of
8194           the cur_key2 range is completely covered by cur_key1.
8195         */
8196         if (cur_key1->cmp_max_to_max(cur_key2) >= 0)
8197         {
8198           /*
8199             cur_key1 covers the entire range in cur_key2.
8200             cur_key2:            [-------]
8201             cur_key1:     [-----------------]
8202 
8203             Move on to next range in key2
8204           */
8205           cur_key2->increment_use_count(-1); // Free not used tree
8206           cur_key2= cur_key2->next;
8207           continue;
8208         }
8209         else
8210         {
8211           /*
8212             This is the case:
8213             cur_key2:            [-------]
8214             cur_key1:     [---------]
8215 
8216             Result:
8217             cur_key2:                [---]
8218             cur_key1:     [---------]
8219           */
8220           cur_key2->copy_max_to_min(cur_key1);
8221           continue;
8222         }
8223       }
8224 
8225       /*
8226         The ranges are overlapping but have not been merged because
8227         next_key_part of cur_key1 and cur_key2 differ.
8228         cur_key2:               [----]
8229         cur_key1:     [------------*****]
8230 
8231         Split cur_key1 in two where cur_key2 starts:
8232         cur_key2:               [----]
8233         key1:         [--------][--*****]
8234                       ^         ^
8235                       insert    cur_key1
8236       */
8237       SEL_ARG *new_arg= cur_key1->clone_first(cur_key2);
8238       if (!new_arg)
8239         return 0;                               // OOM
8240       if ((new_arg->next_key_part= cur_key1->next_key_part))
8241         new_arg->increment_use_count(key1->use_count+1);
8242       cur_key1->copy_min_to_min(cur_key2);
8243       key1= key1->insert(new_arg);
8244     } // cur_key1.min >= cur_key2.min due to this if()
8245 
8246     /*
8247       Now cur_key2.min <= cur_key1.min <= cur_key2.max:
8248       cur_key2:    [---------]
8249       cur_key1:    [****---*****]
8250      */
8251     SEL_ARG key2_cpy(*cur_key2); // Get copy we can modify
8252     for (;;)
8253     {
8254       if (cur_key1->cmp_min_to_min(&key2_cpy) > 0)
8255       {
8256         /*
8257           This is the case:
8258           key2_cpy:    [------------]
8259           key1:                 [-*****]
8260                                 ^
8261                                 cur_key1
8262 
8263           Result:
8264           key2_cpy:             [---]
8265           key1:        [-------][-*****]
8266                        ^        ^
8267                        insert   cur_key1
8268          */
8269         SEL_ARG *new_arg=key2_cpy.clone_first(cur_key1);
8270         if (!new_arg)
8271           return 0; // OOM
8272         if ((new_arg->next_key_part=key2_cpy.next_key_part))
8273           new_arg->increment_use_count(key1->use_count+1);
8274         key1= key1->insert(new_arg);
8275         key2_cpy.copy_min_to_min(cur_key1);
8276       }
8277       // Now key2_cpy.min == cur_key1.min
8278 
8279       if ((cmp= cur_key1->cmp_max_to_max(&key2_cpy)) <= 0)
8280       {
8281         /*
8282           cur_key1.max <= key2_cpy.max:
8283           key2_cpy:       a)  [-------]    or b)     [----]
8284           cur_key1:           [----]                 [----]
8285 
8286           Steps:
8287 
8288            1) Update next_key_part of cur_key1: OR it with
8289               key2_cpy->next_key_part.
8290            2) If case a: Insert range [cur_key1.max, key2_cpy.max]
8291               into key1 using next_key_part of key2_cpy
8292 
8293            Result:
8294            key1:          a)  [----][-]    or b)     [----]
8295          */
8296         cur_key1->maybe_flag|= key2_cpy.maybe_flag;
8297         key2_cpy.increment_use_count(key1->use_count+1);
8298         cur_key1->next_key_part=
8299           key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part);
8300 
8301         if (!cmp)
8302           break;                     // case b: done with this key2 range
8303 
8304         // Make key2_cpy the range [cur_key1.max, key2_cpy.max]
8305         key2_cpy.copy_max_to_min(cur_key1);
8306         if (!(cur_key1= cur_key1->next))
8307         {
8308           /*
8309             No more ranges in key1. Insert key2_cpy and go to "end"
8310             label to insert remaining ranges in key2 if any.
8311           */
8312           SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8313           if (!new_key1_range)
8314             return 0; // OOM
8315           key1= key1->insert(new_key1_range);
8316           cur_key2= cur_key2->next;
8317           goto end;
8318         }
8319         if (cur_key1->cmp_min_to_max(&key2_cpy) > 0)
8320         {
8321           /*
8322             The next range in key1 does not overlap with key2_cpy.
8323             Insert this range into key1 and move on to the next range
8324             in key2.
8325           */
8326           SEL_ARG *new_key1_range= new SEL_ARG(key2_cpy);
8327           if (!new_key1_range)
8328             return 0;                           // OOM
8329           key1= key1->insert(new_key1_range);
8330           break;
8331         }
8332         /*
8333           key2_cpy overlaps with the next range in key1 and the case
8334           is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
8335           to for(;;) to handle this situation.
8336         */
8337         continue;
8338       }
8339       else
8340       {
8341         /*
8342           This is the case:
8343           key2_cpy:        [-------]
8344           cur_key1:        [------------]
8345 
8346           Result:
8347           key1:            [-------][---]
8348                            ^        ^
8349                            new_arg  cur_key1
8350           Steps:
8351 
8352            0) If cur_key1->next_key_part is empty: do nothing.
8353               Reason: (key2_cpy->next_key_part OR
8354               cur_key1->next_key_part) will be empty and
8355               therefore equal to cur_key1->next_key_part. Thus,
8356               the range in key2_cpy is completely covered by
8357               cur_key1
8358            1) Make new_arg with range [cur_key1.min, key2_cpy.max].
8359               new_arg->next_key_part is OR between next_key_part of
8360               cur_key1 and key2_cpy
8361            2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
8362            3) Insert new_arg into key1
8363         */
8364         if (!cur_key1->next_key_part) // Step 0
8365         {
8366           key2_cpy.increment_use_count(-1);     // Free not used tree
8367           break;
8368         }
8369         SEL_ARG *new_arg= cur_key1->clone_last(&key2_cpy);
8370         if (!new_arg)
8371           return 0; // OOM
8372         cur_key1->copy_max_to_min(&key2_cpy);
8373         cur_key1->increment_use_count(key1->use_count+1);
8374         /* Increment key count as it may be used for next loop */
8375         key2_cpy.increment_use_count(1);
8376         new_arg->next_key_part= key_or(param, cur_key1->next_key_part,
8377                                        key2_cpy.next_key_part);
8378         key1= key1->insert(new_arg);
8379         break;
8380       }
8381     }
8382     // Move on to next range in key2
8383     cur_key2= cur_key2->next;
8384   }
8385 
8386 end:
8387   /*
8388     Add key2 ranges that are non-overlapping with and higher than the
8389     highest range in key1.
8390   */
8391   while (cur_key2)
8392   {
8393     SEL_ARG *next= cur_key2->next;
8394     if (key2_shared)
8395     {
8396       SEL_ARG *key2_cpy=new SEL_ARG(*cur_key2);  // Must make copy
8397       if (!key2_cpy)
8398         return 0;
8399       cur_key2->increment_use_count(key1->use_count+1);
8400       key1= key1->insert(key2_cpy);
8401     }
8402     else
8403       key1= key1->insert(cur_key2);   // Will destroy key2_root
8404     cur_key2= next;
8405   }
8406   key1->use_count++;
8407 
8408   return key1;
8409 }
8410 
8411 
8412 /* Compare if two trees are equal */
8413 
eq_tree(SEL_ARG * a,SEL_ARG * b)8414 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
8415 {
8416   if (a == b)
8417     return 1;
8418   if (!a || !b || !a->is_same(b))
8419     return 0;
8420   if (a->left != &null_element && b->left != &null_element)
8421   {
8422     if (!eq_tree(a->left,b->left))
8423       return 0;
8424   }
8425   else if (a->left != &null_element || b->left != &null_element)
8426     return 0;
8427   if (a->right != &null_element && b->right != &null_element)
8428   {
8429     if (!eq_tree(a->right,b->right))
8430       return 0;
8431   }
8432   else if (a->right != &null_element || b->right != &null_element)
8433     return 0;
8434   if (a->next_key_part != b->next_key_part)
8435   {						// Sub range
8436     if (!a->next_key_part != !b->next_key_part ||
8437 	!eq_tree(a->next_key_part, b->next_key_part))
8438       return 0;
8439   }
8440   return 1;
8441 }
8442 
8443 
/*
  Insert "key" into this SEL_ARG RB-tree and into the tree's
  doubly-linked next/prev list of ranges (kept ordered by range min
  value). Returns the (possibly new) root after rebalancing.
*/
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element);

  /*
    Walk down to the leaf position where key belongs. "par" remembers
    the child pointer slot to hook key into; "last_element" becomes
    key's parent.
  */
  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
	/* Link in list */
  if (par == &last_element->left)
  {
    /* key became a left child: it is last_element's new predecessor */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key became a right child: it is last_element's new successor */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);			// rebalance tree
  root->use_count=this->use_count;		// copy root info
  root->elements= this->elements+1;
  root->maybe_flag=this->maybe_flag;
  return root;
}
8485 
8486 
8487 /*
8488 ** Find best key with min <= given key
8489 ** Because the call context this should never return 0 to get_range
8490 */
8491 
8492 SEL_ARG *
find_range(SEL_ARG * key)8493 SEL_ARG::find_range(SEL_ARG *key)
8494 {
8495   SEL_ARG *element=this,*found=0;
8496 
8497   for (;;)
8498   {
8499     if (element == &null_element)
8500       return found;
8501     int cmp=element->cmp_min_to_min(key);
8502     if (cmp == 0)
8503       return element;
8504     if (cmp < 0)
8505     {
8506       found=element;
8507       element=element->right;
8508     }
8509     else
8510       element=element->left;
8511   }
8512 }
8513 
8514 
8515 /*
8516   Remove a element from the tree
8517 
8518   SYNOPSIS
8519     tree_delete()
8520     key		Key that is to be deleted from tree (this)
8521 
8522   NOTE
8523     This also frees all sub trees that is used by the element
8524 
8525   RETURN
8526     root of new tree (with key deleted)
8527 */
8528 
SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_use_count(-1);         // Release sub-trees key referenced
  /* "par" is the child-pointer slot that currently points at key */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* No left child: splice key out by lifting its right child */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* No right child: lift the left child into key's slot */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    /*
      Two children: key's in-order successor (key->next, the leftmost
      node of the right subtree) is unlinked from its old position and
      moved into key's place; the fixup then runs as if the successor
      had been removed.
    */
    SEL_ARG *tmp=key->next;			// next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;	// unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;			// Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)				// key->right == key->next
      fix_par=tmp;				// new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);				// Maybe root later
  /* Removing a black node may break the equal-black-height invariant */
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  root->use_count=this->use_count;		// Fix root counters
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}
8595 
8596 
8597 	/* Functions to fix up the tree after insert and delete */
8598 
left_rotate(SEL_ARG ** root,SEL_ARG * leaf)8599 static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
8600 {
8601   SEL_ARG *y=leaf->right;
8602   leaf->right=y->left;
8603   if (y->left != &null_element)
8604     y->left->parent=leaf;
8605   if (!(y->parent=leaf->parent))
8606     *root=y;
8607   else
8608     *leaf->parent_ptr()=y;
8609   y->left=leaf;
8610   leaf->parent=y;
8611 }
8612 
right_rotate(SEL_ARG ** root,SEL_ARG * leaf)8613 static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
8614 {
8615   SEL_ARG *y=leaf->left;
8616   leaf->left=y->right;
8617   if (y->right != &null_element)
8618     y->right->parent=leaf;
8619   if (!(y->parent=leaf->parent))
8620     *root=y;
8621   else
8622     *leaf->parent_ptr()=y;
8623   y->right=leaf;
8624   leaf->parent=y;
8625 }
8626 
8627 
/*
  Red-black insertion fixup: "leaf" has just been linked into the tree
  by insert() and is colored red here. Recolor and rotate upwards
  until no red node has a red parent, then return the (possibly new)
  root, which is always painted black.
*/
SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
      /* Parent is a left child; y is leaf's uncle */
      y= par2->right;
      if (y->color == RED)
      {
        /* Red uncle: recolor and continue fixup from the grandparent */
        par->color=BLACK;
        y->color=BLACK;
        leaf=par2;
        leaf->color=RED;		/* And the loop continues */
      }
      else
      {
        if (leaf == par->right)
        {
          /* Inner child: rotate so leaf's subtree forms a straight line */
          left_rotate(&root,leaf->parent);
          par=leaf;			/* leaf is now parent to old leaf */
        }
        /* Outer child: one rotation of the grandparent finishes fixup */
        par->color=BLACK;
        par2->color=RED;
        right_rotate(&root,par2);
        break;
      }
    }
    else
    {
      /* Mirror image: parent is a right child; y is leaf's uncle */
      y= par2->left;
      if (y->color == RED)
      {
        /* Red uncle: recolor and continue fixup from the grandparent */
        par->color=BLACK;
        y->color=BLACK;
        leaf=par2;
        leaf->color=RED;		/* And the loop continues */
      }
      else
      {
        if (leaf == par->left)
        {
          /* Inner child: rotate into the straight-line configuration */
          right_rotate(&root,par);
          par=leaf;
        }
        /* Outer child: rotate the grandparent and stop */
        par->color=BLACK;
        par2->color=RED;
        left_rotate(&root,par2);
        break;
      }
    }
  }
  root->color=BLACK;
#ifndef DBUG_OFF
  test_rb_tree(root,root->parent);
#endif
  return root;
}
8690 
8691 
/*
  Red-black deletion fixup. "key" is the child that replaced the
  removed black node and "par" is its parent (passed separately since
  "key" may be the shared null_element, whose parent pointer cannot be
  trusted). Restores the equal-black-height invariant and returns the
  possibly new root.
*/
SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      /* x is a left child; w is its sibling */
      w=par->right;
      if (w->color == SEL_ARG::RED)
      {
        /* Red sibling: rotate to get a black sibling, then re-examine */
        w->color=SEL_ARG::BLACK;
        par->color=SEL_ARG::RED;
        left_rotate(&root,par);
        w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
        /* Sibling has two black children: push the problem upwards */
        w->color=SEL_ARG::RED;
        x=par;
      }
      else
      {
        if (w->right->color == SEL_ARG::BLACK)
        {
          /* Sibling's far child is black: rotate sibling first */
          w->left->color=SEL_ARG::BLACK;
          w->color=SEL_ARG::RED;
          right_rotate(&root,w);
          w=par->right;
        }
        /* Far child red: final rotation absorbs the extra black */
        w->color=par->color;
        par->color=SEL_ARG::BLACK;
        w->right->color=SEL_ARG::BLACK;
        left_rotate(&root,par);
        x=root;
        break;
      }
    }
    else
    {
      /* Mirror image: x is a right child; w is its sibling */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
        w->color=SEL_ARG::BLACK;
        par->color=SEL_ARG::RED;
        right_rotate(&root,par);
        w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
        w->color=SEL_ARG::RED;
        x=par;
      }
      else
      {
        if (w->left->color == SEL_ARG::BLACK)
        {
          w->right->color=SEL_ARG::BLACK;
          w->color=SEL_ARG::RED;
          left_rotate(&root,w);
          w=par->left;
        }
        w->color=par->color;
        par->color=SEL_ARG::BLACK;
        w->left->color=SEL_ARG::BLACK;
        right_rotate(&root,par);
        x=root;
        break;
      }
    }
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}
8769 
8770 
8771 #ifndef DBUG_OFF
8772 	/* Test that the properties for a red-black tree hold */
8773 
test_rb_tree(SEL_ARG * element,SEL_ARG * parent)8774 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
8775 {
8776   int count_l,count_r;
8777 
8778   if (element == &null_element)
8779     return 0;					// Found end of tree
8780   if (element->parent != parent)
8781   {
8782     sql_print_error("Wrong tree: Parent doesn't point at parent");
8783     return -1;
8784   }
8785   if (element->color == SEL_ARG::RED &&
8786       (element->left->color == SEL_ARG::RED ||
8787        element->right->color == SEL_ARG::RED))
8788   {
8789     sql_print_error("Wrong tree: Found two red in a row");
8790     return -1;
8791   }
8792   if (element->left == element->right && element->left != &null_element)
8793   {						// Dummy test
8794     sql_print_error("Wrong tree: Found right == left");
8795     return -1;
8796   }
8797   count_l=test_rb_tree(element->left,element);
8798   count_r=test_rb_tree(element->right,element);
8799   if (count_l >= 0 && count_r >= 0)
8800   {
8801     if (count_l == count_r)
8802       return count_l+(element->color == SEL_ARG::BLACK);
8803     sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
8804 	    count_l,count_r);
8805   }
8806   return -1;					// Error, no more warnings
8807 }
8808 
8809 
8810 /**
8811   Count how many times SEL_ARG graph "root" refers to its part "key" via
8812   transitive closure.
8813 
8814   @param root  An RB-Root node in a SEL_ARG graph.
8815   @param key   Another RB-Root node in that SEL_ARG graph.
8816 
8817   The passed "root" node may refer to "key" node via root->next_key_part,
8818   root->next->n
8819 
8820   This function counts how many times the node "key" is referred (via
8821   SEL_ARG::next_key_part) by
8822   - intervals of RB-tree pointed by "root",
8823   - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
8824   intervals of RB-tree pointed by "root",
8825   - and so on.
8826 
8827   Here is an example (horizontal links represent next_key_part pointers,
8828   vertical links - next/prev prev pointers):
8829 
8830          +----+               $
8831          |root|-----------------+
8832          +----+               $ |
8833            |                  $ |
8834            |                  $ |
8835          +----+       +---+   $ |     +---+    Here the return value
8836          |    |- ... -|   |---$-+--+->|key|    will be 4.
8837          +----+       +---+   $ |  |  +---+
8838            |                  $ |  |
8839           ...                 $ |  |
8840            |                  $ |  |
8841          +----+   +---+       $ |  |
8842          |    |---|   |---------+  |
8843          +----+   +---+       $    |
8844            |        |         $    |
8845           ...     +---+       $    |
8846                   |   |------------+
8847                   +---+       $
8848   @return
8849   Number of links to "key" from nodes reachable from "root".
8850 */
8851 
count_key_part_usage(SEL_ARG * root,SEL_ARG * key)8852 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
8853 {
8854   ulong count= 0;
8855   for (root=root->first(); root ; root=root->next)
8856   {
8857     if (root->next_key_part)
8858     {
8859       if (root->next_key_part == key)
8860 	count++;
8861       if (root->next_key_part->part < key->part)
8862 	count+=count_key_part_usage(root->next_key_part,key);
8863     }
8864   }
8865   return count;
8866 }
8867 
8868 
8869 /*
8870   Check if SEL_ARG::use_count value is correct
8871 
8872   SYNOPSIS
8873     SEL_ARG::test_use_count()
8874       root  The root node of the SEL_ARG graph (an RB-tree root node that
8875             has the least value of sel_arg->part in the entire graph, and
8876             thus is the "origin" of the graph)
8877 
8878   DESCRIPTION
8879     Check if SEL_ARG::use_count value is correct. See the definition of
8880     use_count for what is "correct".
8881 */
8882 
void SEL_ARG::test_use_count(SEL_ARG *root)
{
  uint e_count=0;   // Number of intervals actually found in this RB-tree
  /* The graph origin must be referenced exactly once */
  if (this == root && use_count != 1)
  {
    sql_print_information("Use_count: Wrong count %lu for root",use_count);
    // DBUG_ASSERT(false); // Todo - enable and clean up mess
    return;
  }
  if (this->type != SEL_ARG::KEY_RANGE)
    return;
  for (SEL_ARG *pos=first(); pos ; pos=pos->next)
  {
    e_count++;
    if (pos->next_key_part)
    {
      /*
        use_count must be at least the number of references reachable
        from "root"; fewer means something would be freed too early.
      */
      ulong count=count_key_part_usage(root,pos->next_key_part);
      if (count > pos->next_key_part->use_count)
      {
        sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu "
                              "should be %lu", (long unsigned int)pos,
                              pos->next_key_part->use_count, count);
        // DBUG_ASSERT(false); // Todo - enable and clean up mess
	return;
      }
      pos->next_key_part->test_use_count(root);
    }
  }
  /* elements is maintained by insert()/tree_delete(); verify it */
  if (e_count != elements)
  {
    sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                      e_count, elements, (long unsigned int) this);
    // DBUG_ASSERT(false); // Todo - enable and clean up mess
  }
}
8918 #endif
8919 
8920 /****************************************************************************
8921   MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
8922  ****************************************************************************/
8923 
8924 /* MRR range sequence, SEL_ARG* implementation: stack entry */
/* MRR range sequence, SEL_ARG* implementation: one stack entry per keypart */
typedef struct st_range_seq_entry
{
  /*
    Pointers in min and max keys. They point to right-after-end of key
    images. The 0-th entry has these pointing to key tuple start.
  */
  uchar *min_key, *max_key;

  /*
    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
    min_key_flag may have NULL_RANGE set.
  */
  uint min_key_flag, max_key_flag;

  /* Number of key parts stored in min_key/max_key so far */
  uint min_key_parts, max_key_parts;
  /**
    Pointer into the R-B tree for this keypart. It points to the
    currently active range for the keypart, so calling next on it will
    get to the next range. sel_arg_range_seq_next() uses this to avoid
    reparsing the R-B range trees each time a new range is fetched.
  */
  SEL_ARG *key_tree;
} RANGE_SEQ_ENTRY;
8949 
8950 
8951 /*
8952   MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
8953 */
class Sel_arg_range_sequence
{
private:

  /**
    Stack of ranges for the curr_kp first keyparts. Used by
    sel_arg_range_seq_next() so that if the next range is equal to the
    previous one for the first x keyparts, stack[x-1] can be
    accumulated with the new range in keyparts > x to quickly form
    the next range to return.

    Notation used below: "x:y" means a range where
    "column_in_keypart_0=x" and "column_in_keypart_1=y". For
    simplicity, only equality (no BETWEEN, < etc) is considered in the
    example but the same principle applies to other range predicate
    operators too.

    Consider a query with these range predicates:
      (kp0=1 and kp1=2 and kp2=3) or
      (kp0=1 and kp1=2 and kp2=4) or
      (kp0=1 and kp1=3 and kp2=5) or
      (kp0=1 and kp1=3 and kp2=6)

    1) sel_arg_range_seq_next() is called the first time
       - traverse the R-B tree (see SEL_ARG) to find the first range
       - returns range "1:2:3"
       - values in stack after this: stack[1, 1:2, 1:2:3]
    2) sel_arg_range_seq_next() is called second time
       - keypart 2 has another range, so the next range in
         keypart 2 is appended to stack[1] and saved
         in stack[2]
       - returns range "1:2:4"
       - values in stack after this: stack[1, 1:2, 1:2:4]
    3) sel_arg_range_seq_next() is called the third time
       - no more ranges in keypart 2, but keypart 1 has
         another range, so the next range in keypart 1 is
         appended to stack[0] and saved in stack[1]. The first
         range in keypart 2 is then appended to stack[1] and
         saved in stack[2]
       - returns range "1:3:5"
       - values in stack after this: stack[1, 1:3, 1:3:5]
    4) sel_arg_range_seq_next() is called the fourth time
       - keypart 2 has another range, see 2)
       - returns range "1:3:6"
       - values in stack after this: stack[1, 1:3, 1:3:6]
   */
  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
  /*
    Index of last used element in the above array. A value of -1 means
    that the stack is empty.
  */
  int curr_kp;

public:
  uint keyno;      /* index of used tree in SEL_TREE structure */
  uint real_keyno; /* Number of the index in tables */

  PARAM * const param;
  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */

  Sel_arg_range_sequence(PARAM *param_arg) : param(param_arg) { reset(); }

  /*
    Empty the stack and point stack[0]'s key image pointers at the
    start of the param min/max key buffers.
  */
  void reset()
  {
    stack[0].key_tree= NULL;
    stack[0].min_key= (uchar*)param->min_key;
    stack[0].min_key_flag= 0;
    stack[0].min_key_parts= 0;

    stack[0].max_key= (uchar*)param->max_key;
    stack[0].max_key_flag= 0;
    stack[0].max_key_parts= 0;
    curr_kp= -1;
  }

  bool stack_empty() const { return (curr_kp == -1); }

  /* Push the range for the next keypart; defined out of line below */
  void stack_push_range(SEL_ARG *key_tree);

  /* Drop the top keypart; a pop of the last entry resets everything */
  void stack_pop_range()
  {
    DBUG_ASSERT(!stack_empty());
    if (curr_kp == 0)
      reset();
    else
      curr_kp--;
  }

  /* Number of keyparts currently on the stack */
  int stack_size() const { return curr_kp + 1; }

  RANGE_SEQ_ENTRY *stack_top()
  {
    return stack_empty() ? NULL : &stack[curr_kp];
  }
};
9049 
9050 
9051 /*
9052   Range sequence interface, SEL_ARG* implementation: Initialize the traversal
9053 
9054   SYNOPSIS
9055     init()
9056       init_params  SEL_ARG tree traversal context
9057       n_ranges     [ignored] The number of ranges obtained
9058       flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
9059 
9060   RETURN
9061     Value of init_param
9062 */
9063 
sel_arg_range_seq_init(void * init_param,uint n_ranges,uint flags)9064 range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
9065 {
9066   Sel_arg_range_sequence *seq=
9067     static_cast<Sel_arg_range_sequence*>(init_param);
9068   seq->reset();
9069   return init_param;
9070 }
9071 
9072 
/*
  Push "key_tree" (the active range for the next keypart) onto the
  stack: append its min/max values to the key image buffers and
  accumulate the range flags of all pushed keyparts.
*/
void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree)
{

  DBUG_ASSERT((uint)curr_kp+1 < MAX_REF_PARTS);

  RANGE_SEQ_ENTRY *push_position= &stack[curr_kp + 1];
  RANGE_SEQ_ENTRY *last_added_kp= stack_top();
  if (stack_empty())
  {
    /*
       If we get here this is either
         a) the first time a range sequence is constructed for this
            range access method (in which case stack[0] has not been
            modified since the constructor was called), or
         b) there are multiple ranges for the first keypart in the
            condition (and we have called stack_pop_range() to empty
            the stack).
       In both cases, reset() has been called and all fields in
       push_position have been reset. All we need to do is to copy the
       min/max key flags from the predicate we're about to add to
       stack[0].
    */
    push_position->min_key_flag= key_tree->min_flag;
    push_position->max_key_flag= key_tree->max_flag;
  }
  else
  {
    /* Continue from the accumulated state of the previous keypart */
    push_position->min_key= last_added_kp->min_key;
    push_position->max_key= last_added_kp->max_key;
    push_position->min_key_parts= last_added_kp->min_key_parts;
    push_position->max_key_parts= last_added_kp->max_key_parts;
    push_position->min_key_flag= last_added_kp->min_key_flag |
                                 key_tree->min_flag;
    push_position->max_key_flag= last_added_kp->max_key_flag |
                                 key_tree->max_flag;
  }

  push_position->key_tree= key_tree;
  uint16 stor_length= param->key[keyno][key_tree->part].store_length;
  /* psergey-merge-done:
  key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
                  &cur->min_key, prev->min_key_flag,
                  &cur->max_key, prev->max_key_flag);
  */
  /* Append this keypart's min/max values; store_* advance the pointers */
  push_position->min_key_parts+=
    key_tree->store_min(stor_length, &push_position->min_key,
                        last_added_kp ? last_added_kp->min_key_flag : 0);
  push_position->max_key_parts+=
    key_tree->store_max(stor_length, &push_position->max_key,
                        last_added_kp ? last_added_kp->max_key_flag : 0);

  if (key_tree->is_null_interval())
    push_position->min_key_flag |= NULL_RANGE;
  curr_kp++;
}
9128 
9129 
9130 /*
9131   Range sequence interface, SEL_ARG* implementation: get the next interval
9132   in the R-B tree
9133 
9134   SYNOPSIS
9135     sel_arg_range_seq_next()
9136       rseq        Value returned from sel_arg_range_seq_init
9137       range  OUT  Store information about the range here
9138 
9139   DESCRIPTION
9140     This is "get_next" function for Range sequence interface implementation
9141     for SEL_ARG* tree.
9142 
9143   IMPLEMENTATION
9144     The traversal also updates those param members:
9145       - is_ror_scan
9146       - range_count
9147       - max_key_part
9148 
9149   RETURN
9150     0  Ok
9151     1  No more ranges in the sequence
9152 
9153   NOTE: append_range_all_keyparts(), which is used to e.g. print
9154   ranges to Optimizer Trace in a human readable format, mimics the
9155   behavior of this function.
9156 */
9157 
9158 //psergey-merge-todo: support check_quick_keys:max_keypart
sel_arg_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)9159 uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
9160 {
9161   SEL_ARG *key_tree;
9162   Sel_arg_range_sequence *seq= static_cast<Sel_arg_range_sequence*>(rseq);
9163 
9164   if (seq->stack_empty())
9165   {
9166     /*
9167       This is the first time sel_arg_range_seq_next is called.
9168       seq->start points to the root of the R-B tree for the first
9169       keypart
9170     */
9171     key_tree= seq->start;
9172 
9173     /*
9174       Move to the first range for the first keypart. Save this range
9175       in seq->stack[0] and carry on to ranges in the next keypart if
9176       any
9177     */
9178     key_tree= key_tree->first();
9179     seq->stack_push_range(key_tree);
9180   }
9181   else
9182   {
9183     /*
9184       This is not the first time sel_arg_range_seq_next is called, so
9185       seq->stack is populated with the range the last call to this
9186       function found. seq->stack[current_keypart].key_tree points to a
9187       leaf in the R-B tree of the last keypart that was part of the
9188       former range. This is the starting point for finding the next
9189       range. @see Sel_arg_range_sequence::stack
9190     */
9191     // See if there are more ranges in this or any of the previous keyparts
9192     while (true)
9193     {
9194       key_tree= seq->stack_top()->key_tree;
9195       seq->stack_pop_range();
9196       if (key_tree->next)
9197       {
9198         /* This keypart has more ranges */
9199         DBUG_ASSERT(key_tree->next != &null_element);
9200         key_tree= key_tree->next;
9201 
9202         /*
9203           save the next range for this keypart and carry on to ranges in
9204           the next keypart if any
9205         */
9206         seq->stack_push_range(key_tree);
9207         seq->param->is_ror_scan= FALSE;
9208         break;
9209       }
9210 
9211       if (seq->stack_empty())
9212       {
9213         // There are no more ranges for the first keypart: we're done
9214         return 1;
9215       }
9216       /*
9217          There are no more ranges for the current keypart. Step back
9218          to the previous keypart and see if there are more ranges
9219          there.
9220       */
9221     }
9222   }
9223 
9224   DBUG_ASSERT(!seq->stack_empty());
9225 
9226   /*
9227     Add range info for the next keypart if
9228       1) there is a range predicate for a later keypart
9229       2) the range predicate is for the next keypart in the index: a
9230          range predicate on keypartX+1 can only be used if there is a
9231          range predicate on keypartX.
9232       3) the range predicate on the next keypart is usable
9233   */
9234   while (key_tree->next_key_part &&                              // 1)
9235          key_tree->next_key_part != &null_element &&             // 1)
9236          key_tree->next_key_part->part == key_tree->part + 1 &&  // 2)
9237          key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)    // 3)
9238   {
9239     {
9240       DBUG_PRINT("info", ("while(): key_tree->part %d",key_tree->part));
9241       RANGE_SEQ_ENTRY *cur= seq->stack_top();
9242       const uint min_key_total_length= cur->min_key - seq->param->min_key;
9243       const uint max_key_total_length= cur->max_key - seq->param->max_key;
9244 
9245       /*
9246         Check if more ranges can be added. This is the case if all
9247         predicates for keyparts handled so far are equality
9248         predicates. If either of the following apply, there are
9249         non-equality predicates in stack[]:
9250 
9251         1) min_key_total_length != max_key_total_length (because
9252            equality ranges are stored as "min_key = max_key = <value>")
9253         2) memcmp(<min_key_values>,<max_key_values>) != 0 (same argument as 1)
9254         3) A min or max flag has been set: Because flags denote ranges
9255            ('<', '<=' etc), any value but 0 indicates a non-equality
9256            predicate.
9257        */
9258 
9259       uchar* min_key_start;
9260       uchar* max_key_start;
9261       uint cur_key_length;
9262 
9263       if (seq->stack_size() == 1)
9264       {
9265         min_key_start= seq->param->min_key;
9266         max_key_start= seq->param->max_key;
9267         cur_key_length= min_key_total_length;
9268       }
9269       else
9270       {
9271         const RANGE_SEQ_ENTRY prev= cur[-1];
9272         min_key_start= prev.min_key;
9273         max_key_start= prev.max_key;
9274         cur_key_length= cur->min_key - prev.min_key;
9275       }
9276 
9277       if ((min_key_total_length != max_key_total_length) ||         // 1)
9278           (memcmp(min_key_start, max_key_start, cur_key_length)) || // 2)
9279           (key_tree->min_flag || key_tree->max_flag))               // 3)
9280       {
9281         DBUG_PRINT("info", ("while(): inside if()"));
9282         /*
9283           The range predicate up to and including the one in key_tree
9284           is usable by range access but does not allow subranges made
9285           up from predicates in later keyparts. This may e.g. be
9286           because the predicate operator is "<". Since there are range
9287           predicates on more keyparts, we use those to more closely
9288           specify the start and stop locations for the range. Example:
9289 
9290                 "SELECT * FROM t1 WHERE a >= 2 AND b >= 3":
9291 
9292                 t1 content:
9293                 -----------
9294                 1 1
9295                 2 1     <- 1)
9296                 2 2
9297                 2 3     <- 2)
9298                 2 4
9299                 3 1
9300                 3 2
9301                 3 3
9302 
9303           The predicate cannot be translated into something like
9304              "(a=2 and b>=3) or (a=3 and b>=3) or ..."
9305           I.e., it cannot be divided into subranges, but by storing
9306           min/max key below we can at least start the scan from 2)
9307           instead of 1)
9308         */
9309         SEL_ARG *store_key_part= key_tree->next_key_part;
9310         seq->param->is_ror_scan= FALSE;
9311         if (!key_tree->min_flag)
9312           cur->min_key_parts +=
9313             store_key_part->store_min_key(seq->param->key[seq->keyno],
9314                                           &cur->min_key,
9315                                           &cur->min_key_flag,
9316                                           MAX_KEY);
9317         if (!key_tree->max_flag)
9318           cur->max_key_parts +=
9319             store_key_part->store_max_key(seq->param->key[seq->keyno],
9320                                           &cur->max_key,
9321                                           &cur->max_key_flag,
9322                                           MAX_KEY);
9323         break;
9324       }
9325     }
9326 
9327     /*
9328       There are usable range predicates for the next keypart and the
9329       range predicate for the current keypart allows us to make use of
9330       them. Move to the first range predicate for the next keypart.
9331       Push this range predicate to seq->stack and move on to the next
9332       keypart (if any). @see Sel_arg_range_sequence::stack
9333     */
9334     key_tree= key_tree->next_key_part->first();
9335     seq->stack_push_range(key_tree);
9336   }
9337 
9338   DBUG_ASSERT(!seq->stack_empty() && (seq->stack_top() != NULL));
9339 
9340   // We now have a full range predicate in seq->stack_top()
9341   RANGE_SEQ_ENTRY *cur= seq->stack_top();
9342   PARAM *param= seq->param;
9343   uint min_key_length= cur->min_key - param->min_key;
9344 
9345   if (cur->min_key_flag & GEOM_FLAG)
9346   {
9347     range->range_flag= cur->min_key_flag;
9348 
9349     /* Here minimum contains also function code bits, and maximum is +inf */
9350     range->start_key.key=    param->min_key;
9351     range->start_key.length= min_key_length;
9352     range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9353     range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
9354     /*
9355       Spatial operators are only allowed on spatial indexes, and no
9356       spatial index can at the moment return rows in ROWID order
9357     */
9358     DBUG_ASSERT(!param->is_ror_scan);
9359   }
9360   else
9361   {
9362     const KEY *cur_key_info= &param->table->key_info[seq->real_keyno];
9363     range->range_flag= cur->min_key_flag | cur->max_key_flag;
9364 
9365     range->start_key.key=    param->min_key;
9366     range->start_key.length= cur->min_key - param->min_key;
9367     range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
9368     range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
9369                                                            HA_READ_KEY_EXACT);
9370 
9371     range->end_key.key=    param->max_key;
9372     range->end_key.length= cur->max_key - param->max_key;
9373     range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
9374     range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
9375                                                          HA_READ_AFTER_KEY);
9376 
9377     /*
9378       This is an equality range (keypart_0=X and ... and keypart_n=Z) if
9379         1) There are no flags indicating open range (e.g.,
9380            "keypart_x > y") or GIS.
9381         2) The lower bound and the upper bound of the range has the
9382            same value (min_key == max_key).
9383      */
9384     const uint is_open_range= (NO_MIN_RANGE | NO_MAX_RANGE |
9385                                NEAR_MIN | NEAR_MAX | GEOM_FLAG);
9386     const bool is_eq_range_pred=
9387       !(cur->min_key_flag & is_open_range) &&                           // 1)
9388       !(cur->max_key_flag & is_open_range) &&                           // 1)
9389       range->start_key.length == range->end_key.length &&               // 2)
9390       !memcmp(param->min_key, param->max_key, range->start_key.length);
9391 
9392     if (is_eq_range_pred)
9393     {
9394       range->range_flag= EQ_RANGE;
9395       /*
9396         Use statistics instead of index dives for estimates of rows in
9397         this range if the user requested it
9398       */
9399       if (param->use_index_statistics)
9400         range->range_flag|= USE_INDEX_STATISTICS;
9401 
9402       /*
9403         An equality range is a unique range (0 or 1 rows in the range)
9404         if the index is unique (1) and all keyparts are used (2).
9405         Note that keys which are extended with PK parts have no
9406         HA_NOSAME flag. So we can use user_defined_key_parts.
9407       */
9408       if (cur_key_info->flags & HA_NOSAME &&                              // 1)
9409           (uint)key_tree->part+1 == cur_key_info->user_defined_key_parts) // 2)
9410         range->range_flag|= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
9411     }
9412 
9413     if (param->is_ror_scan)
9414     {
9415       const uint key_part_number= key_tree->part + 1;
9416       /*
9417         If we get here, the condition on the key was converted to form
9418         "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
9419           somecond(keyXpart{key_tree->part})"
9420         Check if
9421           somecond is "keyXpart{key_tree->part} = const" and
9422           uncovered "tail" of KeyX parts is either empty or is identical to
9423           first members of clustered primary key.
9424 
9425         If last key part is PK part added to the key as an extension
9426         and is_key_scan_ror() result is TRUE then it's possible to
9427         use ROR scan.
9428       */
9429       if ((!is_eq_range_pred &&
9430            key_part_number <= cur_key_info->user_defined_key_parts) ||
9431           !is_key_scan_ror(param, seq->real_keyno, key_part_number))
9432         param->is_ror_scan= FALSE;
9433     }
9434   }
9435 
9436   seq->param->range_count++;
9437   seq->param->max_key_part=max<uint>(seq->param->max_key_part,key_tree->part);
9438 
9439   return 0;
9440 }
9441 
9442 
9443 /*
9444   Calculate estimate of number records that will be retrieved by a range
9445   scan on given index using given SEL_ARG intervals tree.
9446 
9447   SYNOPSIS
9448     check_quick_select()
9449       param             Parameter from test_quick_select
9450       idx               Number of index to use in PARAM::key SEL_TREE::key
9451       index_only        TRUE  - assume only index tuples will be accessed
9452                         FALSE - assume full table rows will be read
9453       tree              Transformed selection condition, tree->key[idx] holds
9454                         the intervals for the given index.
9455       update_tbl_stats  TRUE <=> update table->quick_* with information
9456                         about range scan we've evaluated.
9457       mrr_flags   INOUT MRR access flags
9458       cost        OUT   Scan cost
9459 
9460   NOTES
9461     param->is_ror_scan is set to reflect if the key scan is a ROR (see
9462     is_key_scan_ror function for more info)
9463     param->table->quick_*, param->range_count (and maybe others) are
9464     updated with data of given key scan, see quick_range_seq_next for details.
9465 
9466   RETURN
9467     Estimate # of records to be retrieved.
9468     HA_POS_ERROR if estimate calculation failed due to table handler problems.
9469 */
9470 
static
ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                           SEL_ARG *tree, bool update_tbl_stats,
                           uint *mrr_flags, uint *bufsize, Cost_estimate *cost)
{
  Sel_arg_range_sequence seq(param);
  RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
  handler *file= param->table->file;
  ha_rows rows;
  uint keynr= param->real_keynr[idx];
  DBUG_ENTER("check_quick_select");

  /* Handle cases when we don't have a valid non-empty list of range */
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree

  /* Set up the SEL_ARG range sequence that the MRR interface iterates */
  seq.keyno= idx;
  seq.real_keyno= keynr;
  seq.start= tree;

  /* Reset counters that sel_arg_range_seq_next() accumulates into */
  param->range_count=0;
  param->max_key_part=0;

  /*
    If there are more equality ranges than specified by the
    eq_range_index_dive_limit variable we switches from using index
    dives to use statistics.
  */
  uint range_count= 0;
  param->use_index_statistics=
    eq_ranges_exceeds_limit(tree, &range_count,
                            param->thd->variables.eq_range_index_dive_limit);

  /* Assume ROR until the traversal or the engine proves otherwise */
  param->is_ror_scan= TRUE;
  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;

  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
  *mrr_flags|= HA_MRR_NO_ASSOCIATION;
  /*
    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
  */
  if (param->order_direction != ORDER::ORDER_NOT_RELEVANT)
    *mrr_flags|= HA_MRR_SORTED;

  /*
    Index-only retrieval is possible when the engine supports it for this
    key depth, except for a clustered PK where "index" and "row" coincide.
  */
  bool pk_is_clustered= file->primary_key_is_clustered();
  if (index_only &&
      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
      !(pk_is_clustered && keynr == param->table->s->primary_key))
     *mrr_flags |= HA_MRR_INDEX_ONLY;

  if (current_thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del

  *bufsize= param->thd->variables.read_rnd_buff_size;
  // Sets is_ror_scan to false for some queries, e.g. multi-ranges
  rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                          bufsize, mrr_flags, cost);
  if (rows != HA_POS_ERROR)
  {
    /* Estimation succeeded: publish per-key statistics on the table */
    param->table->quick_rows[keynr]=rows;
    if (update_tbl_stats)
    {
      param->table->quick_keys.set_bit(keynr);
      param->table->quick_key_parts[keynr]=param->max_key_part+1;
      param->table->quick_n_ranges[keynr]= param->range_count;
      param->table->quick_condition_rows=
        min(param->table->quick_condition_rows, rows);
    }
    param->table->possible_quick_keys.set_bit(keynr);
  }
  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /*
      All scans are non-ROR scans for those index types.
      TODO: Don't have this logic here, make table engines return
      appropriate flags instead.
    */
    param->is_ror_scan= FALSE;
  }
  else
  {
    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
    if (param->table->s->primary_key == keynr && pk_is_clustered)
      param->is_ror_scan= TRUE;
  }
  /*
    Re-apply the engine's HA_KEY_SCAN_NOT_ROR flag: the clustered-PK
    branch above may have set is_ror_scan back to TRUE, so the earlier
    identical check is not sufficient on its own.
  */
  if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;
  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
  DBUG_RETURN(rows);
}
9568 
9569 
9570 /*
9571   Check if key scan on given index with equality conditions on first n key
9572   parts is a ROR scan.
9573 
9574   SYNOPSIS
9575     is_key_scan_ror()
9576       param  Parameter from test_quick_select
9577       keynr  Number of key in the table. The key must not be a clustered
9578              primary key.
9579       nparts Number of first key parts for which equality conditions
9580              are present.
9581 
9582   NOTES
9583     ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
9584     ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
9585 
9586     This function is needed to handle a practically-important special case:
9587     an index scan is a ROR scan if it is done using a condition in form
9588 
9589         "key1_1=c_1 AND ... AND key1_n=c_n"
9590 
9591     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
9592 
9593     and the table has a clustered Primary Key defined as
9594 
9595       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
9596 
9597     i.e. the first key parts of it are identical to uncovered parts ot the
9598     key being scanned. This function assumes that the index flags do not
9599     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
9600 
9601     Check (1) is made in quick_range_seq_next()
9602 
9603   RETURN
9604     TRUE   The scan is ROR-scan
9605     FALSE  Otherwise
9606 */
9607 
is_key_scan_ror(PARAM * param,uint keynr,uint nparts)9608 static bool is_key_scan_ror(PARAM *param, uint keynr, uint nparts)
9609 {
9610   KEY *table_key= param->table->key_info + keynr;
9611 
9612   /*
9613     Range predicates on hidden key parts do not change the fact
9614     that a scan is rowid ordered, so we only care about user
9615     defined keyparts
9616   */
9617   const uint user_defined_nparts=
9618     std::min<uint>(nparts, table_key->user_defined_key_parts);
9619 
9620   KEY_PART_INFO *key_part= table_key->key_part + user_defined_nparts;
9621   KEY_PART_INFO *key_part_end= (table_key->key_part +
9622                                 table_key->user_defined_key_parts);
9623   uint pk_number;
9624 
9625   for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
9626   {
9627     uint16 fieldnr= param->table->key_info[keynr].
9628                     key_part[kp - table_key->key_part].fieldnr - 1;
9629     if (param->table->field[fieldnr]->key_length() != kp->length)
9630       return FALSE;
9631   }
9632 
9633   if (key_part == key_part_end)
9634     return TRUE;
9635 
9636   key_part= table_key->key_part + user_defined_nparts;
9637   pk_number= param->table->s->primary_key;
9638   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
9639     return FALSE;
9640 
9641   KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
9642   KEY_PART_INFO *pk_part_end=
9643     pk_part + param->table->key_info[pk_number].user_defined_key_parts;
9644   for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
9645        ++key_part, ++pk_part)
9646   {
9647     if ((key_part->field != pk_part->field) ||
9648         (key_part->length != pk_part->length))
9649       return FALSE;
9650   }
9651   return (key_part == key_part_end);
9652 }
9653 
9654 
9655 /*
9656   Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
9657 
9658   SYNOPSIS
9659     get_quick_select()
9660       param
9661       idx            Index of used key in param->key.
9662       key_tree       SEL_ARG tree for the used key
9663       mrr_flags      MRR parameter for quick select
9664       mrr_buf_size   MRR parameter for quick select
9665       parent_alloc   If not NULL, use it to allocate memory for
9666                      quick select data. Otherwise use quick->alloc.
9667   NOTES
9668     The caller must call QUICK_SELECT::init for returned quick select.
9669 
9670     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
9671     deallocated when the returned quick select is deleted.
9672 
9673   RETURN
9674     NULL on error
9675     otherwise created quick select
9676 */
9677 
QUICK_RANGE_SELECT *
get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
                 uint mrr_buf_size, MEM_ROOT *parent_alloc)
{
  QUICK_RANGE_SELECT *quick;
  bool create_err= FALSE;
  DBUG_ENTER("get_quick_select");

  /* Spatial indexes get the R-tree aware quick select variant */
  if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
    quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                      param->real_keynr[idx],
                                      MY_TEST(parent_alloc),
                                      parent_alloc, &create_err);
  else
    /*
      NOTE(review): unlike the GEOM branch, parent_alloc is not passed to
      the constructor here (NULL), although memdup_root below still uses
      parent_alloc when given — confirm this asymmetry is intentional.
    */
    quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                 param->real_keynr[idx],
                                 MY_TEST(parent_alloc), NULL, &create_err);

  if (quick)
  {
    /* Populate quick->ranges from the SEL_ARG tree; on failure clean up */
    if (create_err ||
	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
		       param->max_key,0))
    {
      delete quick;
      quick=0;
    }
    else
    {
      quick->mrr_flags= mrr_flags;
      quick->mrr_buf_size= mrr_buf_size;
      /* Copy the keypart descriptions so quick owns them (or parent does) */
      quick->key_parts=(KEY_PART*)
        memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                    (char*) param->key[idx],
                    sizeof(KEY_PART) *
                    actual_key_parts(&param->
                                     table->key_info[param->real_keynr[idx]]));
    }
  }
  DBUG_RETURN(quick);
}
9719 
9720 
9721 /*
9722 ** Fix this to get all possible sub_ranges
9723 */
/*
  Recursively walk the SEL_ARG graph for one index and append one
  QUICK_RANGE per disjoint interval to quick->ranges, in key order
  (in-order traversal: left subtree, this node + next_key_part chain,
  right subtree).

  param          Range analysis context (owns min_key/max_key buffers)
  quick          Quick select being populated
  key            Keypart descriptions for this index
  key_tree       Current SEL_ARG node
  min_key        Write position in param->min_key for this recursion level
  min_key_flag   Accumulated min-side flags from outer keyparts
  max_key        Write position in param->max_key for this recursion level
  max_key_flag   Accumulated max-side flags from outer keyparts

  RETURN  0 ok, 1 out of memory
*/
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
	       SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
	       uchar *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;
  int min_part= key_tree->part-1, // # of keypart values in min_key buffer
      max_part= key_tree->part-1; // # of keypart values in max_key buffer

  /* Emit ranges from the left subtree first to keep key order */
  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  /* Append this node's min/max values to the key buffers */
  uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
  min_part+= key_tree->store_min(key[key_tree->part].store_length,
                                 &tmp_min_key,min_key_flag);
  max_part+= key_tree->store_max(key[key_tree->part].store_length,
                                 &tmp_max_key,max_key_flag);

  if (key_tree->next_key_part &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
      key_tree->next_key_part->part == key_tree->part+1)
  {						  // const key as prefix
    /*
      If this keypart is an equality (min == max, no open-range flags),
      recurse: each subrange of the next keypart becomes its own range
      with this value as prefix.
    */
    if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
         memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
	 key_tree->min_flag==0 && key_tree->max_flag==0)
    {
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    /*
      Non-equality prefix: cannot split into subranges, but the next
      keypart's bounds can still tighten the single range's endpoints.
    */
    {
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
        min_part+= key_tree->next_key_part->store_min_key(key,
                                                          &tmp_min_key,
                                                          &tmp_min_flag,
                                                          MAX_KEY);
      if (!tmp_max_flag)
        max_part+= key_tree->next_key_part->store_max_key(key,
                                                          &tmp_max_key,
                                                          &tmp_max_flag,
                                                          MAX_KEY);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
  {
    /* GEOM ranges keep only min-side flags (they carry the operator) */
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }

  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
  /* flag == 0 means closed bounds on both sides: check for equality range */
  if (flag == 0)
  {
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      const KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
      /*
        Note that keys which are extended with PK parts have no
        HA_NOSAME flag. So we can use user_defined_key_parts.
      */
      if ((table_key->flags & HA_NOSAME) &&
          key_tree->part == table_key->user_defined_key_parts - 1)
      {
        /* Full unique key: at most one row, unless a part is NULL */
        if ((table_key->flags & HA_NULL_PART_KEY) &&
            null_part_in_key(key,
                             param->min_key,
                             (uint) (tmp_min_key - param->min_key)))
          flag|= NULL_RANGE;
        else
          flag|= UNIQUE_RANGE;
      }
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
  if (!(range= new QUICK_RANGE(param->min_key,
			       (uint) (tmp_min_key - param->min_key),
                               min_part >=0 ? make_keypart_map(min_part) : 0,
			       param->max_key,
			       (uint) (tmp_max_key - param->max_key),
                               max_part >=0 ? make_keypart_map(max_part) : 0,
			       flag)))
    return 1;			// out of memory

  set_if_bigger(quick->max_used_key_length, range->min_length);
  set_if_bigger(quick->max_used_key_length, range->max_length);
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
  if (insert_dynamic(&quick->ranges, &range))
    return 1;

 end:
  /* Finally the right subtree, completing the in-order traversal */
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}
9845 
9846 /*
9847   Return 1 if there is only one range and this uses the whole unique key
9848 */
9849 
unique_key_range()9850 bool QUICK_RANGE_SELECT::unique_key_range()
9851 {
9852   if (ranges.elements == 1)
9853   {
9854     QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
9855     if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
9856     {
9857       KEY *key=head->key_info+index;
9858       return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
9859     }
9860   }
9861   return 0;
9862 }
9863 
9864 
9865 
9866 /*
9867   Return TRUE if any part of the key is NULL
9868 
9869   SYNOPSIS
9870     null_part_in_key()
9871       key_part  Array of key parts (index description)
9872       key       Key values tuple
9873       length    Length of key values tuple in bytes.
9874 
9875   RETURN
9876     TRUE   The tuple has at least one "keypartX is NULL"
9877     FALSE  Otherwise
9878 */
9879 
null_part_in_key(KEY_PART * key_part,const uchar * key,uint length)9880 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
9881 {
9882   for (const uchar *end=key+length ;
9883        key < end;
9884        key+= key_part++->store_length)
9885   {
9886     if (key_part->null_bit && *key)
9887       return 1;
9888   }
9889   return 0;
9890 }
9891 
9892 
is_keys_used(const MY_BITMAP * fields)9893 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
9894 {
9895   return is_key_used(head, index, fields);
9896 }
9897 
is_keys_used(const MY_BITMAP * fields)9898 bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
9899 {
9900   QUICK_RANGE_SELECT *quick;
9901   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9902   while ((quick= it++))
9903   {
9904     if (is_key_used(head, quick->index, fields))
9905       return 1;
9906   }
9907   return 0;
9908 }
9909 
is_keys_used(const MY_BITMAP * fields)9910 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
9911 {
9912   QUICK_RANGE_SELECT *quick;
9913   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
9914   while ((quick= it++))
9915   {
9916     if (is_key_used(head, quick->index, fields))
9917       return 1;
9918   }
9919   return 0;
9920 }
9921 
is_keys_used(const MY_BITMAP * fields)9922 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
9923 {
9924   QUICK_SELECT_I *quick;
9925   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
9926   while ((quick= it++))
9927   {
9928     if (quick->is_keys_used(fields))
9929       return 1;
9930   }
9931   return 0;
9932 }
9933 
9934 
get_ft_select(THD * thd,TABLE * table,uint key)9935 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
9936 {
9937   bool create_err= FALSE;
9938   FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
9939   if (create_err)
9940   {
9941     delete fts;
9942     return NULL;
9943   }
9944   else
9945     return fts;
9946 }
9947 
9948 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
9949 static bool
key_has_nulls(const KEY * key_info,const uchar * key,uint key_len)9950 key_has_nulls(const KEY* key_info, const uchar *key, uint key_len)
9951 {
9952   KEY_PART_INFO *curr_part, *end_part;
9953   const uchar* end_ptr= key + key_len;
9954   curr_part= key_info->key_part;
9955   end_part= curr_part + key_info->user_defined_key_parts;
9956 
9957   for (; curr_part != end_part && key < end_ptr; curr_part++)
9958   {
9959     if (curr_part->null_bit && *key)
9960       return TRUE;
9961 
9962     key += curr_part->store_length;
9963   }
9964   return FALSE;
9965 }
9966 #endif
9967 
9968 /*
9969   Create quick select from ref/ref_or_null scan.
9970 
9971   SYNOPSIS
9972     get_quick_select_for_ref()
9973       thd      Thread handle
9974       table    Table to access
9975       ref      ref[_or_null] scan parameters
9976       records  Estimate of number of records (needed only to construct
9977                quick select)
9978   NOTES
9979     This allocates things in a new memory root, as this may be called many
9980     times during a query.
9981 
9982   RETURN
9983     Quick select that retrieves the same rows as passed ref scan
9984     NULL on error.
9985 */
9986 
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                             TABLE_REF *ref, ha_rows records)
{
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
  QUICK_RANGE *range;
  uint part;
  bool create_err= FALSE;
  Cost_estimate cost;

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;

  if (!quick || create_err)
    return 0;			/* no ranges found */
  if (quick->init())
    goto err;
  quick->records= records;

  /*
    Copy the ref key value into ref->key_buff; allocate the single range
    on the quick select's own mem_root so it lives as long as the quick.
  */
  if ((cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error) ||
      !(range= new(alloc) QUICK_RANGE()))
    goto err;                                   // out of memory

  /* Both endpoints are the same key value: an equality range. */
  range->min_key= range->max_key= ref->key_buff;
  range->min_length= range->max_length= ref->key_length;
  range->min_keypart_map= range->max_keypart_map=
    make_prev_keypart_map(ref->key_parts);
  /* EQ_RANGE only when the ref covers the full key length. */
  range->flag= (ref->key_length == key_info->key_length ? EQ_RANGE : 0);

  if (!(quick->key_parts=key_part=(KEY_PART *)
	alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
    goto err;

  /* Build the quick select's KEY_PART array from the index description. */
  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
    key_part->length=       key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
    key_part->null_bit=     key_info->key_part[part].null_bit;
    key_part->flag=         (uint8) key_info->key_part[part].key_part_flag;
  }
  if (insert_dynamic(&quick->ranges, &range))
    goto err;

  /*
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
    if (!(null_range= new (alloc)
          QUICK_RANGE(ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts),
                      ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts), EQ_RANGE)))
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
    if (insert_dynamic(&quick->ranges, &null_range))
      goto err;
  }

  /* Call multi_range_read_info() to get the MRR flags and buffer size */
  quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
  if (thd->lex->sql_command != SQLCOM_SELECT)
    quick->mrr_flags|= HA_MRR_SORTED; // Assumed to give faster ins/upd/del
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
  if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
                                           ref->key_length))
    quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
#endif

  quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
  if (table->file->multi_range_read_info(quick->index, 1, records,
                                         &quick->mrr_buf_size,
                                         &quick->mrr_flags, &cost))
    goto err;

  return quick;
err:
  /* Any failure after construction: free the quick select and report NULL. */
  delete quick;
  return 0;
}
10086 
10087 
10088 /*
10089   Perform key scans for all used indexes (except CPK), get rowids and merge
10090   them into an ordered non-recurrent sequence of rowids.
10091 
10092   The merge/duplicate removal is performed using Unique class. We put all
10093   rowids into Unique, get the sorted sequence and destroy the Unique.
10094 
10095   If table has a clustered primary key that covers all rows (TRUE for bdb
10096   and innodb currently) and one of the index_merge scans is a scan on PK,
10097   then rows that will be retrieved by PK scan are not put into Unique and
10098   primary key scan is not performed here, it is performed later separately.
10099 
10100   RETURN
10101     0     OK
10102     other error
10103 */
10104 
read_keys_and_merge()10105 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
10106 {
10107   List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
10108   QUICK_RANGE_SELECT* cur_quick;
10109   int result;
10110   handler *file= head->file;
10111   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
10112 
10113   /* We're going to just read rowids. */
10114   head->set_keyread(TRUE);
10115   head->prepare_for_position();
10116 
10117   cur_quick_it.rewind();
10118   cur_quick= cur_quick_it++;
10119   DBUG_ASSERT(cur_quick != 0);
10120 
10121   DBUG_EXECUTE_IF("simulate_bug13919180",
10122                   {
10123                     my_error(ER_UNKNOWN_ERROR, MYF(0));
10124                     DBUG_RETURN(1);
10125                   });
10126   /*
10127     We reuse the same instance of handler so we need to call both init and
10128     reset here.
10129   */
10130   if (cur_quick->init() || cur_quick->reset())
10131     DBUG_RETURN(1);
10132 
10133   if (unique == NULL)
10134   {
10135     DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_ABORT(); );
10136     DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
10137                     DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );
10138 
10139     unique= new Unique(refpos_order_cmp, (void *)file,
10140                        file->ref_length,
10141                        thd->variables.sortbuff_size);
10142   }
10143   else
10144   {
10145     unique->reset();
10146     filesort_free_buffers(head, false);
10147   }
10148 
10149   DBUG_ASSERT(file->ref_length == unique->get_size());
10150   DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
10151 
10152   if (!unique)
10153     DBUG_RETURN(1);
10154   for (;;)
10155   {
10156     while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
10157     {
10158       cur_quick->range_end();
10159       cur_quick= cur_quick_it++;
10160       if (!cur_quick)
10161         break;
10162 
10163       if (cur_quick->file->inited)
10164         cur_quick->file->ha_index_end();
10165       if (cur_quick->init() || cur_quick->reset())
10166         DBUG_RETURN(1);
10167     }
10168 
10169     if (result)
10170     {
10171       if (result != HA_ERR_END_OF_FILE)
10172       {
10173         cur_quick->range_end();
10174         DBUG_RETURN(result);
10175       }
10176       break;
10177     }
10178 
10179     if (thd->killed)
10180       DBUG_RETURN(1);
10181 
10182     /* skip row if it will be retrieved by clustered PK scan */
10183     if (pk_quick_select && pk_quick_select->row_in_ranges())
10184       continue;
10185 
10186     cur_quick->file->position(cur_quick->record);
10187     result= unique->unique_add((char*)cur_quick->file->ref);
10188     if (result)
10189       DBUG_RETURN(1);
10190   }
10191 
10192   /*
10193     Ok all rowids are in the Unique now. The next call will initialize
10194     head->sort structure so it can be used to iterate through the rowids
10195     sequence.
10196   */
10197   result= unique->get(head);
10198   doing_pk_scan= FALSE;
10199   /* index_merge currently doesn't support "using index" at all */
10200   head->set_keyread(FALSE);
10201   if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1, TRUE))
10202     DBUG_RETURN(1);
10203   DBUG_RETURN(result);
10204 }
10205 
10206 
10207 /*
10208   Get next row for index_merge.
10209   NOTES
10210     The rows are read from
10211       1. rowids stored in Unique.
10212       2. QUICK_RANGE_SELECT with clustered primary key (if any).
10213     The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
10214 */
10215 
int QUICK_INDEX_MERGE_SELECT::get_next()
{
  int result;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");

  /* Phase 2: rowid sequence exhausted, rows come from the clustered PK scan. */
  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  /* Phase 1: read rows via the sorted rowid sequence built by the Unique. */
  if ((result= read_record.read_record(&read_record)) == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
    free_io_cache(head);
    /* All rows from Unique have been retrieved, do a clustered PK scan */
    if (pk_quick_select)
    {
      doing_pk_scan= TRUE;
      if ((result= pk_quick_select->init()) ||
          (result= pk_quick_select->reset()))
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
}
10242 
10243 
10244 /*
10245   Retrieve next record.
10246   SYNOPSIS
10247      QUICK_ROR_INTERSECT_SELECT::get_next()
10248 
10249   NOTES
10250     Invariant on enter/exit: all intersected selects have retrieved all index
10251     records with rowid <= some_rowid_val and no intersected select has
10252     retrieved any index records with rowid > some_rowid_val.
10253     We start fresh and loop until we have retrieved the same rowid in each of
10254     the key scans or we got an error.
10255 
10256     If a Clustered PK scan is present, it is used only to check if row
10257     satisfies its condition (and never used for row retrieval).
10258 
10259     Locking: to ensure that exclusive locks are only set on records that
10260     are included in the final result we must release the lock
10261     on all rows we read but do not include in the final result. This
10262     must be done on each index that reads the record and the lock
10263     must be released using the same handler (the same quick object) as
10264     used when reading the record.
10265 
10266   RETURN
10267    0     - Ok
10268    other - Error code if any error occurred.
10269 */
10270 
int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;

  /* quick that reads the given rowid first. This is needed in order
  to be able to unlock the row using the same handler object that locked
  it */
  QUICK_RANGE_SELECT* quick_with_last_rowid;

  int error, cmp;
  uint last_rowid_count=0;          /* scans that confirmed the candidate */
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  do
  {
    /* Get a rowid for first quick and save it as a 'candidate' */
    quick= quick_it++;
    error= quick->get_next();
    if (cpk_quick)
    {
      /* The clustered PK scan is used only as a range filter here. */
      while (!error && !cpk_quick->row_in_ranges())
      {
        quick->file->unlock_row(); /* row not in range; unlock */
        error= quick->get_next();
      }
    }
    if (error)
      DBUG_RETURN(error);

    quick->file->position(quick->record);
    memcpy(last_rowid, quick->file->ref, head->file->ref_length);
    last_rowid_count= 1;
    quick_with_last_rowid= quick;

    /* Loop until every scan in the intersection confirms the candidate. */
    while (last_rowid_count < quick_selects.elements)
    {
      if (!(quick= quick_it++))
      {
        /* Wrap around to the first scan in the list. */
        quick_it.rewind();
        quick= quick_it++;
      }

      /* Advance this scan until its rowid catches up with the candidate. */
      do
      {
        DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
                        DBUG_SET("+d,innodb_report_deadlock"););
        if ((error= quick->get_next()))
        {
          /* On certain errors like deadlock, trx might be rolled back.*/
          if (!current_thd->transaction_rollback_request)
            quick_with_last_rowid->file->unlock_row();
          DBUG_RETURN(error);
        }
        quick->file->position(quick->record);
        cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
        if (cmp < 0)
        {
          /* This row is being skipped.  Release lock on it. */
          quick->file->unlock_row();
        }
      } while (cmp < 0);

      /* Ok, current select 'caught up' and returned ref >= cur_ref */
      if (cmp > 0)
      {
        /* Found a row with ref > cur_ref. Make it a new 'candidate' */
        if (cpk_quick)
        {
          /* The new candidate must also pass the clustered PK filter. */
          while (!cpk_quick->row_in_ranges())
          {
            quick->file->unlock_row(); /* row not in range; unlock */
            if ((error= quick->get_next()))
            {
              /* On certain errors like deadlock, trx might be rolled back.*/
              if (!current_thd->transaction_rollback_request)
                quick_with_last_rowid->file->unlock_row();
              DBUG_RETURN(error);
            }
          }
          quick->file->position(quick->record);
        }
        memcpy(last_rowid, quick->file->ref, head->file->ref_length);
        /* The old candidate was rejected: release its lock via its owner. */
        quick_with_last_rowid->file->unlock_row();
        last_rowid_count= 1;
        quick_with_last_rowid= quick;
      }
      else
      {
        /* current 'candidate' row confirmed by this select */
        last_rowid_count++;
      }
    }

    /* We get here if we got the same row ref in all scans. */
    if (need_to_fetch_row)
      error= head->file->ha_rnd_pos(head->record[0], last_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
10371 
10372 
10373 /*
10374   Retrieve next record.
10375   SYNOPSIS
10376     QUICK_ROR_UNION_SELECT::get_next()
10377 
10378   NOTES
10379     Enter/exit invariant:
10380     For each quick select in the queue a {key,rowid} tuple has been
10381     retrieved but the corresponding row hasn't been passed to output.
10382 
10383   RETURN
10384    0     - Ok
10385    other - Error code if any error occurred.
10386 */
10387 
int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  uchar *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");

  do
  {
    /* Pull the smallest unseen rowid off the priority queue. */
    do
    {
      if (!queue.elements)
        DBUG_RETURN(HA_ERR_END_OF_FILE);
      /* Ok, we have a queue with >= 1 scans */

      quick= (QUICK_SELECT_I*)queue_top(&queue);
      memcpy(cur_rowid, quick->last_rowid, rowid_length);

      /* put into queue rowid from the same stream as top element */
      if ((error= quick->get_next()))
      {
        if (error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(error);
        queue_remove(&queue, 0);    /* stream exhausted: drop it */
      }
      else
      {
        quick->save_last_pos();
        queue_replaced(&queue);     /* re-heapify with the new rowid */
      }

      if (!have_prev_rowid)
      {
        /* No rows have been returned yet */
        dup_row= FALSE;
        have_prev_rowid= TRUE;
      }
      else
        dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
    } while (dup_row);              /* skip rowids already returned */

    /* Swap buffers so the current rowid becomes the 'previous' one. */
    tmp= cur_rowid;
    cur_rowid= prev_rowid;
    prev_rowid= tmp;

    error= head->file->ha_rnd_pos(quick->record, prev_rowid);
  } while (error == HA_ERR_RECORD_DELETED);
  DBUG_RETURN(error);
}
10437 
10438 
/*
  Prepare this quick select for a (re)scan: rewind the range array,
  set covering-index read mode, init the index handler if needed,
  allocate the MRR buffer, and (re)start the multi-range read.

  RETURN
    0      OK
    other  handler error code
*/
int QUICK_RANGE_SELECT::reset()
{
  uint  buf_size;
  uchar *mrange_buff;
  int   error;
  HANDLER_BUFFER empty_buf;
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  last_range= NULL;
  cur_range= (QUICK_RANGE**) ranges.buffer;

  /* set keyread to TRUE if index is covering */
  if(!head->no_keyread && head->covering_keys.is_set(index))
    head->set_keyread(true);
  else
    head->set_keyread(false);

  if (!file->inited)
  {
    /* In ROR-merged scans all parts use one shared column bitmap. */
    if (in_ror_merged_scan)
      head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    DBUG_EXECUTE_IF("bug14365043_2",
                    DBUG_SET("+d,ha_index_init_fail"););
    if ((error= file->ha_index_init(index, sorted)))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(error);
    }
  }

  /* Allocate buffer if we need one but haven't allocated it yet */
  if (mrr_buf_size && !mrr_buf_desc)
  {
    buf_size= mrr_buf_size;
    while (buf_size && !my_multi_malloc(MYF(MY_WME),
                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
                                        &mrange_buff, buf_size,
                                        NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      buf_size/= 2;
    }
    if (!mrr_buf_desc)
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    /* Initialize the handler buffer. */
    mrr_buf_desc->buffer= mrange_buff;
    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
    mrr_buf_desc->end_of_used_area= mrange_buff;
#ifdef HAVE_purify
    /*
      We need this until ndb will use the buffer efficiently
      (Now ndb stores  complete row in here, instead of only the used fields
      which gives us valgrind warnings in compare_record[])
    */
    memset(mrange_buff, 0, buf_size);
#endif
  }

  /* No MRR buffer requested: pass an empty descriptor instead. */
  if (!mrr_buf_desc)
    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;

  RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
                                     mrr_flags, mrr_buf_desc? mrr_buf_desc:
                                                              &empty_buf);
  DBUG_RETURN(error);
}
10507 
10508 
10509 /*
10510   Range sequence interface implementation for array<QUICK_RANGE>: initialize
10511 
10512   SYNOPSIS
10513     quick_range_seq_init()
      init_param  Caller-opaque parameter: QUICK_RANGE_SELECT* pointer
10515       n_ranges    Number of ranges in the sequence (ignored)
10516       flags       MRR flags (currently not used)
10517 
10518   RETURN
10519     Opaque value to be passed to quick_range_seq_next
10520 */
10521 
quick_range_seq_init(void * init_param,uint n_ranges,uint flags)10522 range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
10523 {
10524   QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
10525   quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
10526   quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
10527   quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur +
10528                                   quick->ranges.elements;
10529   return &quick->qr_traversal_ctx;
10530 }
10531 
10532 
10533 /*
10534   Range sequence interface implementation for array<QUICK_RANGE>: get next
10535 
10536   SYNOPSIS
10537     quick_range_seq_next()
10538       rseq        Value returned from quick_range_seq_init
10539       range  OUT  Store information about the range here
10540 
10541   RETURN
10542     0  Ok
10543     1  No more ranges in the sequence
10544 */
10545 
quick_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)10546 uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
10547 {
10548   QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
10549 
10550   if (ctx->cur == ctx->last)
10551     return 1; /* no more ranges */
10552 
10553   QUICK_RANGE *cur= *(ctx->cur);
10554   key_range *start_key= &range->start_key;
10555   key_range *end_key=   &range->end_key;
10556 
10557   start_key->key=    cur->min_key;
10558   start_key->length= cur->min_length;
10559   start_key->keypart_map= cur->min_keypart_map;
10560   start_key->flag=   ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
10561                       (cur->flag & EQ_RANGE) ?
10562                       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
10563   end_key->key=      cur->max_key;
10564   end_key->length=   cur->max_length;
10565   end_key->keypart_map= cur->max_keypart_map;
10566   /*
10567     We use HA_READ_AFTER_KEY here because if we are reading on a key
10568     prefix. We want to find all keys with this prefix.
10569   */
10570   end_key->flag=     (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
10571                       HA_READ_AFTER_KEY);
10572   range->range_flag= cur->flag;
10573   ctx->cur++;
10574   return 0;
10575 }
10576 
10577 
10578 /*
10579   MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
10580 
10581   SYNOPSIS
10582     mrr_persistent_flag_storage()
10583       seq  Range sequence being traversed
10584       idx  Number of range
10585 
10586   DESCRIPTION
10587     MRR/NDB implementation needs to store some bits for each range. This
10588     function returns a reference to the "range_flag" associated with the
10589     range number idx.
10590 
10591     This function should be removed when we get a proper MRR/NDB
10592     implementation.
10593 
10594   RETURN
10595     Reference to range_flag associated with range number #idx
10596 */
10597 
mrr_persistent_flag_storage(range_seq_t seq,uint idx)10598 uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
10599 {
10600   QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
10601   return ctx->first[idx]->flag;
10602 }
10603 
10604 
10605 /*
10606   MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
10607 
10608   SYNOPSIS
10609     mrr_get_ptr_by_idx()
      seq  Range sequence being traversed
10611       idx  Number of the range
10612 
10613   DESCRIPTION
10614     An extension of MRR range sequence interface needed by NDB: return the
10615     data associated with the given range.
10616 
10617     A proper MRR interface implementer is supposed to store and return
10618     range-associated data. NDB stores number of the range instead. So this
10619     is a helper function that translates range number to range associated
10620     data.
10621 
    This function does nothing, as currently there is only one user of the
10623     MRR interface - the quick range select code, and this user doesn't need
10624     to use range-associated data.
10625 
10626   RETURN
10627     Reference to range-associated data
10628 */
10629 
mrr_get_ptr_by_idx(range_seq_t seq,uint idx)10630 char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
10631 {
10632   static char *dummy;
10633   return dummy;
10634 }
10635 
10636 
10637 /*
10638   Get next possible record using quick-struct.
10639 
10640   SYNOPSIS
10641     QUICK_RANGE_SELECT::get_next()
10642 
10643   NOTES
10644     Record is read into table->record[0]
10645 
10646   RETURN
10647     0			Found row
10648     HA_ERR_END_OF_FILE	No (more) rows in range
10649     #			Error code
10650 */
10651 
int QUICK_RANGE_SELECT::get_next()
{
  char *dummy;                      /* range-associated data, unused here */
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");

  if (in_ror_merged_scan)
  {
    /*
      We don't need to signal the bitmap change as the bitmap is always the
      same for this head->file
    */
    head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
  }

  /* The MRR implementation drives the actual range reads. */
  int result= file->multi_range_read_next(&dummy);

  if (in_ror_merged_scan)
  {
    /* Restore bitmaps set on entry */
    head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
  }
  DBUG_RETURN(result);
}
10677 
10678 
10679 /*
10680   Get the next record with a different prefix.
10681 
10682   @param prefix_length   length of cur_prefix
10683   @param group_key_parts The number of key parts in the group prefix
10684   @param cur_prefix      prefix of a key to be searched for
10685 
10686   Each subsequent call to the method retrieves the first record that has a
10687   prefix with length prefix_length and which is different from cur_prefix,
10688   such that the record with the new prefix is within the ranges described by
10689   this->ranges. The record found is stored into the buffer pointed by
10690   this->record. The method is useful for GROUP-BY queries with range
10691   conditions to discover the prefix of the next group that satisfies the range
10692   conditions.
10693 
10694   @todo
10695 
10696     This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
10697     methods should be unified into a more general one to reduce code
10698     duplication.
10699 
10700   @retval 0                  on success
10701   @retval HA_ERR_END_OF_FILE if returned all keys
10702   @retval other              if some error occurred
10703 */
10704 
int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
                                        uint group_key_parts,
                                        uchar *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
  const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);

  for (;;)
  {
    int result;
    if (last_range)
    {
      /* Read the next record in the same range with prefix after cur_prefix. */
      DBUG_ASSERT(cur_prefix != NULL);
      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
                                      HA_READ_AFTER_KEY);
      /* A max_keypart_map of 0 means the range has no upper bound to check. */
      if (result || last_range->max_keypart_map == 0)
        DBUG_RETURN(result);

      key_range previous_endpoint;
      last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
      /* Still within the current range's upper endpoint: row found. */
      if (file->compare_key(&previous_endpoint) <= 0)
        DBUG_RETURN(0);
    }

    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /* Clip the range's endpoints to the group prefix before scanning. */
    key_range start_key, end_key;
    last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
    last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);

    const bool sorted= (mrr_flags & HA_MRR_SORTED);
    result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
				   last_range->max_keypart_map ? &end_key : 0,
                                   MY_TEST(last_range->flag & EQ_RANGE),
				   sorted);
    if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
      last_range= 0;			// Stop searching

    if (result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;			// No matching rows; go to next range
  }
}
10756 
10757 
10758 /* Get next for geometrical indexes */
10759 
int QUICK_RANGE_SELECT_GEOM::get_next()
{
  DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");

  for (;;)
  {
    int result;
    if (last_range)
    {
      // Already read through key
      result= file->ha_index_next_same(record, last_range->min_key,
                                       last_range->min_length);
      if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /*
      The geometry search function is encoded in the range flag
      (XOR with GEOM_FLAG recovers the ha_rkey_function value).
    */
    result= file->ha_index_read_map(record, last_range->min_key,
                                    last_range->min_keypart_map,
                                    (ha_rkey_function)(last_range->flag ^
                                                       GEOM_FLAG));
    if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;				// Not found, to next range
  }
}
10794 
10795 
10796 /*
10797   Check if current row will be retrieved by this QUICK_RANGE_SELECT
10798 
10799   NOTES
10800     It is assumed that currently a scan is being done on another index
10801     which reads all necessary parts of the index that is scanned by this
10802     quick select.
10803     The implementation does a binary search on sorted array of disjoint
10804     ranges, without taking size of range into account.
10805 
10806     This function is used to filter out clustered PK scan rows in
10807     index_merge quick select.
10808 
10809   RETURN
10810     TRUE  if current row will be retrieved by this quick select
10811     FALSE if not
10812 */
10813 
row_in_ranges()10814 bool QUICK_RANGE_SELECT::row_in_ranges()
10815 {
10816   QUICK_RANGE *res;
10817   uint min= 0;
10818   uint max= ranges.elements - 1;
10819   uint mid= (max + min)/2;
10820 
10821   while (min != max)
10822   {
10823     if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
10824     {
10825       /* current row value > mid->max */
10826       min= mid + 1;
10827     }
10828     else
10829       max= mid;
10830     mid= (min + max) / 2;
10831   }
10832   res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
10833   return (!cmp_next(res) && !cmp_prev(res));
10834 }
10835 
10836 /*
10837   This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
10838   get_next() interface, but we have to hold a pointer to the original
10839   QUICK_RANGE_SELECT because its data are used all over the place. What
10840   should be done is to factor out the data that is needed into a base
10841   class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
10842   which handle the ranges and implement the get_next() function.  But
10843   for now, this seems to work right at least.
10844  */
10845 
/*
  Build a descending-order quick select as a copy of an ascending one.
  Shares the range memory with 'q' (q->dont_free is set below).
  NOTE(review): the 'error' out-parameter is never written in this body;
  callers apparently must pre-initialize it to FALSE -- confirm.
*/
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts_arg,
                                     bool *error)
 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
  used_key_parts (used_key_parts_arg)
{
  QUICK_RANGE *r;
  /*
    Use default MRR implementation for reverse scans. No table engine
    currently can do an MRR scan with output in reverse index order.
  */
  mrr_buf_desc= NULL;
  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
  mrr_buf_size= 0;

  /* Copy the ranges into rev_ranges in reversed order (push_front). */
  QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
  QUICK_RANGE **end_range= pr + ranges.elements;
  for (; pr!=end_range; pr++)
    rev_ranges.push_front(*pr);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  for (r = rev_it++; r; r = rev_it++)
  {
    if ((r->flag & EQ_RANGE) &&
	head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;				// Don't free shared mem
}
10878 
10879 
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
   *   - if it is an EQ_RANGE (which means that max key covers the entire
   *     key) and the query does not use any hidden key fields that are
   *     not considered when the range optimizer sets EQ_RANGE (e.g. the
   *     primary key added by InnoDB), then go directly to the key and
   *     read through it (sorting backwards is same as sorting forwards).
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (last_range)
    {						// Already read through key
      /*
        EQ_RANGE covering all user-defined key parts can be read forward
        with index_next_same(); anything else continues backwards.
      */
      result = ((last_range->flag & EQ_RANGE &&
                 used_key_parts <=
                 head->key_info[index].user_defined_key_parts) ?
                file->ha_index_next_same(record, last_range->min_key,
                                         last_range->min_length) :
                file->ha_index_prev(record));
      if (!result)
      {
        /* cmp_prev()==0: fetched key is still >= the range's min endpoint. */
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(last_range= rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    // Case where we can avoid descending scan, see comment above
    const bool eqrange_all_keyparts= (last_range->flag & EQ_RANGE) &&
      (used_key_parts <= head->key_info[index].user_defined_key_parts);

    /*
      If we have pushed an index condition (ICP) and this quick select
      will use ha_index_prev() to read data, we need to let the
      handler know where to end the scan in order to avoid that the
      ICP implementation continues to read past the range boundary.
    */
    if (file->pushed_idx_cond)
    {
      if (!eqrange_all_keyparts)
      {
        key_range min_range;
        last_range->make_min_endpoint(&min_range);
        if(min_range.length > 0)
          file->set_end_range(&min_range, handler::RANGE_SCAN_DESC);
        else
          file->set_end_range(NULL, handler::RANGE_SCAN_DESC);
      }
      else
      {
        /*
          Will use ha_index_next_same() for reading records. In case we have
          set the end range for an earlier range, this need to be cleared.
        */
        file->set_end_range(NULL, handler::RANGE_SCAN_ASC);
      }
    }

    if (last_range->flag & NO_MAX_RANGE)        // Read last record
    {
      int local_error;
      if ((local_error= file->ha_index_last(record)))
      {
        /*
          HA_ERR_END_OF_FILE is returned both when the table is empty and when
          there are no qualifying records in the range (when using ICP).
          Interpret this return value as "no qualifying rows in the range" to
          avoid loss of records. If the error code truly meant "empty table"
          the next iteration of the loop will exit.
        */
        if (local_error != HA_ERR_END_OF_FILE)
          DBUG_RETURN(local_error);
        last_range= NULL;                       // Go to next range
        continue;
      }

      if (cmp_prev(last_range) == 0)
	DBUG_RETURN(0);
      last_range= 0;                            // No match; go to next range
      continue;
    }

    if (eqrange_all_keyparts)

    {
      /* Full-key equality: position exactly on the key. */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      HA_READ_KEY_EXACT);
    }
    else
    {
      DBUG_ASSERT(last_range->flag & NEAR_MAX ||
                  (last_range->flag & EQ_RANGE &&
                   used_key_parts >
                   head->key_info[index].user_defined_key_parts) ||
                  range_reads_after_key(last_range));
      /* Position at/before the max endpoint, then scan backwards. */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      ((last_range->flag & NEAR_MAX) ?
                                       HA_READ_BEFORE_KEY :
                                       HA_READ_PREFIX_LAST_OR_PREV));
    }
    if (result)
    {
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
      last_range= 0;                            // Not found, to next range
      continue;
    }
    if (cmp_prev(last_range) == 0)
    {
      if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
	last_range= 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    last_range= 0;                              // To next range
  }
}
11011 
11012 
11013 /**
11014   Create a compatible quick select with the result ordered in an opposite way
11015 
11016   @param used_key_parts_arg  Number of used key parts
11017 
11018   @retval NULL in case of errors (OOM etc)
11019   @retval pointer to a newly created QUICK_SELECT_DESC if success
11020 */
11021 
make_reverse(uint used_key_parts_arg)11022 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
11023 {
11024   bool error= FALSE;
11025   QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg,
11026                                                       &error);
11027   if (new_quick == NULL || error)
11028   {
11029     delete new_quick;
11030     return NULL;
11031   }
11032   return new_quick;
11033 }
11034 
11035 
11036 /*
11037   Compare if found key is over max-value
11038   Returns 0 if key <= range->max_key
11039   TODO: Figure out why can't this function be as simple as cmp_prev().
11040 */
11041 
int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  KEY_PART *key_part=key_parts;
  uint store_length;

  /* Walk the stored max key one keypart at a time, comparing with the row. */
  for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
       key < end;
       key+= store_length, key_part++)
  {
    int cmp;
    store_length= key_part->store_length;
    if (key_part->null_bit)
    {
      /* First stored byte of a nullable keypart is the NULL indicator. */
      if (*key)
      {
        if (!key_part->field->is_null())
          return 1;                     /* max is NULL, row value is not */
        continue;                       /* both NULL: equal, next keypart */
      }
      else if (key_part->field->is_null())
        return 0;                       /* row NULL sorts before non-NULL max */
      key++;					// Skip null byte
      store_length--;
    }
    if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
      return 0;                         /* row value < max: inside */
    if (cmp > 0)
      return 1;                         /* row value > max: outside */
  }
  /* All keyparts equal: outside only if the max endpoint is exclusive. */
  return (range_arg->flag & NEAR_MAX) ? 1 : 0;          // Exact match
}
11076 
11077 
11078 /*
11079   Returns 0 if found key is inside range (found key >= range->min_key).
11080 */
11081 
cmp_prev(QUICK_RANGE * range_arg)11082 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
11083 {
11084   int cmp;
11085   if (range_arg->flag & NO_MIN_RANGE)
11086     return 0;					/* key can't be to small */
11087 
11088   cmp= key_cmp(key_part_info, range_arg->min_key,
11089                range_arg->min_length);
11090   if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
11091     return 0;
11092   return 1;                                     // outside of range
11093 }
11094 
11095 
11096 /*
11097  * TRUE if this range will require using HA_READ_AFTER_KEY
11098    See comment in get_next() about this
11099  */
11100 
range_reads_after_key(QUICK_RANGE * range_arg)11101 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
11102 {
11103   return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
11104 	  !(range_arg->flag & EQ_RANGE) ||
11105 	  head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
11106 }
11107 
11108 
add_info_string(String * str)11109 void QUICK_RANGE_SELECT::add_info_string(String *str)
11110 {
11111   KEY *key_info= head->key_info + index;
11112   str->append(key_info->name);
11113 }
11114 
add_info_string(String * str)11115 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
11116 {
11117   QUICK_RANGE_SELECT *quick;
11118   bool first= TRUE;
11119   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11120   str->append(STRING_WITH_LEN("sort_union("));
11121   while ((quick= it++))
11122   {
11123     if (!first)
11124       str->append(',');
11125     else
11126       first= FALSE;
11127     quick->add_info_string(str);
11128   }
11129   if (pk_quick_select)
11130   {
11131     str->append(',');
11132     pk_quick_select->add_info_string(str);
11133   }
11134   str->append(')');
11135 }
11136 
add_info_string(String * str)11137 void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
11138 {
11139   bool first= TRUE;
11140   QUICK_RANGE_SELECT *quick;
11141   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11142   str->append(STRING_WITH_LEN("intersect("));
11143   while ((quick= it++))
11144   {
11145     KEY *key_info= head->key_info + quick->index;
11146     if (!first)
11147       str->append(',');
11148     else
11149       first= FALSE;
11150     str->append(key_info->name);
11151   }
11152   if (cpk_quick)
11153   {
11154     KEY *key_info= head->key_info + cpk_quick->index;
11155     str->append(',');
11156     str->append(key_info->name);
11157   }
11158   str->append(')');
11159 }
11160 
add_info_string(String * str)11161 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
11162 {
11163   bool first= TRUE;
11164   QUICK_SELECT_I *quick;
11165   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11166   str->append(STRING_WITH_LEN("union("));
11167   while ((quick= it++))
11168   {
11169     if (!first)
11170       str->append(',');
11171     else
11172       first= FALSE;
11173     quick->add_info_string(str);
11174   }
11175   str->append(')');
11176 }
11177 
11178 
add_keys_and_lengths(String * key_names,String * used_lengths)11179 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
11180                                               String *used_lengths)
11181 {
11182   char buf[64];
11183   uint length;
11184   KEY *key_info= head->key_info + index;
11185   key_names->append(key_info->name);
11186   length= longlong2str(max_used_key_length, buf, 10) - buf;
11187   used_lengths->append(buf, length);
11188 }
11189 
add_keys_and_lengths(String * key_names,String * used_lengths)11190 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
11191                                                     String *used_lengths)
11192 {
11193   char buf[64];
11194   uint length;
11195   bool first= TRUE;
11196   QUICK_RANGE_SELECT *quick;
11197 
11198   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11199   while ((quick= it++))
11200   {
11201     if (first)
11202       first= FALSE;
11203     else
11204     {
11205       key_names->append(',');
11206       used_lengths->append(',');
11207     }
11208 
11209     KEY *key_info= head->key_info + quick->index;
11210     key_names->append(key_info->name);
11211     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11212     used_lengths->append(buf, length);
11213   }
11214   if (pk_quick_select)
11215   {
11216     KEY *key_info= head->key_info + pk_quick_select->index;
11217     key_names->append(',');
11218     key_names->append(key_info->name);
11219     length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
11220     used_lengths->append(',');
11221     used_lengths->append(buf, length);
11222   }
11223 }
11224 
add_keys_and_lengths(String * key_names,String * used_lengths)11225 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
11226                                                       String *used_lengths)
11227 {
11228   char buf[64];
11229   uint length;
11230   bool first= TRUE;
11231   QUICK_RANGE_SELECT *quick;
11232   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11233   while ((quick= it++))
11234   {
11235     KEY *key_info= head->key_info + quick->index;
11236     if (first)
11237       first= FALSE;
11238     else
11239     {
11240       key_names->append(',');
11241       used_lengths->append(',');
11242     }
11243     key_names->append(key_info->name);
11244     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
11245     used_lengths->append(buf, length);
11246   }
11247 
11248   if (cpk_quick)
11249   {
11250     KEY *key_info= head->key_info + cpk_quick->index;
11251     key_names->append(',');
11252     key_names->append(key_info->name);
11253     length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
11254     used_lengths->append(',');
11255     used_lengths->append(buf, length);
11256   }
11257 }
11258 
add_keys_and_lengths(String * key_names,String * used_lengths)11259 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
11260                                                   String *used_lengths)
11261 {
11262   bool first= TRUE;
11263   QUICK_SELECT_I *quick;
11264   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
11265   while ((quick= it++))
11266   {
11267     if (first)
11268       first= FALSE;
11269     else
11270     {
11271       used_lengths->append(',');
11272       key_names->append(',');
11273     }
11274     quick->add_keys_and_lengths(key_names, used_lengths);
11275   }
11276 }
11277 
11278 
11279 /*******************************************************************************
11280 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
11281 *******************************************************************************/
11282 
/*
  Forward declarations of the static helpers used by the GROUP_MIN_MAX
  (loose index scan) analysis implemented below.
*/
static inline uint get_field_keypart(KEY *index, Field *field);
static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
                                             PARAM *param, uint *param_idx);
static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
                                    SEL_ARG **cur_range);
static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
                       KEY_PART_INFO *first_non_group_part,
                       KEY_PART_INFO *min_max_arg_part,
                       KEY_PART_INFO *last_part, THD *thd,
                       uchar *key_infix, uint *key_infix_len,
                       KEY_PART_INFO **first_non_infix_part);
static bool
check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type);

static void
cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                   uint group_key_parts, SEL_TREE *range_tree,
                   SEL_ARG *index_tree, ha_rows quick_prefix_records,
                   bool have_min, bool have_max,
                   double *read_cost, ha_rows *records);
11304 
11305 
11306 /**
11307   Test if this access method is applicable to a GROUP query with MIN/MAX
11308   functions, and if so, construct a new TRP object.
11309 
11310   DESCRIPTION
11311     Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
11312     Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
11313     following conditions:
11314     A) Table T has at least one compound index I of the form:
11315        I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
11316     B) Query conditions:
11317     B0. Q is over a single table T.
11318     B1. The attributes referenced by Q are a subset of the attributes of I.
11319     B2. All attributes QA in Q can be divided into 3 overlapping groups:
11320         - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
11321           referenced by any number of MIN and/or MAX functions if present.
11322         - WA = {W_1, ..., W_p} - from the WHERE clause
11323         - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
             = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
11326         - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
11327           GROUP BY and not referenced by MIN/MAX functions.
11328         with the following properties specified below.
11329     B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
11330         applicable.
11331 
    SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions which, if present, is denoted as C.
11334     SA2. The position of the C attribute in the index is after the last A_k.
11335     SA3. The attribute C can be referenced in the WHERE clause only in
11336          predicates of the forms:
11337          - (C {< | <= | > | >= | =} const)
11338          - (const {< | <= | > | >= | =} C)
11339          - (C between const_i and const_j)
11340          - C IS NULL
11341          - C IS NOT NULL
11342          - C != const
11343     SA4. If Q has a GROUP BY clause, there are no other aggregate functions
11344          except MIN and MAX. For queries with DISTINCT, aggregate functions
11345          are allowed.
11346     SA5. The select list in DISTINCT queries should not contain expressions.
11347     SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
11348          for AGG_FUNC(DISTINCT ...) optimization because cursor position is
11349          never stored after a unique key lookup in the clustered index and
         further index_next/prev calls can not be used. So loose index scan
11351          optimization can not be used in this case.
11352     SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
11353          access method is not used.
11354          For above queries MIN/MAX() aggregation has to be done at
11355          nested_loops_join (end_send_group). But with current design MIN/MAX()
11356          is always set as part of loose index scan. Because of this mismatch
11357          MIN() and MAX() values will be set incorrectly. For such queries to
11358          work we need a new interface for loose index scan. This new interface
11359          should only fetch records with min and max values and let
11360          end_send_group to do aggregation. Until then do not use
11361          loose_index_scan.
11362     GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
11363          G_i = A_j => i = j.
11364     GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
11365          forms a prefix of I. This permutation is used as the GROUP clause
11366          when the DISTINCT query is converted to a GROUP query.
11367     GA3. The attributes in GA may participate in arbitrary predicates, divided
11368          into two groups:
11369          - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
11370            attributes of a prefix of GA
11371          - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
11372            of GA. Since P is applied to only GROUP attributes it filters some
11373            groups, and thus can be applied after the grouping.
11374     GA4. There are no expressions among G_i, just direct column references.
11375     NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
11376          and the MIN/MAX attribute C, then NGA must consist of exactly the
11377          index attributes that constitute the gap. As a result there is a
11378          permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
11379          in the index.
11380     NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
11381          equality conditions for all NG_i of the form (NG_i = const) or
11382          (const = NG_i), such that each NG_i is referenced in exactly one
11383          conjunct. Informally, the predicates provide constants to fill the
11384          gap in the index.
11385     NGA3.If BA <> {}, there can only be one range. TODO: This is a code
11386          limitation and is not strictly needed. See BUG#15947433
11387     WA1. There are no other attributes in the WHERE clause except the ones
11388          referenced in predicates RNG, PA, PC, EQ defined above. Therefore
11389          WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
11390          above tests. By transitivity then it also follows that each WA_i
11391          participates in the index I (if this was already tested for GA, NGA
11392          and C).
11393     WA2. If there is a predicate on C, then it must be in conjunction
11394          to all predicates on all earlier keyparts in I.
11395 
11396     C) Overall query form:
11397        SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
11398          FROM T
11399         WHERE [RNG(A_1,...,A_p ; where p <= k)]
11400          [AND EQ(B_1,...,B_m)]
11401          [AND PC(C)]
11402          [AND PA(A_i1,...,A_iq)]
11403        GROUP BY A_1,...,A_k
11404        [HAVING PH(A_1, ..., B_1,..., C)]
11405     where EXPR(...) is an arbitrary expression over some or all SELECT fields,
11406     or:
11407        SELECT DISTINCT A_i1,...,A_ik
11408          FROM T
11409         WHERE [RNG(A_1,...,A_p ; where p <= k)]
11410          [AND PA(A_i1,...,A_iq)];
11411 
11412   NOTES
11413     If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
11415     object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
11416     If (mem_root == NULL), then the function only tests whether the current
11417     query satisfies the conditions above, and, if so, sets
11418     is_applicable = TRUE.
11419 
11420     Queries with DISTINCT for which index access can be used are transformed
11421     into equivalent group-by queries of the form:
11422 
11423     SELECT A_1,...,A_k FROM T
11424      WHERE [RNG(A_1,...,A_p ; where p <= k)]
11425       [AND PA(A_i1,...,A_iq)]
11426     GROUP BY A_1,...,A_k;
11427 
11428     The group-by list is a permutation of the select attributes, according
11429     to their order in the index.
11430 
11431   TODO
11432   - What happens if the query groups by the MIN/MAX field, and there is no
11433     other field as in: "select min(a) from t1 group by a" ?
11434   - We assume that the general correctness of the GROUP-BY query was checked
11435     before this point. Is this correct, or do we have to check it completely?
11436   - Lift the limitation in condition (B3), that is, make this access method
11437     applicable to ROLLUP queries.
11438 
11439  @param  param     Parameter from test_quick_select
11440  @param  sel_tree  Range tree generated by get_mm_tree
11441  @param  read_time Best read time so far (=table/index scan time)
11442  @return table read plan
11443    @retval NULL  Loose index scan not applicable or mem_root == NULL
11444    @retval !NULL Loose index scan table read plan
11445 */
11446 
11447 static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM * param,SEL_TREE * tree,double read_time)11448 get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
11449 {
11450   THD *thd= param->thd;
11451   JOIN *join= thd->lex->current_select->join;
11452   TABLE *table= param->table;
11453   bool have_min= FALSE;              /* TRUE if there is a MIN function. */
11454   bool have_max= FALSE;              /* TRUE if there is a MAX function. */
11455   Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
11456   KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
11457   uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
11458   KEY *index_info= NULL;    /* The index chosen for data access. */
11459   uint index= 0;            /* The id of the chosen index. */
11460   uint group_key_parts= 0;  // Number of index key parts in the group prefix.
11461   uint used_key_parts= 0;   /* Number of index key parts used for access. */
11462   uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
11463   uint key_infix_len= 0;          /* Length of key_infix. */
11464   TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
11465   uint key_part_nr;
11466   ORDER *tmp_group;
11467   Item *item;
11468   Item_field *item_field;
11469   bool is_agg_distinct;
11470   List<Item_field> agg_distinct_flds;
11471   /* Cost-related variables for the best index so far. */
11472   double best_read_cost= DBL_MAX;
11473   ha_rows best_records= 0;
11474   SEL_ARG *best_index_tree= NULL;
11475   ha_rows best_quick_prefix_records= 0;
11476   uint best_param_idx= 0;
11477   List_iterator<Item> select_items_it;
11478   Opt_trace_context * const trace= &param->thd->opt_trace;
11479 
11480   DBUG_ENTER("get_best_group_min_max");
11481 
11482   Opt_trace_object trace_group(trace, "group_index_range",
11483                                Opt_trace_context::RANGE_OPTIMIZER);
11484   const char* cause= NULL;
11485 
11486   /* Perform few 'cheap' tests whether this access method is applicable. */
11487   if (!join)
11488     cause= "no_join";
11489   else if (join->primary_tables != 1)  /* Query must reference one table. */
11490     cause= "not_single_table";
11491   else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
11492     cause= "rollup";
11493   else if (table->s->keys == 0)        /* There are no indexes to use. */
11494     cause= "no_index";
11495   else if (param->order_direction == ORDER::ORDER_DESC)
11496     cause= "cannot_do_reverse_ordering";
11497   if (cause != NULL)
11498   {
11499     trace_group.add("chosen", false).add_alnum("cause", cause);
11500     DBUG_RETURN(NULL);
11501   }
11502 
11503   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
11504   is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
11505 
11506   if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
11507       (!join->select_distinct) &&
11508       !is_agg_distinct)
11509   {
11510     trace_group.add("chosen", false).
11511       add_alnum("cause", "not_group_by_or_distinct");
11512     DBUG_RETURN(NULL);
11513   }
11514   /* Analyze the query in more detail. */
11515 
11516   if (join->sum_funcs[0])
11517   {
11518     Item_sum *min_max_item;
11519     Item_sum **func_ptr= join->sum_funcs;
11520     while ((min_max_item= *(func_ptr++)))
11521     {
11522       if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
11523         have_min= TRUE;
11524       else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
11525         have_max= TRUE;
11526       else if (is_agg_distinct &&
11527                (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
11528                 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
11529                 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
11530         continue;
11531       else
11532       {
11533         trace_group.add("chosen", false).
11534           add_alnum("cause", "not_applicable_aggregate_function");
11535         DBUG_RETURN(NULL);
11536       }
11537 
11538       /* The argument of MIN/MAX. */
11539       Item *expr= min_max_item->get_arg(0)->real_item();
11540       if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
11541       {
11542         if (! min_max_arg_item)
11543           min_max_arg_item= (Item_field*) expr;
11544         else if (! min_max_arg_item->eq(expr, 1))
11545           DBUG_RETURN(NULL);
11546       }
11547       else
11548         DBUG_RETURN(NULL);
11549     }
11550   }
11551 
11552   /* Check (SA7). */
11553   if (is_agg_distinct && (have_max || have_min))
11554   {
11555     trace_group.add("chosen", false).
11556       add_alnum("cause", "have_both_agg_distinct_and_min_max");
11557     DBUG_RETURN(NULL);
11558   }
11559 
11560   select_items_it= List_iterator<Item>(join->fields_list);
11561   /* Check (SA5). */
11562   if (join->select_distinct)
11563   {
11564     trace_group.add("distinct_query", true);
11565     while ((item= select_items_it++))
11566     {
11567       if (item->real_item()->type() != Item::FIELD_ITEM)
11568         DBUG_RETURN(NULL);
11569     }
11570   }
11571 
11572   /* Check (GA4) - that there are no expressions among the group attributes. */
11573   for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
11574   {
11575     if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
11576     {
11577       trace_group.add("chosen", false).
11578         add_alnum("cause", "group_field_is_expression");
11579       DBUG_RETURN(NULL);
11580     }
11581   }
11582 
11583   /*
11584     Check that table has at least one compound index such that the conditions
11585     (GA1,GA2) are all TRUE. If there is more than one such index, select the
11586     first one. Here we set the variables: group_prefix_len and index_info.
11587   */
11588 
11589   const uint pk= param->table->s->primary_key;
11590   KEY *cur_index_info= table->key_info;
11591   KEY *cur_index_info_end= cur_index_info + table->s->keys;
11592   SEL_ARG *cur_index_tree= NULL;
11593   ha_rows cur_quick_prefix_records= 0;
11594   uint cur_param_idx= MAX_KEY;
11595   Opt_trace_array trace_indices(trace, "potential_group_range_indices");
11596   for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
11597        cur_index_info++, cur_index++)
11598   {
11599     Opt_trace_object trace_idx(trace);
11600     trace_idx.add_utf8("index", cur_index_info->name);
11601     KEY_PART_INFO *cur_part;
11602     KEY_PART_INFO *end_part; /* Last part for loops. */
11603     /* Last index part. */
11604     KEY_PART_INFO *last_part;
11605     KEY_PART_INFO *first_non_group_part;
11606     KEY_PART_INFO *first_non_infix_part;
11607     uint key_infix_parts;
11608     uint cur_group_key_parts= 0;
11609     uint cur_group_prefix_len= 0;
11610     double cur_read_cost;
11611     ha_rows cur_records;
11612     key_map used_key_parts_map;
11613     uint max_key_part= 0;
11614     uint cur_key_infix_len= 0;
11615     uchar cur_key_infix[MAX_KEY_LENGTH];
11616     uint cur_used_key_parts;
11617 
11618     /* Check (B1) - if current index is covering. */
11619     if (!table->covering_keys.is_set(cur_index))
11620     {
11621       cause= "not_covering";
11622       goto next_index;
11623     }
11624 
11625     /*
11626       If the current storage manager is such that it appends the primary key to
11627       each index, then the above condition is insufficient to check if the
11628       index is covering. In such cases it may happen that some fields are
11629       covered by the PK index, but not by the current index. Since we can't
11630       use the concatenation of both indexes for index lookup, such an index
11631       does not qualify as covering in our case. If this is the case, below
11632       we check that all query fields are indeed covered by 'cur_index'.
11633     */
11634     if (pk < MAX_KEY && cur_index != pk &&
11635         (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
11636     {
11637       /* For each table field */
11638       for (uint i= 0; i < table->s->fields; i++)
11639       {
11640         Field *cur_field= table->field[i];
11641         /*
11642           If the field is used in the current query ensure that it's
11643           part of 'cur_index'
11644         */
11645         if (bitmap_is_set(table->read_set, cur_field->field_index) &&
11646             !cur_field->is_part_of_actual_key(thd, cur_index, cur_index_info))
11647         {
11648           cause= "not_covering";
11649           goto next_index;                  // Field was not part of key
11650         }
11651       }
11652     }
11653     trace_idx.add("covering", true);
11654 
11655     /*
11656       Check (GA1) for GROUP BY queries.
11657     */
11658     if (join->group_list)
11659     {
11660       cur_part= cur_index_info->key_part;
11661       end_part= cur_part + actual_key_parts(cur_index_info);
11662       /* Iterate in parallel over the GROUP list and the index parts. */
11663       for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
11664            tmp_group= tmp_group->next, cur_part++)
11665       {
11666         /*
11667           TODO:
11668           tmp_group::item is an array of Item, is it OK to consider only the
11669           first Item? If so, then why? What is the array for?
11670         */
11671         /* Above we already checked that all group items are fields. */
11672         DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
11673         Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
11674         if (group_field->field->eq(cur_part->field))
11675         {
11676           cur_group_prefix_len+= cur_part->store_length;
11677           ++cur_group_key_parts;
11678           max_key_part= cur_part - cur_index_info->key_part + 1;
11679           used_key_parts_map.set_bit(max_key_part);
11680         }
11681         else
11682         {
11683           cause= "group_attribute_not_prefix_in_index";
11684           goto next_index;
11685         }
11686       }
11687     }
11688 
11689     /*
11690       Check (GA2) if this is a DISTINCT query.
11691       If GA2, then Store a new ORDER object in group_fields_array at the
11692       position of the key part of item_field->field. Thus we get the ORDER
11693       objects for each field ordered as the corresponding key parts.
11694       Later group_fields_array of ORDER objects is used to convert the query
11695       to a GROUP query.
11696     */
11697     if ((!join->group_list && join->select_distinct) ||
11698         is_agg_distinct)
11699     {
11700       if (!is_agg_distinct)
11701       {
11702         select_items_it.rewind();
11703       }
11704 
11705       List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
11706       while (NULL !=
11707              (item= (is_agg_distinct ?
11708                      (Item *) agg_distinct_flds_it++ : select_items_it++)))
11709       {
11710         /* (SA5) already checked above. */
11711         item_field= (Item_field*) item->real_item();
11712         DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
11713 
11714         /* not doing loose index scan for derived tables */
11715         if (!item_field->field)
11716         {
11717           cause= "derived_table";
11718           goto next_index;
11719         }
11720 
11721         /* Find the order of the key part in the index. */
11722         key_part_nr= get_field_keypart(cur_index_info, item_field->field);
11723         /*
11724           Check if this attribute was already present in the select list.
          If it was present, then its corresponding key part was already used.
11726         */
11727         if (used_key_parts_map.is_set(key_part_nr))
11728           continue;
11729         if (key_part_nr < 1 ||
11730             (!is_agg_distinct && key_part_nr > join->fields_list.elements))
11731         {
11732           cause= "select_attribute_not_prefix_in_index";
11733           goto next_index;
11734         }
11735         cur_part= cur_index_info->key_part + key_part_nr - 1;
11736         cur_group_prefix_len+= cur_part->store_length;
11737         used_key_parts_map.set_bit(key_part_nr);
11738         ++cur_group_key_parts;
11739         max_key_part= max(max_key_part,key_part_nr);
11740       }
11741       /*
11742         Check that used key parts forms a prefix of the index.
11743         To check this we compare bits in all_parts and cur_parts.
11744         all_parts have all bits set from 0 to (max_key_part-1).
11745         cur_parts have bits set for only used keyparts.
11746       */
11747       ulonglong all_parts, cur_parts;
11748       all_parts= (1ULL << max_key_part) - 1;
11749       cur_parts= used_key_parts_map.to_ulonglong() >> 1;
11750       if (all_parts != cur_parts)
11751         goto next_index;
11752     }
11753 
11754     /* Check (SA2). */
11755     if (min_max_arg_item)
11756     {
11757       key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
11758       if (key_part_nr <= cur_group_key_parts)
11759       {
11760         cause= "aggregate_column_not_suffix_in_idx";
11761         goto next_index;
11762       }
11763       min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
11764     }
11765 
11766     /* Check (SA6) if clustered key is used. */
11767     if (is_agg_distinct && cur_index == table->s->primary_key &&
11768         table->file->primary_key_is_clustered())
11769     {
11770       cause= "primary_key_is_clustered";
11771       goto next_index;
11772     }
11773 
11774     /*
11775       Check (NGA1, NGA2) and extract a sequence of constants to be used as part
11776       of all search keys.
11777     */
11778 
11779     /*
11780       If there is MIN/MAX, each keypart between the last group part and the
11781       MIN/MAX part must participate in one equality with constants, and all
11782       keyparts after the MIN/MAX part must not be referenced in the query.
11783 
11784       If there is no MIN/MAX, the keyparts after the last group part can be
11785       referenced only in equalities with constants, and the referenced keyparts
11786       must form a sequence without any gaps that starts immediately after the
11787       last group keypart.
11788     */
11789     last_part= cur_index_info->key_part + actual_key_parts(cur_index_info);
11790     first_non_group_part=
11791       (cur_group_key_parts < actual_key_parts(cur_index_info)) ?
11792       cur_index_info->key_part + cur_group_key_parts :
11793       NULL;
11794     first_non_infix_part= min_max_arg_part ?
11795       (min_max_arg_part < last_part) ?
11796       min_max_arg_part :
11797       NULL :
11798       NULL;
11799     if (first_non_group_part &&
11800         (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
11801     {
11802       if (tree)
11803       {
11804         uint dummy;
11805         SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11806                                                         &dummy);
11807         if (!get_constant_key_infix(cur_index_info, index_range_tree,
11808                                     first_non_group_part, min_max_arg_part,
11809                                     last_part, thd, cur_key_infix,
11810                                     &cur_key_infix_len,
11811                                     &first_non_infix_part))
11812         {
11813           cause= "nonconst_equality_gap_attribute";
11814           goto next_index;
11815         }
11816       }
11817       else if (min_max_arg_part &&
11818                (min_max_arg_part - first_non_group_part > 0))
11819       {
11820         /*
11821           There is a gap but no range tree, thus no predicates at all for the
11822           non-group keyparts.
11823         */
11824         cause= "no_nongroup_keypart_predicate";
11825         goto next_index;
11826       }
11827       else if (first_non_group_part && join->conds)
11828       {
11829         /*
11830           If there is no MIN/MAX function in the query, but some index
11831           key part is referenced in the WHERE clause, then this index
11832           cannot be used because the WHERE condition over the keypart's
11833           field cannot be 'pushed' to the index (because there is no
11834           range 'tree'), and the WHERE clause must be evaluated before
11835           GROUP BY/DISTINCT.
11836         */
11837         /*
11838           Store the first and last keyparts that need to be analyzed
11839           into one array that can be passed as parameter.
11840         */
11841         KEY_PART_INFO *key_part_range[2];
11842         key_part_range[0]= first_non_group_part;
11843         key_part_range[1]= last_part;
11844 
11845         /* Check if cur_part is referenced in the WHERE clause. */
11846         if (join->conds->walk(&Item::find_item_in_field_list_processor, 1,
11847                               (uchar*) key_part_range))
11848         {
11849           cause= "keypart_reference_from_where_clause";
11850           goto next_index;
11851         }
11852       }
11853     }
11854 
11855     /*
11856       Test (WA1) partially - that no other keypart after the last infix part is
11857       referenced in the query.
11858     */
11859     if (first_non_infix_part)
11860     {
11861       cur_part= first_non_infix_part +
11862         (min_max_arg_part && (min_max_arg_part < last_part));
11863       for (; cur_part != last_part; cur_part++)
11864       {
11865         if (bitmap_is_set(table->read_set, cur_part->field->field_index))
11866         {
11867           cause= "keypart_after_infix_in_query";
11868           goto next_index;
11869         }
11870       }
11871     }
11872 
11873     /**
11874       Test WA2:If there are conditions on a column C participating in
11875       MIN/MAX, those conditions must be conjunctions to all earlier
11876       keyparts. Otherwise, Loose Index Scan cannot be used.
11877     */
11878     if (tree && min_max_arg_item)
11879     {
11880       uint dummy;
11881       SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
11882                                                       &dummy);
11883       SEL_ARG *cur_range= NULL;
11884       if (get_sel_arg_for_keypart(min_max_arg_part->field,
11885                                   index_range_tree, &cur_range) ||
11886           (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
11887       {
11888         cause= "minmax_keypart_in_disjunctive_query";
11889         goto next_index;
11890       }
11891     }
11892 
11893     /* If we got to this point, cur_index_info passes the test. */
11894     key_infix_parts= cur_key_infix_len ? (uint)
11895       (first_non_infix_part - first_non_group_part) : 0;
11896     cur_used_key_parts= cur_group_key_parts + key_infix_parts;
11897 
11898     /* Compute the cost of using this index. */
11899     if (tree)
11900     {
11901       /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
11902       cur_index_tree= get_index_range_tree(cur_index, tree, param,
11903                                            &cur_param_idx);
11904       /* Check if this range tree can be used for prefix retrieval. */
11905       Cost_estimate dummy_cost;
11906       uint mrr_flags= HA_MRR_SORTED;
11907       uint mrr_bufsize=0;
11908       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
11909                                                    FALSE /*don't care*/,
11910                                                    cur_index_tree, TRUE,
11911                                                    &mrr_flags, &mrr_bufsize,
11912                                                    &dummy_cost);
11913 #ifdef OPTIMIZER_TRACE
11914       if (unlikely(cur_index_tree && trace->is_started()))
11915       {
11916         trace_idx.add("index_dives_for_eq_ranges", !param->use_index_statistics);
11917         Opt_trace_array trace_range(trace, "ranges");
11918 
11919         const KEY_PART_INFO *key_part= cur_index_info->key_part;
11920 
11921         String range_info;
11922         range_info.set_charset(system_charset_info);
11923         append_range_all_keyparts(&trace_range, NULL, &range_info,
11924                                   cur_index_tree, key_part);
11925       }
11926 #endif
11927     }
11928     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
11929                        cur_group_key_parts, tree, cur_index_tree,
11930                        cur_quick_prefix_records, have_min, have_max,
11931                        &cur_read_cost, &cur_records);
11932     /*
11933       If cur_read_cost is lower than best_read_cost use cur_index.
11934       Do not compare doubles directly because they may have different
11935       representations (64 vs. 80 bits).
11936     */
11937     trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
11938     if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
11939     {
11940       index_info= cur_index_info;
11941       index= cur_index;
11942       best_read_cost= cur_read_cost;
11943       best_records= cur_records;
11944       best_index_tree= cur_index_tree;
11945       best_quick_prefix_records= cur_quick_prefix_records;
11946       best_param_idx= cur_param_idx;
11947       group_key_parts= cur_group_key_parts;
11948       group_prefix_len= cur_group_prefix_len;
11949       key_infix_len= cur_key_infix_len;
11950       if (key_infix_len)
11951         memcpy (key_infix, cur_key_infix, sizeof (key_infix));
11952       used_key_parts= cur_used_key_parts;
11953     }
11954 
11955   next_index:
11956     if (cause)
11957     {
11958       trace_idx.add("usable", false).add_alnum("cause", cause);
11959       cause= NULL;
11960     }
11961   }
11962   trace_indices.end();
11963 
11964   if (!index_info) /* No usable index found. */
11965     DBUG_RETURN(NULL);
11966 
11967   /* Check (SA3) for the where clause. */
11968   if (join->conds && min_max_arg_item &&
11969       !check_group_min_max_predicates(join->conds, min_max_arg_item,
11970                                       (index_info->flags & HA_SPATIAL) ?
11971                                       Field::itMBR : Field::itRAW))
11972   {
11973     trace_group.add("usable", false).
11974       add_alnum("cause", "unsupported_predicate_on_agg_attribute");
11975     DBUG_RETURN(NULL);
11976   }
11977 
11978   /* The query passes all tests, so construct a new TRP object. */
11979   read_plan= new (param->mem_root)
11980                  TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
11981                                    min_max_arg_part,
11982                                    group_prefix_len, used_key_parts,
11983                                    group_key_parts, index_info, index,
11984                                    key_infix_len,
11985                                    (key_infix_len > 0) ? key_infix : NULL,
11986                                    tree, best_index_tree, best_param_idx,
11987                                    best_quick_prefix_records);
11988   if (read_plan)
11989   {
11990     if (tree && read_plan->quick_prefix_records == 0)
11991       DBUG_RETURN(NULL);
11992 
11993     read_plan->read_cost= best_read_cost;
11994     read_plan->records=   best_records;
11995     if (read_time < best_read_cost && is_agg_distinct)
11996     {
11997       trace_group.add("index_scan", true);
11998       read_plan->read_cost= 0;
11999       read_plan->use_index_scan();
12000     }
12001 
12002     DBUG_PRINT("info",
12003                ("Returning group min/max plan: cost: %g, records: %lu",
12004                 read_plan->read_cost, (ulong) read_plan->records));
12005   }
12006 
12007   DBUG_RETURN(read_plan);
12008 }
12009 
12010 
12011 /*
12012   Check that the MIN/MAX attribute participates only in range predicates
12013   with constants.
12014 
12015   SYNOPSIS
12016     check_group_min_max_predicates()
12017     cond              tree (or subtree) describing all or part of the WHERE
12018                       clause being analyzed
12019     min_max_arg_item  the field referenced by the MIN/MAX function(s)
12020     min_max_arg_part  the keypart of the MIN/MAX argument if any
12021 
12022   DESCRIPTION
12023     The function walks recursively over the cond tree representing a WHERE
12024     clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
12025     aggregate function, it is referenced only by one of the following
12026     predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.
12027 
12028   RETURN
12029     TRUE  if cond passes the test
12030     FALSE o/w
12031 */
12032 
static bool
check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type)
{
  DBUG_ENTER("check_group_min_max_predicates");
  DBUG_ASSERT(cond && min_max_arg_item);

  /* Strip any Item_ref wrapper and classify the underlying item. */
  cond= cond->real_item();
  Item::Type cond_type= cond->type();
  if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
  {
    DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *and_or_arg;
    /*
      Every branch of the AND/OR must independently pass (SA3): a single
      disallowed reference to the MIN/MAX field anywhere in the condition
      tree disqualifies the whole WHERE clause.
    */
    while ((and_or_arg= li++))
    {
      if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
                                         image_type))
        DBUG_RETURN(FALSE);
    }
    DBUG_RETURN(TRUE);
  }

  /*
    TODO:
    This is a very crude fix to handle sub-selects in the WHERE clause
    (Item_subselect objects). With the test below we rule out from the
    optimization all queries with subselects in the WHERE clause. What has to
    be done, is that here we should analyze whether the subselect references
    the MIN/MAX argument field, and disallow the optimization only if this is
    so.
  */
  if (cond_type == Item::SUBSELECT_ITEM)
    DBUG_RETURN(FALSE);

  /*
    Condition of the form 'field' is equivalent to 'field <> 0' and thus
    satisfies the SA3 condition.
  */
  if (cond_type == Item::FIELD_ITEM)
  {
    DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
    DBUG_RETURN(TRUE);
  }

  /* We presume that at this point there are no other Items than functions. */
  DBUG_ASSERT(cond_type == Item::FUNC_ITEM);

  /* Test if cond references only group-by or non-group fields. */
  Item_func *pred= (Item_func*) cond;
  Item *cur_arg;
  DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
  for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
  {
    Item **arguments= pred->arguments();
    cur_arg= arguments[arg_idx]->real_item();
    DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
    if (cur_arg->type() == Item::FIELD_ITEM)
    {
      if (min_max_arg_item->eq(cur_arg, 1))
      {
       /*
         If pred references the MIN/MAX argument, check whether pred is a range
         condition that compares the MIN/MAX argument with a constant.
       */
        Item_func::Functype pred_type= pred->functype();
        /*
          Whitelist of predicate types allowed on the MIN/MAX field by (SA3):
          {=, <=>, !=, <, <=, >, >=, BETWEEN, IS [NOT] NULL}. Anything else
          (e.g. LIKE, IN) disqualifies the loose index scan.
        */
        if (pred_type != Item_func::EQUAL_FUNC     &&
            pred_type != Item_func::LT_FUNC        &&
            pred_type != Item_func::LE_FUNC        &&
            pred_type != Item_func::GT_FUNC        &&
            pred_type != Item_func::GE_FUNC        &&
            pred_type != Item_func::BETWEEN        &&
            pred_type != Item_func::ISNULL_FUNC    &&
            pred_type != Item_func::ISNOTNULL_FUNC &&
            pred_type != Item_func::EQ_FUNC        &&
            pred_type != Item_func::NE_FUNC)
          DBUG_RETURN(FALSE);

        /* Check that pred compares min_max_arg_item with a constant. */
        Item *args[3];
        memset(args, 0, 3 * sizeof(Item*));
        bool inv;
        /* Test if this is a comparison of a field and a constant. */
        if (!simple_pred(pred, args, &inv))
          DBUG_RETURN(FALSE);

        /* Check for compatible string comparisons - similar to get_mm_leaf. */
        if (args[0] && args[1] && !args[2] && // this is a binary function
            min_max_arg_item->result_type() == STRING_RESULT &&
            /*
              Don't use an index when comparing strings of different collations.
            */
            ((args[1]->result_type() == STRING_RESULT &&
              image_type == Field::itRAW &&
              min_max_arg_item->field->charset() != pred->compare_collation())
             ||
             /*
               We can't always use indexes when comparing a string index to a
               number.
             */
             (args[1]->result_type() != STRING_RESULT &&
              min_max_arg_item->field->cmp_type() != args[1]->result_type())))
          DBUG_RETURN(FALSE);
      }
    }
    else if (cur_arg->type() == Item::FUNC_ITEM)
    {
      /* Nested function call (e.g. expression inside a comparison): recurse. */
      if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
                                         image_type))
        DBUG_RETURN(FALSE);
    }
    else if (cur_arg->const_item())
    {
      /*
        For predicates of the form "const OP expr" we also have to check 'expr'
        to make a decision.
      */
      continue;
    }
    else
      DBUG_RETURN(FALSE);
  }

  DBUG_RETURN(TRUE);
}
12158 
12159 
12160 /*
12161   Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
12162   any. 'tree' must be a unique conjunction to ALL predicates in earlier
12163   keyparts of 'keypart_tree'.
12164 
12165   E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
  covers 'field', all these conditions satisfy the requirement:
12167 
12168    1. "(kp1=2 OR kp1=3) AND kp2=10"    => returns "kp2=10"
12169    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)"  => returns "kp2=10"
12170    3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12171                                        => returns "kp2=10  OR kp2=11"
12172 
12173    whereas these do not
12174    1. "(kp1=2 AND kp2=10) OR kp1=3"
12175    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
12176    3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
12177 
12178    This function effectively tests requirement WA2. In combination with
12179    a test that the returned tree has no more than one range it is also
12180    a test of NGA3.
12181 
12182   @param[in]   field          The field we want the SEL_ARG tree for
12183   @param[in]   keypart_tree   Root node of the SEL_ARG* tree for the index
12184   @param[out]  cur_range      The SEL_ARG tree, if any, for the keypart
12185                               covering field 'keypart_field'
12186   @retval true   'keypart_tree' contained a predicate for 'field' that
12187                   is not conjunction to all predicates on earlier keyparts
12188   @retval false  otherwise
12189 */
12190 
12191 static bool
get_sel_arg_for_keypart(Field * field,SEL_ARG * keypart_tree,SEL_ARG ** cur_range)12192 get_sel_arg_for_keypart(Field *field,
12193                         SEL_ARG *keypart_tree,
12194                         SEL_ARG **cur_range)
12195 {
12196   if (keypart_tree == NULL)
12197     return false;
12198   if (keypart_tree->type != SEL_ARG::KEY_RANGE)
12199   {
12200     /*
12201       A range predicate not usable by Loose Index Scan is found.
12202       Predicates for keypart 'keypart_tree->part' and later keyparts
12203       cannot be used.
12204     */
12205     *cur_range= keypart_tree;
12206     return false;
12207   }
12208   if (keypart_tree->field->eq(field))
12209   {
12210     *cur_range= keypart_tree;
12211     return false;
12212   }
12213 
12214   SEL_ARG *tree_first_range= NULL;
12215   SEL_ARG *first_kp=  keypart_tree->first();
12216 
12217   for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
12218   {
12219     SEL_ARG *curr_tree= NULL;
12220     if (cur_kp->next_key_part)
12221     {
12222       if (get_sel_arg_for_keypart(field,
12223                                   cur_kp->next_key_part,
12224                                   &curr_tree))
12225         return true;
12226     }
12227     /**
12228       Check if the SEL_ARG tree for 'field' is identical for all ranges in
12229       'keypart_tree
12230      */
12231     if (cur_kp == first_kp)
12232       tree_first_range= curr_tree;
12233     else if (!all_same(tree_first_range, curr_tree))
12234       return true;
12235   }
12236   *cur_range= tree_first_range;
12237   return false;
12238 }
12239 
12240 /*
12241   Extract a sequence of constants from a conjunction of equality predicates.
12242 
12243   SYNOPSIS
12244     get_constant_key_infix()
12245     index_info             [in]  Descriptor of the chosen index.
12246     index_range_tree       [in]  Range tree for the chosen index
12247     first_non_group_part   [in]  First index part after group attribute parts
12248     min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
12249     last_part              [in]  Last keypart of the index
12250     thd                    [in]  Current thread
12251     key_infix              [out] Infix of constants to be used for index lookup
    key_infix_len          [out] Length of the infix
12253     first_non_infix_part   [out] The first keypart after the infix (if any)
12254 
12255   DESCRIPTION
12256     Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
12257     for each keypart field NGF_i not in GROUP-BY, check that there is a
12258     constant equality predicate among conds with the form (NGF_i = const_ci) or
12259     (const_ci = NGF_i).
12260     Thus all the NGF_i attributes must fill the 'gap' between the last group-by
12261     attribute and the MIN/MAX attribute in the index (if present).  Also ensure
12262     that there is only a single range on NGF_i (NGA3). If these
12263     conditions hold, copy each constant from its corresponding predicate into
12264     key_infix, in the order its NG_i attribute appears in the index, and update
12265     key_infix_len with the total length of the key parts in key_infix.
12266 
12267   RETURN
12268     TRUE  if the index passes the test
12269     FALSE o/w
12270 */
12271 static bool
get_constant_key_infix(KEY * index_info,SEL_ARG * index_range_tree,KEY_PART_INFO * first_non_group_part,KEY_PART_INFO * min_max_arg_part,KEY_PART_INFO * last_part,THD * thd,uchar * key_infix,uint * key_infix_len,KEY_PART_INFO ** first_non_infix_part)12272 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
12273                        KEY_PART_INFO *first_non_group_part,
12274                        KEY_PART_INFO *min_max_arg_part,
12275                        KEY_PART_INFO *last_part, THD *thd,
12276                        uchar *key_infix, uint *key_infix_len,
12277                        KEY_PART_INFO **first_non_infix_part)
12278 {
12279   SEL_ARG       *cur_range;
12280   KEY_PART_INFO *cur_part;
12281   /* End part for the first loop below. */
12282   KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
12283 
12284   *key_infix_len= 0;
12285   uchar *key_ptr= key_infix;
12286   for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
12287   {
12288     cur_range= NULL;
12289     /*
12290       Check NGA3:
12291       1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
12292          checks for a unique conjunction of this tree with all the predicates
12293          on the earlier keyparts in the index.
12294       2. Check for multiple ranges on the found keypart tree.
12295 
12296       We assume that index_range_tree points to the leftmost keypart in
12297       the index.
12298     */
12299     if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
12300                                 &cur_range))
12301       return false;
12302 
12303     if (cur_range && cur_range->elements > 1)
12304       return false;
12305 
12306     if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
12307     {
12308       if (min_max_arg_part)
12309         return false; /* The current keypart has no range predicates at all. */
12310       else
12311       {
12312         *first_non_infix_part= cur_part;
12313         return true;
12314       }
12315     }
12316 
12317     if ((cur_range->min_flag & NO_MIN_RANGE) ||
12318         (cur_range->max_flag & NO_MAX_RANGE) ||
12319         (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
12320       return false;
12321 
12322     uint field_length= cur_part->store_length;
12323     if (cur_range->maybe_null &&
12324          cur_range->min_value[0] && cur_range->max_value[0])
12325     {
12326       /*
12327         cur_range specifies 'IS NULL'. In this case the argument points
12328         to a "null value" (a copy of is_null_string) that we do not
12329         memcmp(), or memcpy to a field.
12330       */
12331       DBUG_ASSERT (field_length > 0);
12332       *key_ptr= 1;
12333       key_ptr+= field_length;
12334       *key_infix_len+= field_length;
12335     }
12336     else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
12337     { /* cur_range specifies an equality condition. */
12338       memcpy(key_ptr, cur_range->min_value, field_length);
12339       key_ptr+= field_length;
12340       *key_infix_len+= field_length;
12341     }
12342     else
12343       return false;
12344   }
12345 
12346   if (!min_max_arg_part && (cur_part == last_part))
12347     *first_non_infix_part= last_part;
12348 
12349   return TRUE;
12350 }
12351 
12352 
12353 /*
12354   Find the key part referenced by a field.
12355 
12356   SYNOPSIS
12357     get_field_keypart()
12358     index  descriptor of an index
12359     field  field that possibly references some key part in index
12360 
12361   NOTES
12362     The return value can be used to get a KEY_PART_INFO pointer by
12363     part= index->key_part + get_field_keypart(...) - 1;
12364 
12365   RETURN
12366     Positive number which is the consecutive number of the key part, or
12367     0 if field does not reference any index field.
12368 */
12369 
12370 static inline uint
get_field_keypart(KEY * index,Field * field)12371 get_field_keypart(KEY *index, Field *field)
12372 {
12373   KEY_PART_INFO *part, *end;
12374 
12375   for (part= index->key_part, end= part + actual_key_parts(index) ;
12376        part < end; part++)
12377   {
12378     if (field->eq(part->field))
12379       return part - index->key_part + 1;
12380   }
12381   return 0;
12382 }
12383 
12384 
12385 /*
12386   Find the SEL_ARG sub-tree that corresponds to the chosen index.
12387 
12388   SYNOPSIS
12389     get_index_range_tree()
12390     index     [in]  The ID of the index being looked for
12391     range_tree[in]  Tree of ranges being searched
12392     param     [in]  PARAM from SQL_SELECT::test_quick_select
12393     param_idx [out] Index in the array PARAM::key that corresponds to 'index'
12394 
12395   DESCRIPTION
12396 
12397     A SEL_TREE contains range trees for all usable indexes. This procedure
12398     finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
12399     ordered in the same way as the members of PARAM::key, thus we first find
12400     the corresponding index in the array PARAM::key. This index is returned
12401     through the variable param_idx, to be used later as argument of
12402     check_quick_select().
12403 
12404   RETURN
12405     Pointer to the SEL_ARG subtree that corresponds to index.
12406 */
12407 
get_index_range_tree(uint index,SEL_TREE * range_tree,PARAM * param,uint * param_idx)12408 SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
12409                                uint *param_idx)
12410 {
12411   uint idx= 0; /* Index nr in param->key_parts */
12412   while (idx < param->keys)
12413   {
12414     if (index == param->real_keynr[idx])
12415       break;
12416     idx++;
12417   }
12418   *param_idx= idx;
12419   return(range_tree->keys[idx]);
12420 }
12421 
12422 
12423 /*
12424   Compute the cost of a quick_group_min_max_select for a particular index.
12425 
12426   SYNOPSIS
12427     cost_group_min_max()
12428     table                [in] The table being accessed
12429     index_info           [in] The index used to access the table
12430     used_key_parts       [in] Number of key parts used to access the index
12431     group_key_parts      [in] Number of index key parts in the group prefix
12432     range_tree           [in] Tree of ranges for all indexes
12433     index_tree           [in] The range tree for the current index
12434     quick_prefix_records [in] Number of records retrieved by the internally
12435 			      used quick range select if any
12436     have_min             [in] True if there is a MIN function
12437     have_max             [in] True if there is a MAX function
12438     read_cost           [out] The cost to retrieve rows via this quick select
12439     records             [out] The number of rows retrieved
12440 
12441   DESCRIPTION
12442     This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
12443     the number of rows returned.
12444 
12445   NOTES
12446     The cost computation distinguishes several cases:
12447     1) No equality predicates over non-group attributes (thus no key_infix).
12448        If groups are bigger than blocks on the average, then we assume that it
12449        is very unlikely that block ends are aligned with group ends, thus even
12450        if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
12451        keys, except for the first MIN and the last MAX keys, will be in the
12452        same block.  If groups are smaller than blocks, then we are going to
12453        read all blocks.
12454     2) There are equality predicates over non-group attributes.
12455        In this case the group prefix is extended by additional constants, and
12456        as a result the min/max values are inside sub-groups of the original
12457        groups. The number of blocks that will be read depends on whether the
12458        ends of these sub-groups will be contained in the same or in different
12459        blocks. We compute the probability for the two ends of a subgroup to be
12460        in two different blocks as the ratio of:
12461        - the number of positions of the left-end of a subgroup inside a group,
12462          such that the right end of the subgroup is past the end of the buffer
12463          containing the left-end, and
12464        - the total number of possible positions for the left-end of the
12465          subgroup, which is the number of keys in the containing group.
12466        We assume it is very unlikely that two ends of subsequent subgroups are
12467        in the same block.
    3) There are range predicates over the group attributes.
12469        Then some groups may be filtered by the range predicates. We use the
12470        selectivity of the range predicates to decide how many groups will be
12471        filtered.
12472 
12473   TODO
12474      - Take into account the optional range predicates over the MIN/MAX
12475        argument.
12476      - Check if we have a PK index and we use all cols - then each key is a
12477        group, and it will be better to use an index scan.
12478 
12479   RETURN
12480     None
12481 */
12482 
cost_group_min_max(TABLE * table,KEY * index_info,uint used_key_parts,uint group_key_parts,SEL_TREE * range_tree,SEL_ARG * index_tree,ha_rows quick_prefix_records,bool have_min,bool have_max,double * read_cost,ha_rows * records)12483 void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
12484                         uint group_key_parts, SEL_TREE *range_tree,
12485                         SEL_ARG *index_tree, ha_rows quick_prefix_records,
12486                         bool have_min, bool have_max,
12487                         double *read_cost, ha_rows *records)
12488 {
12489   ha_rows table_records;
12490   uint num_groups;
12491   uint num_blocks;
12492   uint keys_per_block;
12493   uint keys_per_group;
12494   uint keys_per_subgroup; /* Average number of keys in sub-groups */
12495                           /* formed by a key infix. */
12496   double p_overlap; /* Probability that a sub-group overlaps two blocks. */
12497   double quick_prefix_selectivity;
12498   double io_cost;
12499   DBUG_ENTER("cost_group_min_max");
12500 
12501   table_records= table->file->stats.records;
12502   keys_per_block= (table->file->stats.block_size / 2 /
12503                    (index_info->key_length + table->file->ref_length)
12504                         + 1);
12505   num_blocks= (uint)(table_records / keys_per_block) + 1;
12506 
12507   /* Compute the number of keys in a group. */
12508   keys_per_group= index_info->rec_per_key[group_key_parts - 1];
12509   if (keys_per_group == 0) /* If there is no statistics try to guess */
12510     /* each group contains 10% of all records */
12511     keys_per_group= (uint)(table_records / 10) + 1;
12512   num_groups= (uint)(table_records / keys_per_group) + 1;
12513 
12514   /* Apply the selectivity of the quick select for group prefixes. */
12515   if (range_tree && (quick_prefix_records != HA_POS_ERROR))
12516   {
12517     quick_prefix_selectivity= (double) quick_prefix_records /
12518                               (double) table_records;
12519     num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
12520     set_if_bigger(num_groups, 1);
12521   }
12522 
12523   if (used_key_parts > group_key_parts)
12524   { /*
12525       Compute the probability that two ends of a subgroup are inside
12526       different blocks.
12527     */
12528     keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1];
12529     if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
12530       p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
12531     else
12532     {
12533       double blocks_per_group= (double) num_blocks / (double) num_groups;
12534       p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
12535       p_overlap= min(p_overlap, 1.0);
12536     }
12537     io_cost= min<double>(num_groups * (1 + p_overlap), num_blocks);
12538   }
12539   else
12540     io_cost= (keys_per_group > keys_per_block) ?
12541              (have_min && have_max) ? (double) (num_groups + 1) :
12542                                       (double) num_groups :
12543              (double) num_blocks;
12544 
12545   /*
12546     CPU cost must be comparable to that of an index scan as computed
12547     in SQL_SELECT::test_quick_select(). When the groups are small,
12548     e.g. for a unique index, using index scan will be cheaper since it
12549     reads the next record without having to re-position to it on every
12550     group. To make the CPU cost reflect this, we estimate the CPU cost
12551     as the sum of:
12552     1. Cost for evaluating the condition (similarly as for index scan).
12553     2. Cost for navigating the index structure (assuming a b-tree).
12554        Note: We only add the cost for one comparision per block. For a
12555              b-tree the number of comparisons will be larger.
12556        TODO: This cost should be provided by the storage engine.
12557   */
12558   const double tree_traversal_cost=
12559     ceil(log(static_cast<double>(table_records))/
12560          log(static_cast<double>(keys_per_block))) * ROWID_COMPARE_COST;
12561 
12562   const double cpu_cost= num_groups * (tree_traversal_cost + ROW_EVALUATE_COST);
12563 
12564   *read_cost= io_cost + cpu_cost;
12565   *records= num_groups;
12566 
12567   DBUG_PRINT("info",
12568              ("table rows: %lu  keys/block: %u  keys/group: %u  result rows: %lu  blocks: %u",
12569               (ulong)table_records, keys_per_block, keys_per_group,
12570               (ulong) *records, num_blocks));
12571   DBUG_VOID_RETURN;
12572 }
12573 
12574 
12575 /*
12576   Construct a new quick select object for queries with group by with min/max.
12577 
12578   SYNOPSIS
12579     TRP_GROUP_MIN_MAX::make_quick()
12580     param              Parameter from test_quick_select
12581     retrieve_full_rows ignored
12582     parent_alloc       Memory pool to use, if any.
12583 
12584   NOTES
12585     Make_quick ignores the retrieve_full_rows parameter because
12586     QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
    The other parameters are ignored as well because all necessary
12588     data to create the QUICK object is computed at this TRP creation
12589     time.
12590 
12591   RETURN
12592     New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
12593     NULL otherwise.
12594 */
12595 
12596 QUICK_SELECT_I *
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)12597 TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
12598                               MEM_ROOT *parent_alloc)
12599 {
12600   QUICK_GROUP_MIN_MAX_SELECT *quick;
12601   DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");
12602 
12603   quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
12604                                         param->thd->lex->current_select->join,
12605                                         have_min, have_max,
12606                                         have_agg_distinct, min_max_arg_part,
12607                                         group_prefix_len, group_key_parts,
12608                                         used_key_parts, index_info, index,
12609                                         read_cost, records, key_infix_len,
12610                                         key_infix, parent_alloc, is_index_scan);
12611   if (!quick)
12612     DBUG_RETURN(NULL);
12613 
12614   if (quick->init())
12615   {
12616     delete quick;
12617     DBUG_RETURN(NULL);
12618   }
12619 
12620   if (range_tree)
12621   {
12622     DBUG_ASSERT(quick_prefix_records > 0);
12623     if (quick_prefix_records == HA_POS_ERROR)
12624       quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
12625     else
12626     {
12627       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
12628       quick->quick_prefix_select= get_quick_select(param, param_idx,
12629                                                    index_tree,
12630                                                    HA_MRR_SORTED,
12631                                                    0,
12632                                                    &quick->alloc);
12633       if (!quick->quick_prefix_select)
12634       {
12635         delete quick;
12636         DBUG_RETURN(NULL);
12637       }
12638     }
12639     /*
12640       Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
12641       attribute, and create an array of QUICK_RANGES to be used by the
12642       new quick select.
12643     */
12644     if (min_max_arg_part)
12645     {
12646       SEL_ARG *min_max_range= index_tree;
12647       while (min_max_range) /* Find the tree for the MIN/MAX key part. */
12648       {
12649         if (min_max_range->field->eq(min_max_arg_part->field))
12650           break;
12651         min_max_range= min_max_range->next_key_part;
12652       }
12653       /* Scroll to the leftmost interval for the MIN/MAX argument. */
12654       while (min_max_range && min_max_range->prev)
12655         min_max_range= min_max_range->prev;
12656       /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
12657       while (min_max_range)
12658       {
12659         if (quick->add_range(min_max_range))
12660         {
12661           delete quick;
12662           quick= NULL;
12663           DBUG_RETURN(NULL);
12664         }
12665         min_max_range= min_max_range->next;
12666       }
12667     }
12668   }
12669   else
12670     quick->quick_prefix_select= NULL;
12671 
12672   quick->update_key_stat();
12673   quick->adjust_prefix_ranges();
12674 
12675   DBUG_RETURN(quick);
12676 }
12677 
12678 
12679 /*
12680   Construct new quick select for group queries with min/max.
12681 
12682   SYNOPSIS
12683     QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
12684     table             The table being accessed
12685     join              Descriptor of the current query
12686     have_min          TRUE if the query selects a MIN function
12687     have_max          TRUE if the query selects a MAX function
12688     min_max_arg_part  The only argument field of all MIN/MAX functions
12689     group_prefix_len  Length of all key parts in the group prefix
12690     prefix_key_parts  All key parts in the group prefix
12691     index_info        The index chosen for data access
12692     use_index         The id of index_info
12693     read_cost         Cost of this access method
12694     records           Number of records returned
12695     key_infix_len     Length of the key infix appended to the group prefix
12696     key_infix         Infix of constants from equality predicates
12697     parent_alloc      Memory pool for this and quick_prefix_select data
12698     is_index_scan     get the next different key not by jumping on it via
12699                       index read, but by scanning until the end of the
12700                       rows with equal key value.
12701 
12702   RETURN
12703     None
12704 */
12705 
QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
                           bool have_max_arg, bool have_agg_distinct_arg,
                           KEY_PART_INFO *min_max_arg_part_arg,
                           uint group_prefix_len_arg, uint group_key_parts_arg,
                           uint used_key_parts_arg, KEY *index_info_arg,
                           uint use_index, double read_cost_arg,
                           ha_rows records_arg, uint key_infix_len_arg,
                           uchar *key_infix_arg, MEM_ROOT *parent_alloc,
                           bool is_index_scan_arg)
  :join(join_arg), index_info(index_info_arg),
   group_prefix_len(group_prefix_len_arg),
   group_key_parts(group_key_parts_arg), have_min(have_min_arg),
   have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
   seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
   key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
   min_functions_it(NULL), max_functions_it(NULL),
   is_index_scan(is_index_scan_arg)
{
  head=       table;
  index=      use_index;
  record=     head->record[0];
  tmp_record= head->record[1];
  read_time= read_cost_arg;
  records= records_arg;
  used_key_parts= used_key_parts_arg;
  real_key_parts= used_key_parts_arg;
  /* Total lookup prefix length: GROUP BY prefix plus the constant infix. */
  real_prefix_len= group_prefix_len + key_infix_len;
  group_prefix= NULL;  /* Buffer allocated lazily in init(). */
  /* The MIN/MAX argument key length is 0 when there is no such argument. */
  min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;

  /*
    We can't have parent_alloc set as the init function can't handle this case
    yet.
  */
  DBUG_ASSERT(!parent_alloc);
  if (!parent_alloc)
  {
    init_sql_alloc(&alloc, join->thd->variables.range_alloc_block_size, 0);
    /*
      NOTE(review): this redirects the THD's mem_root to this object's own
      arena; presumably the caller restores it afterwards — confirm.
    */
    join->thd->mem_root= &alloc;
  }
  else
    memset(&alloc, 0, sizeof(MEM_ROOT));  // ensure that it's not used
}
12750 
12751 
12752 /*
12753   Do post-constructor initialization.
12754 
12755   SYNOPSIS
12756     QUICK_GROUP_MIN_MAX_SELECT::init()
12757 
12758   DESCRIPTION
12759     The method performs initialization that cannot be done in the constructor
12760     such as memory allocations that may fail. It allocates memory for the
    group prefix and infix buffers, and for the lists of MIN/MAX items to be
12762     updated during execution.
12763 
12764   RETURN
12765     0      OK
12766     other  Error code
12767 */
12768 
init()12769 int QUICK_GROUP_MIN_MAX_SELECT::init()
12770 {
12771   if (group_prefix) /* Already initialized. */
12772     return 0;
12773 
12774   if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len)))
12775       return 1;
12776   /*
12777     We may use group_prefix to store keys with all select fields, so allocate
12778     enough space for it.
12779   */
12780   if (!(group_prefix= (uchar*) alloc_root(&alloc,
12781                                          real_prefix_len + min_max_arg_len)))
12782     return 1;
12783 
12784   if (key_infix_len > 0)
12785   {
12786     /*
12787       The memory location pointed to by key_infix will be deleted soon, so
12788       allocate a new buffer and copy the key_infix into it.
12789     */
12790     uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
12791     if (!tmp_key_infix)
12792       return 1;
12793     memcpy(tmp_key_infix, this->key_infix, key_infix_len);
12794     this->key_infix= tmp_key_infix;
12795   }
12796 
12797   if (min_max_arg_part)
12798   {
12799     if (my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
12800       return 1;
12801 
12802     if (have_min)
12803     {
12804       if (!(min_functions= new List<Item_sum>))
12805         return 1;
12806     }
12807     else
12808       min_functions= NULL;
12809     if (have_max)
12810     {
12811       if (!(max_functions= new List<Item_sum>))
12812         return 1;
12813     }
12814     else
12815       max_functions= NULL;
12816 
12817     Item_sum *min_max_item;
12818     Item_sum **func_ptr= join->sum_funcs;
12819     while ((min_max_item= *(func_ptr++)))
12820     {
12821       if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
12822         min_functions->push_back(min_max_item);
12823       else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
12824         max_functions->push_back(min_max_item);
12825     }
12826 
12827     if (have_min)
12828     {
12829       if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
12830         return 1;
12831     }
12832 
12833     if (have_max)
12834     {
12835       if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
12836         return 1;
12837     }
12838   }
12839   else
12840     min_max_ranges.elements= 0;
12841 
12842   return 0;
12843 }
12844 
12845 
~QUICK_GROUP_MIN_MAX_SELECT()12846 QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
12847 {
12848   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
12849   if (head->file->inited)
12850     /*
12851       We may have used this object for index access during
12852       create_sort_index() and then switched to rnd access for the rest
12853       of execution. Since we don't do cleanup until now, we must call
12854       ha_*_end() for whatever is the current access method.
12855     */
12856     head->file->ha_index_or_rnd_end();
12857   if (min_max_arg_part)
12858     delete_dynamic(&min_max_ranges);
12859   free_root(&alloc,MYF(0));
12860   delete min_functions_it;
12861   delete max_functions_it;
12862   delete quick_prefix_select;
12863   DBUG_VOID_RETURN;
12864 }
12865 
12866 
12867 /*
12868   Eventually create and add a new quick range object.
12869 
12870   SYNOPSIS
12871     QUICK_GROUP_MIN_MAX_SELECT::add_range()
    sel_range  Range object from which a new QUICK_RANGE is constructed
12873 
12874   NOTES
12875     Construct a new QUICK_RANGE object from a SEL_ARG object, and
12876     add it to the array min_max_ranges. If sel_arg is an infinite
12877     range, e.g. (x < 5 or x > 4), then skip it and do not construct
12878     a quick range.
12879 
12880   RETURN
12881     FALSE on success
12882     TRUE  otherwise
12883 */
12884 
add_range(SEL_ARG * sel_range)12885 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
12886 {
12887   QUICK_RANGE *range;
12888   uint range_flag= sel_range->min_flag | sel_range->max_flag;
12889 
12890   /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
12891   if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
12892     return FALSE;
12893 
12894   if (!(sel_range->min_flag & NO_MIN_RANGE) &&
12895       !(sel_range->max_flag & NO_MAX_RANGE))
12896   {
12897     if (sel_range->maybe_null &&
12898         sel_range->min_value[0] && sel_range->max_value[0])
12899       range_flag|= NULL_RANGE; /* IS NULL condition */
12900     /*
12901       Do not perform comparison if one of the argiment is NULL value.
12902     */
12903     else if (!sel_range->min_value[0] &&
12904              !sel_range->max_value[0] &&
12905              memcmp(sel_range->min_value, sel_range->max_value,
12906                     min_max_arg_len) == 0)
12907       range_flag|= EQ_RANGE;  /* equality condition */
12908   }
12909   range= new QUICK_RANGE(sel_range->min_value, min_max_arg_len,
12910                          make_keypart_map(sel_range->part),
12911                          sel_range->max_value, min_max_arg_len,
12912                          make_keypart_map(sel_range->part),
12913                          range_flag);
12914   if (!range)
12915     return TRUE;
12916   if (insert_dynamic(&min_max_ranges, &range))
12917     return TRUE;
12918   return FALSE;
12919 }
12920 
12921 
12922 /*
12923   Opens the ranges if there are more conditions in quick_prefix_select than
12924   the ones used for jumping through the prefixes.
12925 
12926   SYNOPSIS
12927     QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
12928 
12929   NOTES
12930     quick_prefix_select is made over the conditions on the whole key.
12931     It defines a number of ranges of length x.
    However when jumping through the prefixes we use only the first
12933     few most significant keyparts in the range key. However if there
12934     are more keyparts to follow the ones we are using we must make the
12935     condition on the key inclusive (because x < "ab" means
12936     x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b').
    To achieve the above we must turn off the NEAR_MIN/NEAR_MAX
12938 */
adjust_prefix_ranges()12939 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
12940 {
12941   if (quick_prefix_select &&
12942       group_prefix_len < quick_prefix_select->max_used_key_length)
12943   {
12944     DYNAMIC_ARRAY *arr;
12945     uint inx;
12946 
12947     for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
12948     {
12949       QUICK_RANGE *range;
12950 
12951       get_dynamic(arr, (uchar*)&range, inx);
12952       range->flag &= ~(NEAR_MIN | NEAR_MAX);
12953     }
12954   }
12955 }
12956 
12957 
12958 /*
12959   Determine the total number and length of the keys that will be used for
12960   index lookup.
12961 
12962   SYNOPSIS
12963     QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
12964 
12965   DESCRIPTION
12966     The total length of the keys used for index lookup depends on whether
12967     there are any predicates referencing the min/max argument, and/or if
12968     the min/max argument field can be NULL.
12969     This function does an optimistic analysis whether the search key might
12970     be extended by a constant for the min/max keypart. It is 'optimistic'
12971     because during actual execution it may happen that a particular range
12972     is skipped, and then a shorter key will be used. However this is data
12973     dependent and can't be easily estimated here.
12974 
12975   RETURN
12976     None
12977 */
12978 
update_key_stat()12979 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
12980 {
12981   max_used_key_length= real_prefix_len;
12982   if (min_max_ranges.elements > 0)
12983   {
12984     QUICK_RANGE *cur_range;
12985     if (have_min)
12986     { /* Check if the right-most range has a lower boundary. */
12987       get_dynamic(&min_max_ranges, (uchar*)&cur_range,
12988                   min_max_ranges.elements - 1);
12989       if (!(cur_range->flag & NO_MIN_RANGE))
12990       {
12991         max_used_key_length+= min_max_arg_len;
12992         used_key_parts++;
12993         return;
12994       }
12995     }
12996     if (have_max)
12997     { /* Check if the left-most range has an upper boundary. */
12998       get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
12999       if (!(cur_range->flag & NO_MAX_RANGE))
13000       {
13001         max_used_key_length+= min_max_arg_len;
13002         used_key_parts++;
13003         return;
13004       }
13005     }
13006   }
13007   else if (have_min && min_max_arg_part &&
13008            min_max_arg_part->field->real_maybe_null())
13009   {
13010     /*
13011       If a MIN/MAX argument value is NULL, we can quickly determine
13012       that we're in the beginning of the next group, because NULLs
13013       are always < any other value. This allows us to quickly
13014       determine the end of the current group and jump to the next
13015       group (see next_min()) and thus effectively increases the
13016       usable key length.
13017     */
13018     max_used_key_length+= min_max_arg_len;
13019     used_key_parts++;
13020   }
13021 }
13022 
13023 
13024 /*
13025   Initialize a quick group min/max select for key retrieval.
13026 
13027   SYNOPSIS
13028     QUICK_GROUP_MIN_MAX_SELECT::reset()
13029 
13030   DESCRIPTION
13031     Initialize the index chosen for access and find and store the prefix
13032     of the last group. The method is expensive since it performs disk access.
13033 
13034   RETURN
13035     0      OK
13036     other  Error code
13037 */
13038 
reset(void)13039 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
13040 {
13041   int result;
13042   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
13043 
13044   seen_first_key= false;
13045   head->set_keyread(TRUE); /* We need only the key attributes */
13046   /*
13047     Request ordered index access as usage of ::index_last(),
13048     ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
13049   */
13050   if ((result= head->file->ha_index_init(index, true)))
13051   {
13052     head->file->print_error(result, MYF(0));
13053     DBUG_RETURN(result);
13054   }
13055   if (quick_prefix_select && quick_prefix_select->reset())
13056     DBUG_RETURN(1);
13057 
13058   result= head->file->ha_index_last(record);
13059   if (result != 0)
13060   {
13061     if (result == HA_ERR_END_OF_FILE)
13062       DBUG_RETURN(0);
13063     else
13064       DBUG_RETURN(result);
13065   }
13066 
13067   /* Save the prefix of the last group. */
13068   key_copy(last_prefix, record, index_info, group_prefix_len);
13069 
13070   DBUG_RETURN(0);
13071 }
13072 
13073 
13074 
13075 /*
13076   Get the next key containing the MIN and/or MAX key for the next group.
13077 
13078   SYNOPSIS
13079     QUICK_GROUP_MIN_MAX_SELECT::get_next()
13080 
13081   DESCRIPTION
13082     The method finds the next subsequent group of records that satisfies the
13083     query conditions and finds the keys that contain the MIN/MAX values for
13084     the key part referenced by the MIN/MAX function(s). Once a group and its
13085     MIN/MAX values are found, store these values in the Item_sum objects for
13086     the MIN/MAX functions. The rest of the values in the result row are stored
13087     in the Item_field::result_field of each select field. If the query does
13088     not contain MIN and/or MAX functions, then the function only finds the
13089     group prefix, which is a query answer itself.
13090 
13091   NOTES
13092     If both MIN and MAX are computed, then we use the fact that if there is
13093     no MIN key, there can't be a MAX key as well, so we can skip looking
13094     for a MAX key in this case.
13095 
13096   RETURN
13097     0                  on success
13098     HA_ERR_END_OF_FILE if returned all keys
13099     other              if some error occurred
13100 */
13101 
int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
  int result;
#endif
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue; /* No keys with this prefix; try the next prefix. */
      break;      /* EOF or a real handler error: stop scanning. */
    }

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= head->file->ha_index_read_map(record, group_prefix,
                                            make_prev_keypart_map(real_key_parts),
                                            HA_READ_KEY_EXACT);

    /*
      Report the status of the MIN lookup if one was made, else of the MAX
      lookup, else of the plain prefix (or prefix+infix) read above.
    */
    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  /* "Key not found" while in the last group means the scan is exhausted. */
  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}
13179 
13180 
13181 /*
13182   Retrieve the minimal key in the next group.
13183 
13184   SYNOPSIS
13185     QUICK_GROUP_MIN_MAX_SELECT::next_min()
13186 
13187   DESCRIPTION
13188     Find the minimal key within this group such that the key satisfies the query
13189     conditions and NULL semantics. The found key is loaded into this->record.
13190 
13191   IMPLEMENTATION
13192     Depending on the values of min_max_ranges.elements, key_infix_len, and
13193     whether there is a  NULL in the MIN field, this function may directly
13194     return without any data access. In this case we use the key loaded into
13195     this->record by the call to this->next_prefix() just before this call.
13196 
13197   RETURN
13198     0                    on success
13199     HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
13200     HA_ERR_END_OF_FILE   - "" -
13201     other                if some error occurred
13202 */
13203 
int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0; /* Stays 0 if the record loaded by next_prefix() is reused. */
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result= head->file->ha_index_read_map(record, group_prefix,
                                                 make_prev_keypart_map(real_key_parts),
                                                 HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar key_buf[MAX_KEY_LENGTH]; /* Saves the current (NULL-valued) key. */

      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(key_buf, record, index_info, max_used_key_length);
      result= head->file->ha_index_read_map(record, key_buf,
                                            make_keypart_map(real_key_parts),
                                            HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, key_buf, index_info, 0); /* Re-load NULL row. */
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}
13268 
13269 
13270 /*
13271   Retrieve the maximal key in the next group.
13272 
13273   SYNOPSIS
13274     QUICK_GROUP_MIN_MAX_SELECT::next_max()
13275 
13276   DESCRIPTION
13277     Lookup the maximal key of the group, and store it into this->record.
13278 
13279   RETURN
13280     0                    on success
13281     HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
13282     HA_ERR_END_OF_FILE	 - "" -
13283     other                if some error occurred
13284 */
13285 
next_max()13286 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
13287 {
13288   int result;
13289 
13290   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
13291 
13292   /* Get the last key in the (possibly extended) group. */
13293   if (min_max_ranges.elements > 0)
13294     result= next_max_in_range();
13295   else
13296     result= head->file->ha_index_read_map(record, group_prefix,
13297                                           make_prev_keypart_map(real_key_parts),
13298                                           HA_READ_PREFIX_LAST);
13299   DBUG_RETURN(result);
13300 }
13301 
13302 
13303 /**
  Find the next different key value by skipping all the rows with the same
  key value.
13306 
13307   Implements a specialized loose index access method for queries
13308   containing aggregate functions with distinct of the form:
13309     SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
13310   This method comes to replace the index scan + Unique class
13311   (distinct selection) for loose index scan that visits all the rows of a
  covering index instead of jumping to the beginning of each group.
13313   TODO: Placeholder function. To be replaced by a handler API call
13314 
13315   @param is_index_scan     hint to use index scan instead of random index read
13316                            to find the next different value.
13317   @param file              table handler
13318   @param key_part          group key to compare
13319   @param record            row data
13320   @param group_prefix      current key prefix data
13321   @param group_prefix_len  length of the current key prefix data
13322   @param group_key_parts   number of the current key prefix columns
13323   @return status
13324     @retval  0  success
13325     @retval !0  failure
13326 */
13327 
index_next_different(bool is_index_scan,handler * file,KEY_PART_INFO * key_part,uchar * record,const uchar * group_prefix,uint group_prefix_len,uint group_key_parts)13328 static int index_next_different (bool is_index_scan, handler *file,
13329                                 KEY_PART_INFO *key_part, uchar * record,
13330                                 const uchar * group_prefix,
13331                                 uint group_prefix_len,
13332                                 uint group_key_parts)
13333 {
13334   if (is_index_scan)
13335   {
13336     int result= 0;
13337 
13338     while (!key_cmp (key_part, group_prefix, group_prefix_len))
13339     {
13340       result= file->ha_index_next(record);
13341       if (result)
13342         return(result);
13343     }
13344     return result;
13345   }
13346   else
13347     return file->ha_index_read_map(record, group_prefix,
13348                                    make_prev_keypart_map(group_key_parts),
13349                                    HA_READ_AFTER_KEY);
13350 }
13351 
13352 
13353 /*
13354   Determine the prefix of the next group.
13355 
13356   SYNOPSIS
13357     QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13358 
13359   DESCRIPTION
13360     Determine the prefix of the next group that satisfies the query conditions.
13361     If there is a range condition referencing the group attributes, use a
13362     QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
13363     condition. If there is a key infix of constants, append this infix
13364     immediately after the group attributes. The possibly extended prefix is
13365     stored in this->group_prefix. The first key of the found group is stored in
13366     this->record, on which relies this->next_min().
13367 
13368   RETURN
13369     0                    on success
13370     HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
13371     HA_ERR_END_OF_FILE   if there are no more keys
13372     other                if some error occurred
13373 */
next_prefix()13374 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
13375 {
13376   int result;
13377   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
13378 
13379   if (quick_prefix_select)
13380   {
13381     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
13382     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
13383                                                       group_key_parts,
13384                                                       cur_prefix)))
13385       DBUG_RETURN(result);
13386     seen_first_key= TRUE;
13387   }
13388   else
13389   {
13390     if (!seen_first_key)
13391     {
13392       result= head->file->ha_index_first(record);
13393       if (result)
13394         DBUG_RETURN(result);
13395       seen_first_key= TRUE;
13396     }
13397     else
13398     {
13399       /* Load the first key in this group into record. */
13400       result= index_next_different (is_index_scan, head->file,
13401                                     index_info->key_part,
13402                                     record, group_prefix, group_prefix_len,
13403                                     group_key_parts);
13404       if (result)
13405         DBUG_RETURN(result);
13406     }
13407   }
13408 
13409   /* Save the prefix of this group for subsequent calls. */
13410   key_copy(group_prefix, record, index_info, group_prefix_len);
13411   /* Append key_infix to group_prefix. */
13412   if (key_infix_len > 0)
13413     memcpy(group_prefix + group_prefix_len,
13414            key_infix, key_infix_len);
13415 
13416   DBUG_RETURN(0);
13417 }
13418 
13419 
13420 /*
13421   Find the minimal key in a group that satisfies some range conditions for the
13422   min/max argument field.
13423 
13424   SYNOPSIS
13425     QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
13426 
13427   DESCRIPTION
13428     Given the sequence of ranges min_max_ranges, find the minimal key that is
13429     in the left-most possible range. If there is no such key, then the current
13430     group does not have a MIN key that satisfies the WHERE clause. If a key is
13431     found, its value is stored in this->record.
13432 
13433   RETURN
13434     0                    on success
13435     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13436                          the ranges
13437     HA_ERR_END_OF_FILE   - "" -
13438     other                if some error
13439 */
13440 
next_min_in_range()13441 int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
13442 {
13443   ha_rkey_function find_flag;
13444   key_part_map keypart_map;
13445   QUICK_RANGE *cur_range;
13446   bool found_null= FALSE;
13447   int result= HA_ERR_KEY_NOT_FOUND;
13448 
13449   DBUG_ASSERT(min_max_ranges.elements > 0);
13450 
13451   for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
13452   { /* Search from the left-most range to the right. */
13453     get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);
13454 
13455     /*
13456       If the current value for the min/max argument is bigger than the right
13457       boundary of cur_range, there is no need to check this range.
13458     */
13459     if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
13460         (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
13461                  min_max_arg_len) == 1))
13462       continue;
13463 
13464     if (cur_range->flag & NO_MIN_RANGE)
13465     {
13466       keypart_map= make_prev_keypart_map(real_key_parts);
13467       find_flag= HA_READ_KEY_EXACT;
13468     }
13469     else
13470     {
13471       /* Extend the search key with the lower boundary for this range. */
13472       memcpy(group_prefix + real_prefix_len, cur_range->min_key,
13473              cur_range->min_length);
13474       keypart_map= make_keypart_map(real_key_parts);
13475       find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
13476                  HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
13477                  HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
13478     }
13479 
13480     result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
13481                                           find_flag);
13482     if (result)
13483     {
13484       if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13485           (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
13486         continue; /* Check the next range. */
13487 
13488       /*
13489         In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
13490         HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
13491         range, it can't succeed for any other subsequent range.
13492       */
13493       break;
13494     }
13495 
13496     /* A key was found. */
13497     if (cur_range->flag & EQ_RANGE)
13498       break; /* No need to perform the checks below for equal keys. */
13499 
13500     if (cur_range->flag & NULL_RANGE)
13501     {
13502       /*
13503         Remember this key, and continue looking for a non-NULL key that
13504         satisfies some other condition.
13505       */
13506       memcpy(tmp_record, record, head->s->rec_buff_length);
13507       found_null= TRUE;
13508       continue;
13509     }
13510 
13511     /* Check if record belongs to the current group. */
13512     if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
13513     {
13514       result= HA_ERR_KEY_NOT_FOUND;
13515       continue;
13516     }
13517 
13518     /* If there is an upper limit, check if the found key is in the range. */
13519     if ( !(cur_range->flag & NO_MAX_RANGE) )
13520     {
13521       /* Compose the MAX key for the range. */
13522       uchar *max_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
13523       memcpy(max_key, group_prefix, real_prefix_len);
13524       memcpy(max_key + real_prefix_len, cur_range->max_key,
13525              cur_range->max_length);
13526       /* Compare the found key with max_key. */
13527       int cmp_res= key_cmp(index_info->key_part, max_key,
13528                            real_prefix_len + min_max_arg_len);
13529       /*
13530         The key is outside of the range if:
13531         the interval is open and the key is equal to the maximum boundry
13532         or
13533         the key is greater than the maximum
13534       */
13535       if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
13536           cmp_res > 0)
13537       {
13538         result= HA_ERR_KEY_NOT_FOUND;
13539         continue;
13540       }
13541     }
13542     /* If we got to this point, the current key qualifies as MIN. */
13543     DBUG_ASSERT(result == 0);
13544     break;
13545   }
13546   /*
13547     If there was a key with NULL in the MIN/MAX field, and there was no other
13548     key without NULL from the same group that satisfies some other condition,
13549     then use the key with the NULL.
13550   */
13551   if (found_null && result)
13552   {
13553     memcpy(record, tmp_record, head->s->rec_buff_length);
13554     result= 0;
13555   }
13556   return result;
13557 }
13558 
13559 
13560 /*
13561   Find the maximal key in a group that satisfies some range conditions for the
13562   min/max argument field.
13563 
13564   SYNOPSIS
13565     QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
13566 
13567   DESCRIPTION
13568     Given the sequence of ranges min_max_ranges, find the maximal key that is
13569     in the right-most possible range. If there is no such key, then the current
13570     group does not have a MAX key that satisfies the WHERE clause. If a key is
13571     found, its value is stored in this->record.
13572 
13573   RETURN
13574     0                    on success
13575     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
13576                          the ranges
13577     HA_ERR_END_OF_FILE   - "" -
13578     other                if some error
13579 */
13580 
next_max_in_range()13581 int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
13582 {
13583   ha_rkey_function find_flag;
13584   key_part_map keypart_map;
13585   QUICK_RANGE *cur_range;
13586   int result;
13587 
13588   DBUG_ASSERT(min_max_ranges.elements > 0);
13589 
13590   for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
13591   { /* Search from the right-most range to the left. */
13592     get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);
13593 
13594     /*
13595       If the current value for the min/max argument is smaller than the left
13596       boundary of cur_range, there is no need to check this range.
13597     */
13598     if (range_idx != min_max_ranges.elements &&
13599         !(cur_range->flag & NO_MIN_RANGE) &&
13600         (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
13601                  min_max_arg_len) == -1))
13602       continue;
13603 
13604     if (cur_range->flag & NO_MAX_RANGE)
13605     {
13606       keypart_map= make_prev_keypart_map(real_key_parts);
13607       find_flag= HA_READ_PREFIX_LAST;
13608     }
13609     else
13610     {
13611       /* Extend the search key with the upper boundary for this range. */
13612       memcpy(group_prefix + real_prefix_len, cur_range->max_key,
13613              cur_range->max_length);
13614       keypart_map= make_keypart_map(real_key_parts);
13615       find_flag= (cur_range->flag & EQ_RANGE) ?
13616                  HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
13617                  HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
13618     }
13619 
13620     result= head->file->ha_index_read_map(record, group_prefix, keypart_map,
13621                                           find_flag);
13622 
13623     if (result)
13624     {
13625       if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
13626           (cur_range->flag & EQ_RANGE))
13627         continue; /* Check the next range. */
13628 
13629       /*
13630         In no key was found with this upper bound, there certainly are no keys
13631         in the ranges to the left.
13632       */
13633       return result;
13634     }
13635     /* A key was found. */
13636     if (cur_range->flag & EQ_RANGE)
13637       return 0; /* No need to perform the checks below for equal keys. */
13638 
13639     /* Check if record belongs to the current group. */
13640     if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
13641       continue;                                 // Row not found
13642 
13643     /* If there is a lower limit, check if the found key is in the range. */
13644     if ( !(cur_range->flag & NO_MIN_RANGE) )
13645     {
13646       /* Compose the MIN key for the range. */
13647       uchar *min_key= (uchar*) my_alloca(real_prefix_len + min_max_arg_len);
13648       memcpy(min_key, group_prefix, real_prefix_len);
13649       memcpy(min_key + real_prefix_len, cur_range->min_key,
13650              cur_range->min_length);
13651       /* Compare the found key with min_key. */
13652       int cmp_res= key_cmp(index_info->key_part, min_key,
13653                            real_prefix_len + min_max_arg_len);
13654       /*
13655         The key is outside of the range if:
13656         the interval is open and the key is equal to the minimum boundry
13657         or
13658         the key is less than the minimum
13659       */
13660       if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
13661           cmp_res < 0)
13662         continue;
13663     }
13664     /* If we got to this point, the current key qualifies as MAX. */
13665     return result;
13666   }
13667   return HA_ERR_KEY_NOT_FOUND;
13668 }
13669 
13670 
13671 /*
13672   Update all MIN function results with the newly found value.
13673 
13674   SYNOPSIS
13675     QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13676 
13677   DESCRIPTION
13678     The method iterates through all MIN functions and updates the result value
13679     of each function by calling Item_sum::reset(), which in turn picks the new
13680     result value from this->head->record[0], previously updated by
13681     next_min(). The updated value is stored in a member variable of each of the
13682     Item_sum objects, depending on the value type.
13683 
13684   IMPLEMENTATION
13685     The update must be done separately for MIN and MAX, immediately after
13686     next_min() was called and before next_max() is called, because both MIN and
13687     MAX take their result value from the same buffer this->head->record[0]
13688     (i.e.  this->record).
13689 
13690   RETURN
13691     None
13692 */
13693 
update_min_result()13694 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
13695 {
13696   Item_sum *min_func;
13697 
13698   min_functions_it->rewind();
13699   while ((min_func= (*min_functions_it)++))
13700     min_func->reset_and_add();
13701 }
13702 
13703 
13704 /*
13705   Update all MAX function results with the newly found value.
13706 
13707   SYNOPSIS
13708     QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13709 
13710   DESCRIPTION
13711     The method iterates through all MAX functions and updates the result value
13712     of each function by calling Item_sum::reset(), which in turn picks the new
13713     result value from this->head->record[0], previously updated by
13714     next_max(). The updated value is stored in a member variable of each of the
13715     Item_sum objects, depending on the value type.
13716 
13717   IMPLEMENTATION
13718     The update must be done separately for MIN and MAX, immediately after
13719     next_max() was called, because both MIN and MAX take their result value
13720     from the same buffer this->head->record[0] (i.e.  this->record).
13721 
13722   RETURN
13723     None
13724 */
13725 
update_max_result()13726 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
13727 {
13728   Item_sum *max_func;
13729 
13730   max_functions_it->rewind();
13731   while ((max_func= (*max_functions_it)++))
13732     max_func->reset_and_add();
13733 }
13734 
13735 
13736 /*
13737   Append comma-separated list of keys this quick select uses to key_names;
13738   append comma-separated list of corresponding used lengths to used_lengths.
13739 
13740   SYNOPSIS
13741     QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
13742     key_names    [out] Names of used indexes
13743     used_lengths [out] Corresponding lengths of the index names
13744 
13745   DESCRIPTION
13746     This method is used by select_describe to extract the names of the
13747     indexes used by a quick select.
13748 
13749 */
13750 
add_keys_and_lengths(String * key_names,String * used_lengths)13751 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
13752                                                       String *used_lengths)
13753 {
13754   char buf[64];
13755   uint length;
13756   key_names->append(index_info->name);
13757   length= longlong2str(max_used_key_length, buf, 10) - buf;
13758   used_lengths->append(buf, length);
13759 }
13760 
13761 
13762 
13763 /**
13764   Traverse the R-B range tree for this and later keyparts to see if
13765   there are at least as many equality ranges as defined by the limit.
13766 
13767   @param keypart_root   The root of a R-B tree of ranges for a given keypart.
  @param[in,out] count  The number of equality ranges found so far
13769   @param limit          The number of ranges
13770 
13771   @retval true if limit > 0 and 'limit' or more equality ranges have been
13772           found in the range R-B trees
13773   @retval false otherwise
13774 
13775 */
eq_ranges_exceeds_limit(SEL_ARG * keypart_root,uint * count,uint limit)13776 static bool eq_ranges_exceeds_limit(SEL_ARG *keypart_root, uint* count, uint limit)
13777 {
13778   // "Statistics instead of index dives" feature is turned off
13779   if (limit == 0)
13780     return false;
13781 
13782   /*
13783     Optimization: if there is at least one equality range, index
13784     statistics will be used when limit is 1. It's safe to return true
13785     even without checking that there is an equality range because if
13786     there are none, index statistics will not be used anyway.
13787   */
13788   if (limit == 1)
13789     return true;
13790 
13791   for(SEL_ARG *keypart_range= keypart_root->first();
13792       keypart_range; keypart_range= keypart_range->next)
13793   {
13794     /*
13795       This is an equality range predicate and should be counted if:
13796       1) the range for this keypart does not have a min/max flag
13797          (which indicates <, <= etc), and
13798       2) the lower and upper range boundaries have the same value
13799          (it's not a "x BETWEEN a AND b")
13800 
13801       Note, however, that if this is an "x IS NULL" condition we don't
13802       count it because the number of NULL-values is likely to be off
13803       the index statistics we plan to use.
13804     */
13805     if (!keypart_range->min_flag && !keypart_range->max_flag && // 1)
13806         !keypart_range->cmp_max_to_min(keypart_range) &&        // 2)
13807         !keypart_range->is_null_interval())                     // "x IS NULL"
13808     {
13809       /*
13810          Count predicates in the next keypart, but only if that keypart
13811          is the next in the index.
13812       */
13813       if (keypart_range->next_key_part &&
13814           keypart_range->next_key_part->part == keypart_range->part + 1)
13815         eq_ranges_exceeds_limit(keypart_range->next_key_part, count, limit);
13816       else
13817         // We've found a path of equlity predicates down to a keypart leaf
13818         (*count)++;
13819 
13820       if (*count >= limit)
13821         return true;
13822     }
13823   }
13824   return false;
13825 }
13826 
13827 #ifndef DBUG_OFF
13828 
print_sel_tree(PARAM * param,SEL_TREE * tree,key_map * tree_map,const char * msg)13829 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
13830                            const char *msg)
13831 {
13832   SEL_ARG **key,**end;
13833   int idx;
13834   char buff[1024];
13835   DBUG_ENTER("print_sel_tree");
13836 
13837   String tmp(buff,sizeof(buff),&my_charset_bin);
13838   tmp.length(0);
13839   for (idx= 0,key=tree->keys, end=key+param->keys ;
13840        key != end ;
13841        key++,idx++)
13842   {
13843     if (tree_map->is_set(idx))
13844     {
13845       uint keynr= param->real_keynr[idx];
13846       if (tmp.length())
13847         tmp.append(',');
13848       tmp.append(param->table->key_info[keynr].name);
13849     }
13850   }
13851   if (!tmp.length())
13852     tmp.append(STRING_WITH_LEN("(empty)"));
13853 
13854   DBUG_PRINT("info", ("SEL_TREE: %p (%s)  scans: %s", tree, msg, tmp.ptr()));
13855   DBUG_VOID_RETURN;
13856 }
13857 
13858 
print_ror_scans_arr(TABLE * table,const char * msg,struct st_ror_scan_info ** start,struct st_ror_scan_info ** end)13859 static void print_ror_scans_arr(TABLE *table, const char *msg,
13860                                 struct st_ror_scan_info **start,
13861                                 struct st_ror_scan_info **end)
13862 {
13863   DBUG_ENTER("print_ror_scans_arr");
13864 
13865   char buff[1024];
13866   String tmp(buff,sizeof(buff),&my_charset_bin);
13867   tmp.length(0);
13868   for (;start != end; start++)
13869   {
13870     if (tmp.length())
13871       tmp.append(',');
13872     tmp.append(table->key_info[(*start)->keynr].name);
13873   }
13874   if (!tmp.length())
13875     tmp.append(STRING_WITH_LEN("(empty)"));
13876   DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
13877   fprintf(DBUG_FILE,"ROR key scans (%s): %s", msg, tmp.ptr());
13878 
13879   DBUG_VOID_RETURN;
13880 }
13881 
13882 
13883 #endif /* !DBUG_OFF */
13884 
13885 /**
13886   Print a key to a string
13887 
13888   @param[out] out          String the key is appended to
13889   @param[in]  key_part     Index components description
13890   @param[in]  key          Key tuple
13891   @param[in]  used_length  Key tuple length
13892 */
13893 static void
print_key_value(String * out,const KEY_PART_INFO * key_part,const uchar * key)13894 print_key_value(String *out, const KEY_PART_INFO *key_part, const uchar *key)
13895 {
13896   Field *field= key_part->field;
13897 
13898   if (field->flags & BLOB_FLAG)
13899   {
13900     // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
13901     if (field->real_maybe_null() && *key)
13902       out->append(STRING_WITH_LEN("NULL"));
13903     else
13904       out->append(STRING_WITH_LEN("unprintable_blob_value"));
13905     return;
13906   }
13907 
13908   char buff[128];
13909   String tmp(buff, sizeof(buff), system_charset_info);
13910   tmp.length(0);
13911 
13912   TABLE *table= field->table;
13913   my_bitmap_map *old_sets[2];
13914 
13915   dbug_tmp_use_all_columns(table, old_sets, table->read_set,
13916                            table->write_set);
13917 
13918   uint store_length= key_part->store_length;
13919 
13920   if (field->real_maybe_null())
13921   {
13922     /*
13923       Byte 0 of key is the null-byte. If set, key is NULL.
13924       Otherwise, print the key value starting immediately after the
13925       null-byte
13926     */
13927     if (*key)
13928     {
13929       out->append(STRING_WITH_LEN("NULL"));
13930       goto restore_col_map;
13931     }
13932     key++;                                    // Skip null byte
13933     store_length--;
13934   }
13935   field->set_key_image(key, key_part->length);
13936   if (field->type() == MYSQL_TYPE_BIT)
13937     (void) field->val_int_as_str(&tmp, 1); // may change tmp's charset
13938   else
13939     field->val_str(&tmp); // may change tmp's charset
13940   out->append(tmp.ptr(), tmp.length(), tmp.charset());
13941 
13942 restore_col_map:
13943   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
13944 }
13945 
13946 /**
13947   Append range info for a key part to a string
13948 
13949   @param[in,out] out          String the range info is appended to
13950   @param[in]     key_part     Indexed column used in a range select
13951   @param[in]     min_key      Key tuple describing lower bound of range
13952   @param[in]     max_key      Key tuple describing upper bound of range
13953   @param[in]     flag         Key range flags defining what min_key
13954                               and max_key represent @see my_base.h
13955  */
append_range(String * out,const KEY_PART_INFO * key_part,const uchar * min_key,const uchar * max_key,const uint flag)13956 void append_range(String *out,
13957                   const KEY_PART_INFO *key_part,
13958                   const uchar *min_key, const uchar *max_key,
13959                   const uint flag)
13960 {
13961   if (out->length() > 0)
13962     out->append(STRING_WITH_LEN(" AND "));
13963 
13964   if (!(flag & NO_MIN_RANGE))
13965   {
13966     print_key_value(out, key_part, min_key);
13967     if (flag & NEAR_MIN)
13968       out->append(STRING_WITH_LEN(" < "));
13969     else
13970       out->append(STRING_WITH_LEN(" <= "));
13971   }
13972 
13973   out->append(key_part->field->field_name);
13974 
13975   if (!(flag & NO_MAX_RANGE))
13976   {
13977     if (flag & NEAR_MAX)
13978       out->append(STRING_WITH_LEN(" < "));
13979     else
13980       out->append(STRING_WITH_LEN(" <= "));
13981     print_key_value(out, key_part, max_key);
13982   }
13983 }
13984 
13985 /**
13986   Traverse an R-B tree of range conditions and append all ranges for
13987   this keypart and consecutive keyparts to range_trace (if non-NULL)
13988   or to range_string (if range_trace is NULL). See description of R-B
13989   trees/SEL_ARG for details on how ranges are linked.
13990 
13991   @param[in,out] range_trace   Optimizer trace array ranges are appended to
13992   @param[in,out] range_string  The string where range predicates are
13993                                appended when the last keypart has
13994                                been reached.
13995   @param[in]     range_so_far  String containing ranges for keyparts prior
13996                                to this keypart.
13997   @param[in]     keypart_root  The root of the R-B tree containing intervals
13998                                for this keypart.
13999   @param[in]     key_parts     Index components description, used when adding
14000                                information to the optimizer trace
14001 
14002   @note This function mimics the behavior of sel_arg_range_seq_next()
14003 */
append_range_all_keyparts(Opt_trace_array * range_trace,String * range_string,String * range_so_far,SEL_ARG * keypart_root,const KEY_PART_INFO * key_parts)14004 static void append_range_all_keyparts(Opt_trace_array *range_trace,
14005                                       String *range_string,
14006                                       String *range_so_far,
14007                                       SEL_ARG *keypart_root,
14008                                       const KEY_PART_INFO *key_parts)
14009 {
14010   DBUG_ASSERT(keypart_root && keypart_root != &null_element);
14011 
14012   const bool append_to_trace= (range_trace != NULL);
14013 
14014   // Either add info to range_string or to range_trace
14015   DBUG_ASSERT(append_to_trace ? !range_string : (range_string != NULL));
14016 
14017   // Navigate to first interval in red-black tree
14018   const KEY_PART_INFO *cur_key_part= key_parts + keypart_root->part;
14019   const SEL_ARG *keypart_range= keypart_root->first();
14020 
14021   const uint save_range_so_far_length= range_so_far->length();
14022 
14023   while (keypart_range)
14024   {
14025     /*
14026       Skip the rest of condition printing to avoid OOM if appending to
14027       range_string and the string becomes too long. Printing very long
14028       range conditions normally doesn't make sense either.
14029      */
14030     if (!append_to_trace && range_string->length() > 500)
14031     {
14032       range_string->append(STRING_WITH_LEN("..."));
14033       break;
14034     }
14035 
14036     // Append the current range predicate to the range String
14037     append_range(range_so_far, cur_key_part,
14038                  keypart_range->min_value, keypart_range->max_value,
14039                  keypart_range->min_flag | keypart_range->max_flag);
14040 
14041     /*
14042       Print range predicates for consecutive keyparts if
14043       1) There are predicates for later keyparts
14044       2) There are no "holes" in the used keyparts (keypartX can only
14045          be used if there is a range predicate on keypartX-1)
14046       3) The current range is an equality range
14047      */
14048     if (keypart_range->next_key_part &&
14049         keypart_range->next_key_part->part == keypart_range->part + 1 &&
14050         keypart_range->is_singlepoint())
14051     {
14052       append_range_all_keyparts(range_trace, range_string, range_so_far,
14053                                 keypart_range->next_key_part, key_parts);
14054     }
14055     else
14056     {
14057       /*
14058         This is the last keypart with a usable range predicate. Print
14059         full range info to the optimizer trace or to the string
14060       */
14061       if (append_to_trace)
14062         range_trace->add_utf8(range_so_far->ptr(),
14063                               range_so_far->length());
14064       else
14065       {
14066         if (range_string->length() == 0)
14067           range_string->append(STRING_WITH_LEN("("));
14068         else
14069           range_string->append(STRING_WITH_LEN(" OR ("));
14070 
14071         range_string->append(range_so_far->ptr(), range_so_far->length());
14072         range_string->append(STRING_WITH_LEN(")"));
14073       }
14074     }
14075     keypart_range= keypart_range->next;
14076     /*
14077       Now moving to next range for this keypart, so "reset"
14078       range_so_far to include only range description of earlier
14079       keyparts
14080     */
14081     range_so_far->length(save_range_so_far_length);
14082   }
14083 }
14084 
14085 /**
14086   Print the ranges in a SEL_TREE to debug log.
14087 
14088   @param tree_name   Descriptive name of the tree
14089   @param tree        The SEL_TREE that will be printed to debug log
14090   @param param       PARAM from SQL_SELECT::test_quick_select
14091 */
dbug_print_tree(const char * tree_name,SEL_TREE * tree,const RANGE_OPT_PARAM * param)14092 static inline void dbug_print_tree(const char *tree_name,
14093                                    SEL_TREE *tree,
14094                                    const RANGE_OPT_PARAM *param)
14095 {
14096 #ifndef DBUG_OFF
14097   if (!param->using_real_indexes)
14098   {
14099     DBUG_PRINT("info",
14100                ("sel_tree: "
14101                 "%s uses a partitioned index and cannot be printed",
14102                 tree_name));
14103     return;
14104   }
14105 
14106   if (!tree)
14107   {
14108     DBUG_PRINT("info", ("sel_tree: %s is NULL", tree_name));
14109     return;
14110   }
14111 
14112   if (tree->type == SEL_TREE::IMPOSSIBLE)
14113   {
14114     DBUG_PRINT("info", ("sel_tree: %s is IMPOSSIBLE", tree_name));
14115     return;
14116   }
14117 
14118   if (tree->type == SEL_TREE::ALWAYS)
14119   {
14120     DBUG_PRINT("info", ("sel_tree: %s is ALWAYS", tree_name));
14121     return;
14122   }
14123 
14124   if (tree->type == SEL_TREE::MAYBE)
14125   {
14126     DBUG_PRINT("info", ("sel_tree: %s is MAYBE", tree_name));
14127     return;
14128   }
14129 
14130   if (!tree->merges.is_empty())
14131   {
14132     DBUG_PRINT("info",
14133                ("sel_tree: "
14134                 "%s contains the following merges", tree_name));
14135 
14136     List_iterator<SEL_IMERGE> it(tree->merges);
14137     int i= 0;
14138     for (SEL_IMERGE *el= it++; el; el= it++, i++)
14139     {
14140       for (SEL_TREE** current= el->trees;
14141            current != el->trees_next;
14142            current++)
14143         dbug_print_tree("  merge_tree", *current, param);
14144     }
14145   }
14146 
14147   for (uint i= 0; i< param->keys; i++)
14148   {
14149     if (tree->keys[i] == NULL || tree->keys[i] == &null_element)
14150       continue;
14151 
14152     uint real_key_nr= param->real_keynr[i];
14153 
14154     const KEY &cur_key= param->table->key_info[real_key_nr];
14155     const KEY_PART_INFO *key_part= cur_key.key_part;
14156 
14157     /*
14158       String holding the final range description from
14159       append_range_all_keyparts()
14160     */
14161     char buff1[512];
14162     String range_result(buff1, sizeof(buff1), system_charset_info);
14163     range_result.length(0);
14164 
14165     /*
14166       Range description up to a certain keypart - used internally in
14167       append_range_all_keyparts()
14168     */
14169     char buff2[128];
14170     String range_so_far(buff2, sizeof(buff2), system_charset_info);
14171     range_so_far.length(0);
14172 
14173     append_range_all_keyparts(NULL, &range_result, &range_so_far,
14174                               tree->keys[i], key_part);
14175 
14176     DBUG_PRINT("info",
14177                ("sel_tree: %s->keys[%d(real_keynr: %d)]: %s",
14178                 tree_name, i, real_key_nr, range_result.ptr()));
14179   }
14180 #endif
14181 }
14182 
14183 /*****************************************************************************
14184 ** Print a quick range for debugging
14185 ** TODO:
14186 ** This should be changed to use a String to store each row instead
14187 ** of locking the DEBUG stream !
14188 *****************************************************************************/
14189 
14190 #ifndef DBUG_OFF
14191 
/**
  Print the values of all key parts in a key tuple to DBUG_FILE,
  separated by '/'. NULL key parts are printed as "NULL".

  @param key_part     First index component description of the tuple
  @param key          The key tuple to print
  @param used_length  Total length (in bytes) of the key tuple
*/
static void
print_multiple_key_values(KEY_PART *key_part, const uchar *key,
                          uint used_length)
{
  char buff[1024];
  const uchar *key_end= key+used_length;
  String tmp(buff,sizeof(buff),&my_charset_bin);
  uint store_length;
  TABLE *table= key_part->field->table;
  my_bitmap_map *old_sets[2];

  // Allow reading/writing any column without tripping column-set asserts
  dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);

  /*
    Loop invariant: on NULL-able key parts the body advances `key` past
    the null byte and decrements `store_length` to match, so the
    `key+=store_length` step always moves exactly one full key part
    (null byte included) — except when the value IS NULL, where
    `continue` skips the adjustment and the unmodified store_length
    covers the whole part.
  */
  for (; key < key_end; key+=store_length, key_part++)
  {
    Field *field=      key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      // Byte 0 of a nullable key part is the null byte
      if (*key)
      {
        fwrite("NULL",sizeof(char),4,DBUG_FILE);
        continue;
      }
      key++;                                    // Skip null byte
      store_length--;
    }
    field->set_key_image(key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    // Separate key part values with '/'
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
}
14231 
print_quick(QUICK_SELECT_I * quick,const key_map * needed_reg)14232 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
14233 {
14234   char buf[MAX_KEY/8+1];
14235   TABLE *table;
14236   my_bitmap_map *old_sets[2];
14237   DBUG_ENTER("print_quick");
14238   if (!quick)
14239     DBUG_VOID_RETURN;
14240   DBUG_LOCK_FILE;
14241 
14242   table= quick->head;
14243   dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
14244   quick->dbug_dump(0, TRUE);
14245   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
14246 
14247   fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
14248 
14249   DBUG_UNLOCK_FILE;
14250   DBUG_VOID_RETURN;
14251 }
14252 
dbug_dump(int indent,bool verbose)14253 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
14254 {
14255   /* purecov: begin inspected */
14256   fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
14257           indent, "", head->key_info[index].name, max_used_key_length);
14258 
14259   if (verbose)
14260   {
14261     QUICK_RANGE *range;
14262     QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
14263     QUICK_RANGE **end_range= pr + ranges.elements;
14264     for (; pr != end_range; ++pr)
14265     {
14266       fprintf(DBUG_FILE, "%*s", indent + 2, "");
14267       range= *pr;
14268       if (!(range->flag & NO_MIN_RANGE))
14269       {
14270         print_multiple_key_values(key_parts, range->min_key,
14271                                   range->min_length);
14272         if (range->flag & NEAR_MIN)
14273           fputs(" < ",DBUG_FILE);
14274         else
14275           fputs(" <= ",DBUG_FILE);
14276       }
14277       fputs("X",DBUG_FILE);
14278 
14279       if (!(range->flag & NO_MAX_RANGE))
14280       {
14281         if (range->flag & NEAR_MAX)
14282           fputs(" < ",DBUG_FILE);
14283         else
14284           fputs(" <= ",DBUG_FILE);
14285         print_multiple_key_values(key_parts, range->max_key,
14286                                   range->max_length);
14287       }
14288       fputs("\n",DBUG_FILE);
14289     }
14290   }
14291   /* purecov: end */
14292 }
14293 
dbug_dump(int indent,bool verbose)14294 void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
14295 {
14296   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14297   QUICK_RANGE_SELECT *quick;
14298   fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
14299   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14300   while ((quick= it++))
14301     quick->dbug_dump(indent+2, verbose);
14302   if (pk_quick_select)
14303   {
14304     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14305     pk_quick_select->dbug_dump(indent+2, verbose);
14306   }
14307   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14308 }
14309 
dbug_dump(int indent,bool verbose)14310 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
14311 {
14312   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
14313   QUICK_RANGE_SELECT *quick;
14314   fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
14315           indent, "", need_to_fetch_row? "":"non-");
14316   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14317   while ((quick= it++))
14318     quick->dbug_dump(indent+2, verbose);
14319   if (cpk_quick)
14320   {
14321     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
14322     cpk_quick->dbug_dump(indent+2, verbose);
14323   }
14324   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14325 }
14326 
dbug_dump(int indent,bool verbose)14327 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
14328 {
14329   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
14330   QUICK_SELECT_I *quick;
14331   fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
14332   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
14333   while ((quick= it++))
14334     quick->dbug_dump(indent+2, verbose);
14335   fprintf(DBUG_FILE, "%*s}\n", indent, "");
14336 }
14337 
14338 /*
14339   Print quick select information to DBUG_FILE.
14340 
14341   SYNOPSIS
14342     QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
14343     indent  Indentation offset
14344     verbose If TRUE show more detailed output.
14345 
14346   DESCRIPTION
14347     Print the contents of this quick select to DBUG_FILE. The method also
14348     calls dbug_dump() for the used quick select if any.
14349 
14350   IMPLEMENTATION
14351     Caller is responsible for locking DBUG_FILE before this call and unlocking
14352     it afterwards.
14353 
14354   RETURN
14355     None
14356 */
14357 
dbug_dump(int indent,bool verbose)14358 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
14359 {
14360   fprintf(DBUG_FILE,
14361           "%*squick_group_min_max_select: index %s (%d), length: %d\n",
14362           indent, "", index_info->name, index, max_used_key_length);
14363   if (key_infix_len > 0)
14364   {
14365     fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
14366             indent, "", key_infix_len);
14367   }
14368   if (quick_prefix_select)
14369   {
14370     fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
14371     quick_prefix_select->dbug_dump(indent + 2, verbose);
14372   }
14373   if (min_max_ranges.elements > 0)
14374   {
14375     fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
14376             indent, "", min_max_ranges.elements);
14377   }
14378 }
14379 
14380 
14381 #endif /* !DBUG_OFF */
14382