1 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
2    Copyright (c) 2008, 2020, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /*
18   TODO:
19   Fix that MAYBE_KEY are stored in the tree so that we can detect use
20   of full hash keys for queries like:
21 
22   select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
23 
24 */
25 
26 /*
27   This file contains:
28 
29   RangeAnalysisModule
30     A module that accepts a condition, index (or partitioning) description,
31     and builds lists of intervals (in index/partitioning space), such that
32     all possible records that match the condition are contained within the
33     intervals.
34     The entry point for the range analysis module is get_mm_tree() function.
35 
36     The lists are returned in form of complicated structure of interlinked
37     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
38     See quick_range_seq_next, find_used_partitions for examples of how to walk
39     this structure.
40     All direct "users" of this module are located within this file, too.
41 
42 
43   PartitionPruningModule
44     A module that accepts a partitioned table, condition, and finds which
45     partitions we will need to use in query execution. Search down for
46     "PartitionPruningModule" for description.
47     The module has single entry point - prune_partitions() function.
48 
49 
50   Range/index_merge/groupby-minmax optimizer module
51     A module that accepts a table, condition, and returns
52      - a QUICK_*_SELECT object that can be used to retrieve rows that match
53        the specified condition, or a "no records will match the condition"
54        statement.
55 
56     The module entry points are
57       test_quick_select()
58       get_quick_select_for_ref()
59 
60 
61   Record retrieval code for range/index_merge/groupby-min-max.
62     Implementations of QUICK_*_SELECT classes.
63 
64   KeyTupleFormat
65   ~~~~~~~~~~~~~~
66   The code in this file (and elsewhere) makes operations on key value tuples.
67   Those tuples are stored in the following format:
68 
69   The tuple is a sequence of key part values. The length of key part value
  depends only on its type (and does not depend on what value is stored)
71 
72     KeyTuple: keypart1-data, keypart2-data, ...
73 
74   The value of each keypart is stored in the following format:
75 
76     keypart_data: [isnull_byte] keypart-value-bytes
77 
78   If a keypart may have a NULL value (key_part->field->real_maybe_null() can
79   be used to check this), then the first byte is a NULL indicator with the
80   following valid values:
81     1  - keypart has NULL value.
82     0  - keypart has non-NULL value.
83 
84   <questionable-statement> If isnull_byte==1 (NULL value), then the following
85   keypart->length bytes must be 0.
86   </questionable-statement>
87 
88   keypart-value-bytes holds the value. Its format depends on the field type.
89   The length of keypart-value-bytes may or may not depend on the value being
90   stored. The default is that length is static and equal to
91   KEY_PART_INFO::length.
92 
93   Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the
94   value:
95 
96      keypart-value-bytes: value_length value_bytes
97 
98   The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
99 
100   See key_copy() and key_restore() for code to move data between index tuple
101   and table record
102 
103   CAUTION: the above description is only sergefp's understanding of the
104            subject and may omit some details.
105 */
106 
107 #ifdef USE_PRAGMA_IMPLEMENTATION
108 #pragma implementation				// gcc: Class implementation
109 #endif
110 
111 #include "mariadb.h"
112 #include "sql_priv.h"
113 #include "key.h"        // is_key_used, key_copy, key_cmp, key_restore
114 #include "sql_parse.h"                          // check_stack_overrun
115 #include "sql_partition.h"    // get_part_id_func, PARTITION_ITERATOR,
116                               // struct partition_info, NOT_A_PARTITION_ID
117 #include "records.h"          // init_read_record, end_read_record
118 #include <m_ctype.h>
119 #include "sql_select.h"
120 #include "sql_statistics.h"
121 #include "uniques.h"
122 #include "my_json_writer.h"
123 
124 #ifndef EXTRA_DEBUG
125 #define test_rb_tree(A,B) {}
126 #define test_use_count(A) {}
127 #endif
128 
129 /*
130   Convert double value to #rows. Currently this does floor(), and we
131   might consider using round() instead.
132 */
133 #define double2rows(x) ((ha_rows)(x))
134 
135 /*
136   this should be long enough so that any memcmp with a string that
137   starts from '\0' won't cross is_null_string boundaries, even
138   if the memcmp is optimized to compare 4- 8- or 16- bytes at once
139 */
140 static uchar is_null_string[20]= {1,0};
141 
142 /**
143   Helper function to compare two SEL_ARG's.
144 */
all_same(const SEL_ARG * sa1,const SEL_ARG * sa2)145 static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
146 {
147   if (sa1 == NULL && sa2 == NULL)
148     return true;
149   if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
150     return false;
151   return sa1->all_same(sa2);
152 }
153 
154 class SEL_IMERGE;
155 
156 #define CLONE_KEY1_MAYBE 1
157 #define CLONE_KEY2_MAYBE 2
158 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
159 
160 
161 /*
162   While objects of the class SEL_ARG represent ranges for indexes or
163   index infixes (including ranges for index prefixes and index suffixes),
164   objects of the class SEL_TREE represent AND/OR formulas of such ranges.
165   Currently an AND/OR formula represented by a SEL_TREE object can have
166   at most three levels:
167 
168     <SEL_TREE formula> ::=
169       [ <SEL_RANGE_TREE formula> AND ]
170       [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]
171 
172     <SEL_RANGE_TREE formula> ::=
173       <SEL_ARG formula> [ AND  <SEL_ARG_formula> ... ]
174 
175     <SEL_IMERGE formula> ::=
176       <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]
177 
178   As we can see from the above definitions:
179    - SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
180    - SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
181    - SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
182      and SEL_IMERGE formulas.
183   It's required above that a SEL_TREE formula has at least one conjunct.
184 
185   Usually we will consider normalized SEL_RANGE_TREE formulas where we use
186   TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.
187 
188   We will call an SEL_TREE object simply 'tree'.
189   The part of a tree that represents SEL_RANGE_TREE formula is called
190   'range part' of the tree while the remaining part is called 'imerge part'.
191   If a tree contains only a range part then we call such a tree 'range tree'.
192   Components of a range tree that represent SEL_ARG formulas are called ranges.
193   If a tree does not contain any range part we call such a tree 'imerge tree'.
194   Components of the imerge part of a tree that represent SEL_IMERGE formula
195   are called imerges.
196 
197   Usually we'll designate:
198     SEL_TREE formulas         by T_1,...,T_k
199     SEL_ARG formulas          by R_1,...,R_k
200     SEL_RANGE_TREE formulas   by RT_1,...,RT_k
201     SEL_IMERGE formulas       by M_1,...,M_k
202   Accordingly we'll use:
203     t_1,...,t_k - to designate trees representing T_1,...,T_k
204     r_1,...,r_k - to designate ranges representing R_1,...,R_k
205     rt_1,...,r_tk - to designate range trees representing RT_1,...,RT_k
206     m_1,...,m_k - to designate imerges representing M_1,...,M_k
207 
208   SEL_TREE objects are usually built from WHERE conditions or
209   ON expressions.
210   A SEL_TREE object always represents an inference of the condition it is
211   built from. Therefore, if a row satisfies a SEL_TREE formula it also
212   satisfies the condition it is built from.
213 
214   The following transformations of tree t representing SEL_TREE formula T
  yield a new tree t1 that represents an inference of T: T=>T1.
216     (1) remove any of SEL_ARG tree from the range part of t
217     (2) remove any imerge from the tree t
218     (3) remove any of SEL_ARG tree from any range tree contained
219         in any imerge of tree
220 
221   Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE
222   objects in many cases can be effectively used to filter out a big part
223   of table rows that do not satisfy WHERE/IN conditions utilizing
224   only single or multiple range index scans.
225 
226   A single range index scan is constructed for a range tree that contains
227   only one SEL_ARG object for an index or an index prefix.
228   An index intersection scan can be constructed for a range tree
229   that contains several SEL_ARG objects. Currently index intersection
230   scans are constructed only for single-point ranges.
231   An index merge scan is constructed for a imerge tree that contains only
232   one imerge. If range trees of this imerge contain only single-point merges
  then a union of index intersections can be built.
234 
235   Usually the tree built by the range optimizer for a query table contains
236   more than one range in the range part, and additionally may contain some
237   imerges in the imerge part. The range optimizer evaluates all of them one
238   by one and chooses the range or the imerge that provides the cheapest
239   single or multiple range index scan of the table.  According to rules
  (1)-(3) this scan always filters out only those rows that do not satisfy
241   the query conditions.
242 
243   For any condition the SEL_TREE object for it is built in a bottom up
244   manner starting from the range trees for the predicates. The tree_and
245   function builds a tree for any conjunction of formulas from the trees
246   for its conjuncts. The tree_or function builds a tree for any disjunction
247   of formulas from the trees for its disjuncts.
248 */
249 
class SEL_TREE :public Sql_alloc
{
public:
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
       (type == SEL_TREE::IMPOSSIBLE)
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;

  /*
    Construct a tree of the given type with room for num_keys per-index
    SEL_ARG trees; starts with an empty keys_map (no ranges set yet).
  */
  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  /* Convenience constructor for the common case: a tree of type KEY. */
  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
    type(KEY), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  /*
    Copying constructor; when without_merges==TRUE only the range part of
    'arg' is copied (defined out-of-line elsewhere in this file).
  */
  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
  /*
    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such SEL_TREE instead of NULL)
  */
  Mem_root_array<SEL_ARG *, true> keys;
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using index_merge. The list is non-empty only
    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */

  struct st_index_scan_info **index_scans;     /* list of index scans */
  struct st_index_scan_info **index_scans_end; /* last index scan */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */

  /* TRUE if the tree has no range part (no per-index SEL_ARG trees) */
  bool without_ranges() { return keys_map.is_clear_all(); }
  /* TRUE if the tree has no imerge part */
  bool without_imerges() { return merges.is_empty(); }
};
302 
303 
class PARAM : public RANGE_OPT_PARAM
{
public:
  /* Per-key row estimates produced by the range analysis */
  ha_rows quick_rows[MAX_KEY];

  /*
    This will collect 'possible keys' based on the range optimization.

    Queries with a JOIN object actually use ref optimizer (see add_key_field)
    to collect possible_keys. This is used by single table UPDATE/DELETE.
  */
  key_map possible_keys;
  longlong baseflag;
  uint max_key_parts, range_count;

  bool quick;				// Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* Number of ranges in the last checked tree->key */
  uint n_ranges;
  uint8 first_null_comp; /* first null component if any, 0 - otherwise */
};
334 
335 
336 class TABLE_READ_PLAN;
337   class TRP_RANGE;
338   class TRP_ROR_INTERSECT;
339   class TRP_ROR_UNION;
340   class TRP_INDEX_INTERSECT;
341   class TRP_INDEX_MERGE;
342   class TRP_GROUP_MIN_MAX;
343 
344 struct st_index_scan_info;
345 struct st_ror_scan_info;
346 
347 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
348 static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
349                                   SEL_ARG *tree, bool update_tbl_stats,
350                                   uint *mrr_flags, uint *bufsize,
351                                   Cost_estimate *cost, bool *is_ror_scan);
352 
353 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
354                                      SEL_ARG *key_tree, uint mrr_flags,
355                                      uint mrr_buf_size, MEM_ROOT *alloc);
356 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
357                                        bool index_read_must_be_used,
358                                        bool for_range_access,
359                                        double read_time);
360 static
361 TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
362                                               double read_time);
363 static
364 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
365                                           double read_time,
366                                           bool *are_all_covering);
367 static
368 TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
369                                                    SEL_TREE *tree,
370                                                    double read_time);
371 static
372 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
373                                          double read_time, bool named_trace= false);
374 static
375 TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
376                                         TRP_INDEX_MERGE *imerge_trp,
377                                         double read_time);
378 static
379 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
380                                           double read_time);
381 
382 #ifndef DBUG_OFF
383 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
384                            const char *msg);
385 static void print_ror_scans_arr(TABLE *table, const char *msg,
386                                 struct st_ror_scan_info **start,
387                                 struct st_ror_scan_info **end);
388 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
389 #endif
390 
391 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
392                           SEL_TREE *tree1, SEL_TREE *tree2);
393 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
394                          SEL_TREE *tree1,SEL_TREE *tree2);
395 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
396 static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
397                        SEL_ARG *key1, SEL_ARG *key2);
398 static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
399                         SEL_ARG *key1, SEL_ARG *key2,
400                         uint clone_flag);
401 static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
402                                   SEL_ARG *key1, SEL_ARG *key2);
403 static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
404                                    SEL_ARG *key1, SEL_ARG *key2,
405                                    uint clone_flag);
406 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
407 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
408                     SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
409                     uchar *max_key,uint max_key_flag);
410 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
411 
412 SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
413 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
414                              uint length);
415 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
416 
417 static
418 SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
419                                       SEL_ARG *sel_arg);
420 static
421 bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
422                                   SEL_ARG *key2);
423 
424 #include "opt_range_mrr.cc"
425 
426 static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
427                                        key_map *common_keys);
428 static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
429                                           SEL_TREE *tree);
430 
431 static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
432                                   SEL_TREE *tree1, SEL_TREE *tree2,
433                                   key_map *common_keys);
434 static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
435                                    SEL_TREE *tree1, SEL_TREE *tree2,
436                                    key_map common_keys);
437 static int and_range_trees(RANGE_OPT_PARAM *param,
438                            SEL_TREE *tree1, SEL_TREE *tree2,
439                            SEL_TREE *result);
440 static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree);
441 static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree,
442                                    SEL_ARG **backup);
443 static void print_key_value(String *out, const KEY_PART_INFO *key_part,
444                             const uchar* key, uint length);
445 static void print_keyparts_name(String *out, const KEY_PART_INFO *key_part,
446                                 uint n_keypart, key_part_map keypart_map);
447 
448 static void trace_ranges(Json_writer_array *range_trace,
449                          PARAM *param, uint idx,
450                          SEL_ARG *keypart,
451                          const KEY_PART_INFO *key_parts);
452 
453 static
454 void print_range(String *out, const KEY_PART_INFO *key_part,
455                  KEY_MULTI_RANGE *range, uint n_key_parts);
456 
457 static
458 void print_range_for_non_indexed_field(String *out, Field *field,
459                                        KEY_MULTI_RANGE *range);
460 
461 static void print_min_range_operator(String *out, const ha_rkey_function flag);
462 static void print_max_range_operator(String *out, const ha_rkey_function flag);
463 
464 static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field);
465 
466 /*
467   SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
468   a condition in the following form:
469    (t_1||t_2||...||t_N) && (next)
470 
471   where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
472   (t_i,t_j) contains SEL_ARGS for the same index.
473 
474   SEL_TREE contained in SEL_IMERGE always has merges=NULL.
475 
476   This class relies on memory manager to do the cleanup.
477 */
478 
class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  /* Inline storage used until more than PREALLOCED_TREES trees are added */
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  /* Start with the preallocated array; or_sel_tree() grows it on demand */
  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  /* Copying constructor: copies the first cnt trees of 'arg' */
  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                   SEL_IMERGE *new_imerge);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                              uint n_init_trees,
                              SEL_TREE *new_tree,
                              bool is_first_check_pass,
                              bool *is_last_check_pass);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                uint n_init_trees,
                                SEL_IMERGE* imerge,
                                bool is_first_check_pass,
                                bool *is_last_check_pass);
};
511 
512 
513 /*
514   Add a range tree to the range trees of this imerge
515 
516   SYNOPSIS
517     or_sel_tree()
518       param                  Context info for the operation
519       tree                   SEL_TREE to add to this imerge
520 
521   DESCRIPTION
522     The function just adds the range tree 'tree' to the range trees
523     of this imerge.
524 
525   RETURN
526      0   if the operation is success
    -1   if the function runs out of memory
528 */
529 
or_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree)530 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
531 {
532   if (trees_next == trees_end)
533   {
534     const int realloc_ratio= 2;		/* Double size for next round */
535     size_t old_elements= (trees_end - trees);
536     size_t old_size= sizeof(SEL_TREE**) * old_elements;
537     size_t new_size= old_size * realloc_ratio;
538     SEL_TREE **new_trees;
539     if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
540       return -1;
541     memcpy(new_trees, trees, old_size);
542     trees=      new_trees;
543     trees_next= trees + old_elements;
544     trees_end=  trees + old_elements * realloc_ratio;
545   }
546   *(trees_next++)= tree;
547   return 0;
548 }
549 
550 
551 /*
552   Check if any of the range trees of this imerge intersects with a given tree
553 
554   SYNOPSIS
555     have_common_keys()
556       param    Context info for the function
557       tree     SEL_TREE intersection with the imerge range trees is checked for
558 
559   DESCRIPTION
560     The function checks whether there is any range tree rt_i in this imerge
561     such that there are some indexes for which ranges are defined in both
562     rt_i and the range part of the SEL_TREE tree.
563     To check this the function calls the function sel_trees_have_common_keys.
564 
565   RETURN
566     TRUE    if there are such range trees in this imerge
567     FALSE   otherwise
568 */
569 
have_common_keys(RANGE_OPT_PARAM * param,SEL_TREE * tree)570 bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
571 {
572   for (SEL_TREE** or_tree= trees, **bound= trees_next;
573        or_tree != bound; or_tree++)
574   {
575     key_map common_keys;
576     if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
577       return TRUE;
578   }
579   return FALSE;
580 }
581 
582 
583 /*
584   Perform AND operation for this imerge and the range part of a tree
585 
586   SYNOPSIS
587     and_sel_tree()
588       param           Context info for the operation
589       tree            SEL_TREE for the second operand of the operation
590       new_imerge  OUT imerge for the result of the operation
591 
592   DESCRIPTION
593     This function performs AND operation for this imerge m and the
594     range part of the SEL_TREE tree rt. In other words the function
595     pushes rt into this imerge. The resulting imerge is returned in
596     the parameter new_imerge.
597     If this imerge m represent the formula
598       RT_1 OR ... OR RT_k
599     then the resulting imerge of the function represents the formula
600       (RT_1 AND RT) OR ... OR (RT_k AND RT)
601     The function calls the function and_range_trees to construct the
602     range tree representing (RT_i AND RT).
603 
604   NOTE
605     The function may return an empty imerge without any range trees.
606     This happens when each call of and_range_trees returns an
607     impossible range tree (SEL_TREE::IMPOSSIBLE).
608     Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).
609 
610   RETURN
611      0  if the operation is a success
612     -1  otherwise: there is not enough memory to perform the operation
613 */
614 
and_sel_tree(RANGE_OPT_PARAM * param,SEL_TREE * tree,SEL_IMERGE * new_imerge)615 int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
616                              SEL_IMERGE *new_imerge)
617 {
618   for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++)
619   {
620     SEL_TREE *res_or_tree= 0;
621     SEL_TREE *and_tree= 0;
622     if (!(res_or_tree= new SEL_TREE(param->mem_root, param->keys)) ||
623         !(and_tree= new SEL_TREE(tree, TRUE, param)))
624       return (-1);
625     if (!and_range_trees(param, *or_tree, and_tree, res_or_tree))
626     {
627       if (new_imerge->or_sel_tree(param, res_or_tree))
628         return (-1);
629     }
630   }
631   return 0;
632 }
633 
634 
635 /*
636   Perform OR operation on this imerge and the range part of a tree
637 
638   SYNOPSIS
639     or_sel_tree_with_checks()
640       param                  Context info for the operation
641       n_trees                Number of trees in this imerge to check for oring
642       tree                   SEL_TREE whose range part is to be ored
643       is_first_check_pass    <=> the first call of the function for this imerge
644       is_last_check_pass OUT <=> no more calls of the function for this imerge
645 
646   DESCRIPTION
647     The function performs OR operation on this imerge m and the range part
648     of the SEL_TREE tree rt. It always replaces this imerge with the result
649     of the operation.
650 
651     The operation can be performed in two different modes: with
652     is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
653     this imerge differently.
654 
655     Given this imerge represents the formula
656       RT_1 OR ... OR RT_k:
657 
658     1. In the first mode, when is_first_check_pass==TRUE :
659       1.1. If rt must be ored(see the function sel_trees_must_be_ored) with
660            some rt_j (there may be only one such range tree in the imerge)
661            then the function produces an imerge representing the formula
662              RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
663            where the tree for (RT_j OR RT) is built by oring the pairs
664            of SEL_ARG trees for the corresponding indexes
665       1.2. Otherwise the function produces the imerge representing the formula:
666            RT_1 OR ... OR RT_k OR RT.
667 
668     2. In the second mode, when is_first_check_pass==FALSE :
669       2.1. For each rt_j in the imerge that can be ored (see the function
670            sel_trees_can_be_ored) with rt the function replaces rt_j for a
671            range tree such that for each index for which ranges are defined
672            in both in rt_j and rt  the tree contains the  result of oring of
673            these ranges.
674       2.2. In other cases the function does not produce any imerge.
675 
676     When is_first_check==TRUE the function returns FALSE in the parameter
677     is_last_check_pass if there is no rt_j such that rt_j can be ored with rt,
678     but, at the same time, it's not true that rt_j must be ored with rt.
679     When is_first_check==FALSE the function always returns FALSE in the
680     parameter is_last_check_pass.
681 
682   RETURN
683     1  The result of oring of rt_j and rt that must be ored returns the
684        the range tree with type==SEL_TREE::ALWAYS
685        (in this case the imerge m should be discarded)
686    -1  The function runs out of memory
687     0  in all other cases
688 */
689 
int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                                        uint n_trees,
                                        SEL_TREE *tree,
                                        bool is_first_check_pass,
                                        bool *is_last_check_pass)
{
  bool was_ored= FALSE;
  /* In the second mode (is_first_check_pass==FALSE) this stays FALSE */
  *is_last_check_pass= is_first_check_pass;
  SEL_TREE** or_tree= trees;
  for (uint i= 0; i < n_trees; i++, or_tree++)
  {
    SEL_TREE *result= 0;
    key_map result_keys;
    key_map ored_keys;
    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
    {
      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
                                                ored_keys);
      /* Or in-place: mandatory in pass 1, opportunistic in pass 2 */
      if (must_be_ored || !is_first_check_pass)
      {
        result_keys.clear_all();
        result= *or_tree;
        for (uint key_no= 0; key_no < param->keys; key_no++)
        {
          /* Keys with ranges on only one side are dropped from the result */
          if (!ored_keys.is_set(key_no))
	  {
            result->keys[key_no]= 0;
	    continue;
          }
          SEL_ARG *key1= (*or_tree)->keys[key_no];
          SEL_ARG *key2= tree->keys[key_no];
          /* key2 is shared with 'tree': bump its use count before oring */
          key2->incr_refs();
          if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1,
                                                       key2)))
          {

            result_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
	    {
              key1= result->keys[key_no];
              (key1)->test_use_count(key1);
            }
#endif
          }
        }
      }
      else if(is_first_check_pass)
        *is_last_check_pass= FALSE;
    }

    if (result)
    {
      result->keys_map= result_keys;
      /* No per-key ranges survived the oring: the tree matches all rows */
      if (result_keys.is_clear_all())
        result->type= SEL_TREE::ALWAYS;
      if ((result->type == SEL_TREE::MAYBE) ||
          (result->type == SEL_TREE::ALWAYS))
        return 1;
      /* SEL_TREE::IMPOSSIBLE is impossible here */
      *or_tree= result;
      was_ored= TRUE;
    }
  }
  if (was_ored)
    return 0;

  /*
    Nothing was ored in-place; in the first pass append a copy of 'tree'
    as a new disjunct of this imerge (case 1.2 in the function comment).
  */
  if (is_first_check_pass && !*is_last_check_pass &&
      !(tree= new SEL_TREE(tree, FALSE, param)))
    return (-1);
  return or_sel_tree(param, tree);
}
762 
763 
764 /*
  Perform OR operation on this imerge and another imerge
766 
767   SYNOPSIS
768     or_sel_imerge_with_checks()
769       param                  Context info for the operation
770       n_trees           Number of trees in this imerge to check for oring
771       imerge                 The second operand of the operation
772       is_first_check_pass    <=> the first call of the function for this imerge
773       is_last_check_pass OUT <=> no more calls of the function for this imerge
774 
775   DESCRIPTION
776     For each range tree rt from 'imerge' the function calls the method
777     SEL_IMERGE::or_sel_tree_with_checks that performs OR operation on this
778     SEL_IMERGE object m and the tree rt. The mode of the operation is
779     specified by the parameter is_first_check_pass. Each call of
780     SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
    The function returns FALSE in the parameter is_last_check_pass if
782     at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
783     returns FALSE as the value of its last parameter.
784 
785   RETURN
786     1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
787        (in this case the imerge m should be discarded)
788    -1  The function runs out of memory
789     0  in all other cases
790 */
791 
or_sel_imerge_with_checks(RANGE_OPT_PARAM * param,uint n_trees,SEL_IMERGE * imerge,bool is_first_check_pass,bool * is_last_check_pass)792 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
793                                           uint n_trees,
794                                           SEL_IMERGE* imerge,
795                                           bool is_first_check_pass,
796                                           bool *is_last_check_pass)
797 {
798   *is_last_check_pass= TRUE;
799   SEL_TREE** tree= imerge->trees;
800   SEL_TREE** tree_end= imerge->trees_next;
801   for ( ; tree < tree_end; tree++)
802   {
803     uint rc;
804     bool is_last= TRUE;
805     rc= or_sel_tree_with_checks(param, n_trees, *tree,
806                                is_first_check_pass, &is_last);
807     if (!is_last)
808       *is_last_check_pass= FALSE;
809     if (rc)
810       return rc;
811   }
812   return 0;
813 }
814 
815 
816 /*
817   Copy constructor for SEL_TREE objects
818 
819   SYNOPSIS
820     SEL_TREE
821       arg            The source tree for the constructor
822       without_merges <=> only the range part of the tree arg is copied
823       param          Context info for the operation
824 
825   DESCRIPTION
826     The constructor creates a full copy of the SEL_TREE arg if
    the parameter without_merges==FALSE. Otherwise a tree is created
828     that contains the copy only of the range part of the tree arg.
829 */
830 
SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
                   RANGE_OPT_PARAM *param)
  : Sql_alloc(),
    keys(param->mem_root, param->keys),
    n_ror_scans(0)
{
  keys_map= arg->keys_map;
  type= arg->type;
  MEM_ROOT *mem_root;

  /* Share the SEL_ARG graphs of the source tree; they are reference-counted */
  for (uint idx= 0; idx < param->keys; idx++)
  {
    if ((keys[idx]= arg->keys[idx]))
      keys[idx]->incr_refs_all();
  }

  if (without_merges)
    return;

  /* Copy the list of index merges as well */
  mem_root= current_thd->mem_root;
  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new (mem_root) SEL_IMERGE(el, 0, param);
    /*
      An imerge with trees == trees_next signals that the SEL_IMERGE copy
      constructor ran out of memory; drop all merges in that case.
    */
    if (!merge || merge->trees == merge->trees_next)
    {
      merges.empty();
      return;
    }
    merges.push_back(merge, mem_root);
  }
}
863 
864 
865 /*
866   Copy constructor for SEL_IMERGE objects
867 
868   SYNOPSIS
869     SEL_IMERGE
870       arg         The source imerge for the constructor
871       cnt         How many trees from arg are to be copied
872       param       Context info for the operation
873 
874   DESCRIPTION
    If cnt==0 then the constructor creates a full copy of the
876     imerge arg. Otherwise only the first cnt trees of the imerge
877     are copied.
878 */
879 
SEL_IMERGE(SEL_IMERGE * arg,uint cnt,RANGE_OPT_PARAM * param)880 SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
881                        RANGE_OPT_PARAM *param) : Sql_alloc()
882 {
883   size_t elements= (arg->trees_end - arg->trees);
884   if (elements > PREALLOCED_TREES)
885   {
886     size_t size= elements * sizeof (SEL_TREE **);
887     if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
888       goto mem_err;
889   }
890   else
891     trees= &trees_prealloced[0];
892 
893   trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
894   trees_end= trees + elements;
895 
896   for (SEL_TREE **tree= trees, **arg_tree= arg->trees; tree < trees_next;
897        tree++, arg_tree++)
898   {
899     if (!(*tree= new SEL_TREE(*arg_tree, TRUE, param)))
900       goto mem_err;
901   }
902 
903   return;
904 
905 mem_err:
906   trees= &trees_prealloced[0];
907   trees_next= trees;
908   trees_end= trees;
909 }
910 
911 
912 /*
913   Perform AND operation on two imerge lists
914 
915   SYNOPSIS
916     imerge_list_and_list()
917       param             Context info for the operation
918       im1               The first imerge list for the operation
919       im2               The second imerge list for the operation
920 
921   DESCRIPTION
922     The function just appends the imerge list im2 to the imerge list im1
923 
924   RETURN VALUE
925     none
926 */
927 
inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  /* ANDing two imerge lists is just concatenation of the conjunct lists */
  im1->append(im2);
}
932 
933 
934 /*
935   Perform OR operation on two imerge lists
936 
937   SYNOPSIS
938     imerge_list_or_list()
939       param             Context info for the operation
940       im1               The first imerge list for the operation
941       im2               The second imerge list for the operation
942 
943   DESCRIPTION
944     Assuming that the first imerge list represents the formula
945       F1= M1_1 AND ... AND M1_k1
946     while the second imerge list represents the formula
947       F2= M2_1 AND ... AND M2_k2,
948     where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
949     and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
950     the function builds a list of imerges for some formula that can be
951     inferred from the formula (F1 OR F2).
952 
953     More exactly the function builds imerges for the formula (M1_1 OR M2_1).
954     Note that
955       (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
956       AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
957       M1_1 OR M2_1.
958     So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).
959 
960     To build imerges for the formula (M1_1 OR M2_1) the function invokes,
961     possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
962     for the imerge m1_1.
963     At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
964     performs OR operation on the imerge m1_1 and the range tree rt2_1_1 by
965     calling SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==TRUE.
966     The resulting imerge of the operation is ored with the next range tree of
967     the imerge m2_1. This oring continues until the last range tree from
968     m2_1 has been ored.
969     At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
970     performs the same sequence of OR operations, but now calling
971     SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==FALSE.
972 
973     The imerges that the operation produces replace those in the list im1
974 
975   RETURN
976     0     if the operation is a success
977    -1     if the function has run out of memory
978 */
979 
imerge_list_or_list(RANGE_OPT_PARAM * param,List<SEL_IMERGE> * im1,List<SEL_IMERGE> * im2)980 int imerge_list_or_list(RANGE_OPT_PARAM *param,
981                         List<SEL_IMERGE> *im1,
982                         List<SEL_IMERGE> *im2)
983 {
984 
985   uint rc;
986   bool is_last_check_pass= FALSE;
987   SEL_IMERGE *imerge= im1->head();
988   uint elems= (uint)(imerge->trees_next-imerge->trees);
989   MEM_ROOT *mem_root= current_thd->mem_root;
990 
991   im1->empty();
992   im1->push_back(imerge, mem_root);
993 
994   rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
995                                         TRUE, &is_last_check_pass);
996   if (rc)
997   {
998     if (rc == 1)
999     {
1000       im1->empty();
1001       rc= 0;
1002     }
1003     return rc;
1004   }
1005 
1006   if (!is_last_check_pass)
1007   {
1008     SEL_IMERGE* new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, param);
1009     if (new_imerge)
1010     {
1011       is_last_check_pass= TRUE;
1012       rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
1013                                                  FALSE, &is_last_check_pass);
1014       if (!rc)
1015         im1->push_back(new_imerge, mem_root);
1016     }
1017   }
1018   return rc;
1019 }
1020 
1021 
1022 /*
1023   Perform OR operation for each imerge from a list and the range part of a tree
1024 
1025   SYNOPSIS
1026     imerge_list_or_tree()
1027       param       Context info for the operation
1028       merges      The list of imerges to be ored with the range part of tree
1029       tree        SEL_TREE whose range part is to be ored with the imerges
1030 
1031   DESCRIPTION
    For each imerge mi from the list 'merges' the function performs OR
1033     operation with mi and the range part of 'tree' rt, producing one or
1034     two imerges.
1035 
1036     Given the merge mi represent the formula RTi_1 OR ... OR RTi_k,
1037     the function forms the merges by the following rules:
1038 
1039     1. If rt cannot be ored with any of the trees rti the function just
1040        produces an imerge that represents the formula
1041          RTi_1 OR ... RTi_k OR RT.
1042     2. If there exist a tree rtj that must be ored with rt the function
       produces an imerge that represents the formula
1044          RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
1045        where the range tree for (RTi_j OR RT) is constructed by oring the
1046        SEL_ARG trees that must be ored.
1047     3. For each rti_j that can be ored with rt the function produces
1048        the new tree rti_j' and substitutes rti_j for this new range tree.
1049 
1050     In any case the function removes mi from the list and then adds all
1051     produced imerges.
1052 
1053     To build imerges by rules 1-3 the function calls the method
1054     SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
1055     call it passes TRUE for the third parameter of the function.
1056     At this first call imerges by rules 1-2 are built. If the call
1057     returns FALSE as the return value of its fourth parameter then the
    function is called for the second time. At this call the imerge
1059     of rule 3 is produced.
1060 
1061     If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
    it means that the produced tree contains an always true
1063     range tree and the whole imerge can be discarded.
1064 
1065   RETURN
1066     1     if no imerges are produced
1067     0     otherwise
1068 */
1069 
static
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *merges,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List<SEL_IMERGE> additional_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    bool is_last_check_pass;
    int rc= 0;
    int rc1= 0;
    /* Each imerge needs its own copy of 'tree' as the oring may modify it */
    SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param);
    if (or_tree)
    {
      uint elems= (uint)(imerge->trees_next-imerge->trees);
      /* First pass: build imerges by rules 1-2 (see function comment) */
      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                          TRUE, &is_last_check_pass);
      if (!is_last_check_pass)
      {
        /* Second pass over a copy of the imerge: build the rule 3 imerge */
        SEL_IMERGE *new_imerge= new (mem_root) SEL_IMERGE(imerge, elems,
                                                          param);
        if (new_imerge)
	{
          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                                   FALSE, &is_last_check_pass);
          if (!rc1)
            additional_merges.push_back(new_imerge, mem_root);
        }
      }
    }
    /* rc/rc1 != 0 means always-true (1) or OOM (-1): discard this imerge */
    if (rc || rc1 || !or_tree)
      it.remove();
  }

  merges->append(&additional_merges);
  return merges->is_empty();
}
1111 
1112 
1113 /*
1114   Perform pushdown operation of the range part of a tree into given imerges
1115 
1116   SYNOPSIS
1117     imerge_list_and_tree()
1118       param           Context info for the operation
1119       merges   IN/OUT List of imerges to push the range part of 'tree' into
1120       tree            SEL_TREE whose range part is to be pushed into imerges
      replace         if the pushdown operation for an imerge is a success
1122                       then the original imerge is replaced for the result
1123                       of the pushdown
1124 
1125   DESCRIPTION
1126     For each imerge from the list merges the function pushes the range part
1127     rt of 'tree' into the imerge.
1128     More exactly if the imerge mi from the list represents the formula
1129       RTi_1 OR ... OR RTi_k
    the function builds a new imerge that represents the formula
1131       (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
1132     and adds this imerge to the list merges.
1133     To perform this pushdown operation the function calls the method
1134     SEL_IMERGE::and_sel_tree.
1135     For any imerge mi the new imerge is not created if for each pair of
1136     trees rti_j and rt the intersection of the indexes with defined ranges
1137     is empty.
1138     If the result of the pushdown operation for the imerge mi returns an
    imerge with no trees then not only is nothing added to the list
1140     merges but mi itself is removed from the list.
1141 
1142   TODO
1143     Optimize the code in order to not create new SEL_IMERGE and new SER_TREE
1144     objects when 'replace' is TRUE. (Currently this function is called always
1145     with this parameter equal to TRUE.)
1146 
1147   RETURN
1148     1    if no imerges are left in the list merges
1149     0    otherwise
1150 */
1151 
static
int imerge_list_and_tree(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *merges,
                         SEL_TREE *tree,
                         bool replace)
{
  SEL_IMERGE *imerge;
  SEL_IMERGE *new_imerge= NULL;
  List<SEL_IMERGE> new_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    /* Reuse a result imerge left unconsumed by a previous iteration */
    if (!new_imerge)
      new_imerge= new (mem_root) SEL_IMERGE();
    /* Push 'tree' into the imerge only if they have index ranges in common */
    if (imerge->have_common_keys(param, tree) &&
        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
    {
      if (new_imerge->trees == new_imerge->trees_next)
        it.remove();          /* pushdown yielded an empty imerge: drop mi */
      else
      {
        if (replace)
          it.replace(new_imerge);
        else
          new_merges.push_back(new_imerge, mem_root);
        new_imerge= NULL;     /* consumed: allocate a fresh one next time */
      }
    }
  }
  imerge_list_and_list(&new_merges, merges);
  *merges= new_merges;
  return merges->is_empty();
}
1187 
1188 
1189 /***************************************************************************
1190 ** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
1191 ***************************************************************************/
1192 
1193 	/* make a select from mysql info
1194 	   Error is set as following:
1195 	   0 = ok
1196 	   1 = Got some error (out of memory?)
1197 	   */
1198 
make_select(TABLE * head,table_map const_tables,table_map read_tables,COND * conds,SORT_INFO * filesort,bool allow_null_cond,int * error)1199 SQL_SELECT *make_select(TABLE *head, table_map const_tables,
1200 			table_map read_tables, COND *conds,
1201                         SORT_INFO *filesort,
1202                         bool allow_null_cond,
1203                         int *error)
1204 {
1205   SQL_SELECT *select;
1206   DBUG_ENTER("make_select");
1207 
1208   *error=0;
1209 
1210   if (!conds && !allow_null_cond)
1211     DBUG_RETURN(0);
1212   if (!(select= new (head->in_use->mem_root) SQL_SELECT))
1213   {
1214     *error= 1;			// out of memory
1215     DBUG_RETURN(0);		/* purecov: inspected */
1216   }
1217   select->read_tables=read_tables;
1218   select->const_tables=const_tables;
1219   select->head=head;
1220   select->cond= conds;
1221 
1222   if (filesort && my_b_inited(&filesort->io_cache))
1223   {
1224     /*
1225       Hijack the filesort io_cache for make_select
1226       SQL_SELECT will be responsible for ensuring that it's properly freed.
1227     */
1228     select->file= filesort->io_cache;
1229     select->records=(ha_rows) (select->file.end_of_file/
1230 			       head->file->ref_length);
1231     my_b_clear(&filesort->io_cache);
1232   }
1233   DBUG_RETURN(select);
1234 }
1235 
1236 
SQL_SELECT()1237 SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0)
1238 {
1239   quick_keys.clear_all(); needed_reg.clear_all();
1240   my_b_clear(&file);
1241 }
1242 
1243 
cleanup()1244 void SQL_SELECT::cleanup()
1245 {
1246   delete quick;
1247   quick= 0;
1248   if (free_cond)
1249   {
1250     free_cond=0;
1251     delete cond;
1252     cond= 0;
1253   }
1254   close_cached_file(&file);
1255 }
1256 
1257 
/* Destructor delegates all resource release to cleanup() */
SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}
1262 
1263 #undef index					// Fix for Unixware 7
1264 
/* Base-class constructor: no key bytes or key parts used yet */
QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}
1269 
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :thd(thd), no_alloc(no_alloc), parent_alloc(parent_alloc),
   free_file(0),cur_range(NULL),last_range(0),dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head=  table;
  key_part_info= head->key_info[index].key_part;

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.mrr_buff_size;
  mrr_buf_desc= NULL;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
    /*
      NOTE(review): thd->mem_root is redirected to our own root here and
      not restored in this constructor — presumably the caller restores
      it after building the quick select; confirm against callers.
    */
    thd->mem_root= &alloc;
  }
  else
    bzero((char*) &alloc,sizeof(alloc));
  file= head->file;
  record= head->record[0];

  /* Start with room for 16 ranges; the array grows in steps of 16 */
  my_init_dynamic_array2(PSI_INSTRUMENT_ME, &ranges, sizeof(QUICK_RANGE*),
                         thd->alloc(sizeof(QUICK_RANGE*) * 16), 16, 16,
                         MYF(MY_THREAD_SPECIFIC));

  /* Allocate a bitmap for used columns */
  if (!(bitmap= (my_bitmap_map*) thd->alloc(head->s->column_bitmap_size)))
  {
    /* Out of memory: flag the error to the caller via *create_error */
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    my_bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}
1314 
1315 
need_sorted_output()1316 void QUICK_RANGE_SELECT::need_sorted_output()
1317 {
1318   if (!(mrr_flags & HA_MRR_SORTED))
1319   {
1320     /*
1321       Native implementation can't produce sorted output. We'll have to
1322       switch to default
1323     */
1324     mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
1325   }
1326   mrr_flags |= HA_MRR_SORTED;
1327 }
1328 
1329 
/* Prepare the handler for a new scan; always succeeds (returns FALSE) */
int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  /* End any index or table scan the handler may still have open */
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}
1338 
1339 
/* Terminate the current index/table scan on the handler, if any */
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
}
1345 
1346 
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      file->ha_end_keyread();
      if (free_file)
      {
        /* The handler was cloned (see init_ror_merged_scan): unlock,
           close and delete it ourselves */
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_unlock(current_thd);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
  }
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}
1372 
1373 /*
1374   QUICK_INDEX_SORT_SELECT works as follows:
1375   - Do index scans, accumulate rowids in the Unique object
1376     (Unique will also sort and de-duplicate rowids)
1377   - Use rowids from unique to run a disk-ordered sweep
1378 */
1379 
/* See the algorithm outline in the comment above */
QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param, TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
  index= MAX_KEY;               /* this is a merge, not a single-index scan */
  head= table;
  init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
  DBUG_VOID_RETURN;
}
1390 
/* Nothing to do here; the real work happens in reset() */
int QUICK_INDEX_SORT_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
  DBUG_RETURN(0);
}
1396 
reset()1397 int QUICK_INDEX_SORT_SELECT::reset()
1398 {
1399   DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
1400   const int retval= read_keys_and_merge();
1401   DBUG_RETURN(retval);
1402 }
1403 
/*
  Add a quick select to be merged. A scan on the clustered primary key is
  stored separately (pk_quick_select), see the note below.
  Returns the result of List::push_back for non-CPK scans — presumably
  TRUE on out-of-memory; confirm with the List implementation.
*/
bool
QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
  if (head->file->is_clustering_key(quick_sel_range->index))
  {
   /*
     A quick_select over a clustered primary key is handled specifically
     Here we assume:
     - PK columns are included in any other merged index
     - Scan on the PK is disk-ordered.
       (not meeting #2 will only cause performance degradation)

       We could treat clustered PK as any other index, but that would
       be inefficient. There is no point in doing scan on
       CPK, remembering the rowid, then making rnd_pos() call with
       that rowid.
    */
    pk_quick_select= quick_sel_range;
    DBUG_RETURN(0);
  }
  DBUG_RETURN(quick_selects.push_back(quick_sel_range, thd->mem_root));
}
1427 
QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
  delete unique;
  quick_it.rewind();
  /*
    Null out the merged quicks' handler pointers before deleting them so
    that ~QUICK_RANGE_SELECT (which skips handler teardown when file ==
    NULL) does not touch a handler it does not own.
  */
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1444 
/* Construct a rowid-ordered-retrieval intersection quick select */
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;               /* a merge, not a single-index scan */
  head= table;
  record= head->record[0];
  /* Use our own memroot only when no parent allocator was supplied */
  if (!parent_alloc)
    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
  else
    bzero(&alloc, sizeof(MEM_ROOT));
  /* Rowid buffer; stays NULL on OOM, which init() reports to the caller */
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}
1463 
1464 
1465 /*
1466   Do post-constructor initialization.
1467   SYNOPSIS
1468     QUICK_ROR_INTERSECT_SELECT::init()
1469 
1470   RETURN
1471     0      OK
1472     other  Error code
1473 */
1474 
int QUICK_ROR_INTERSECT_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
 /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
}
1481 
1482 
1483 /*
1484   Initialize this quick select to be a ROR-merged scan.
1485 
1486   SYNOPSIS
1487     QUICK_RANGE_SELECT::init_ror_merged_scan()
1488       reuse_handler If TRUE, use head->file, otherwise create a separate
1489                     handler object
1490 
1491   NOTES
1492     This function creates and prepares for subsequent use a separate handler
1493     object if it can't reuse head->file. The reason for this is that during
1494     ROR-merge several key scans are performed simultaneously, and a single
1495     handler is only capable of preserving context of a single key scan.
1496 
1497     In ROR-merge the quick select doing merge does full records retrieval,
1498     merged quick selects read only keys.
1499 
1500   RETURN
1501     0  ROR child scan initialized, ok to use.
1502     1  error
1503 */
1504 
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler,
                                             MEM_ROOT *local_alloc)
{
  handler *save_file= file, *org_file;
  THD *thd= head->in_use;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  if (reuse_handler)
  {
    /* Use the table's own handler; only (re)initialize the scan */
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init())
    {
      DBUG_RETURN(1);
    }
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  if (!(file= head->file->clone(head->s->normalized_path.str, local_alloc)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init())
  {
    /* Scan setup failed: undo lock/open of the cloned handler */
    file->ha_external_unlock(thd);
    file->ha_close();
    goto failure;
  }
  /* From here on the destructor must dispose of the cloned handler */
  free_file= TRUE;
  last_rowid= file->ref;

end:
  /*
    We are only going to read key fields and call position() on 'file'
    The following sets head->read_set (== column_bitmap) to only use this
    key. The 'column_bitmap' is used in ::get_next()
  */
  org_file= head->file;
  head->file= file;

  head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
  head->prepare_for_keyread(index, &column_bitmap);
  head->prepare_for_position();

  head->file= org_file;

  /* Restore head->read_set (and write_set) to what they had before the call */
  head->column_bitmaps_set(save_read_set, save_write_set);

  if (reset())
  {
    if (!reuse_handler)
    {
      file->ha_external_unlock(thd);
      file->ha_close();
      goto failure;
    }
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);

failure:
  /* Restore bitmaps and the original handler before reporting the error */
  head->column_bitmaps_set(save_read_set, save_write_set);
  delete file;
  file= save_file;
  free_file= false;
  DBUG_RETURN(1);
}
1595 
1596 
1597 /*
1598   Initialize this quick select to be a part of a ROR-merged scan.
1599   SYNOPSIS
1600     QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
1601       reuse_handler If TRUE, use head->file, otherwise create separate
1602                     handler object.
1603   RETURN
1604     0     OK
1605     other error code
1606 */
int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler,
                                                     MEM_ROOT *local_alloc)
{
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
  QUICK_SELECT_WITH_RECORD *cur;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");

  /* Initialize all merged "children" quick selects */
  DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
  if (!need_to_fetch_row && reuse_handler)
  {
    cur= quick_it++;
    quick= cur->quick;
    /*
      There is no use of this->file. Use it for the first of merged range
      selects.
    */
    int error= quick->init_ror_merged_scan(TRUE, local_alloc);
    if (unlikely(error))
      DBUG_RETURN(error);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
  }
  /* The remaining children each get their own cloned handler */
  while ((cur= quick_it++))
  {
    quick= cur->quick;
#ifndef DBUG_OFF
    const MY_BITMAP * const save_read_set= quick->head->read_set;
    const MY_BITMAP * const save_write_set= quick->head->write_set;
#endif
    if (quick->init_ror_merged_scan(FALSE, local_alloc))
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);

    // Sets are shared by all members of "quick_selects" so must not change
#ifndef DBUG_OFF
    DBUG_ASSERT(quick->head->read_set == save_read_set);
    DBUG_ASSERT(quick->head->write_set == save_write_set);
#endif
    /* All merged scans share the same record buffer in intersection. */
    quick->record= head->record[0];
  }

  /* Prepare random-position reads if full rows must be fetched */
  if (need_to_fetch_row &&
      unlikely(head->file->ha_rnd_init_with_error(false)))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}
1658 
1659 
1660 /*
1661   Initialize quick select for row retrieval.
1662   SYNOPSIS
1663     reset()
1664   RETURN
1665     0      OK
1666     other  Error code
1667 */
1668 
reset()1669 int QUICK_ROR_INTERSECT_SELECT::reset()
1670 {
1671   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
1672   if (!scans_inited && init_ror_merged_scan(TRUE, &alloc))
1673     DBUG_RETURN(1);
1674   scans_inited= TRUE;
1675   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
1676   QUICK_SELECT_WITH_RECORD *qr;
1677   while ((qr= it++))
1678     qr->quick->reset();
1679   DBUG_RETURN(0);
1680 }
1681 
1682 
1683 /*
1684   Add a merged quick select to this ROR-intersection quick select.
1685 
1686   SYNOPSIS
1687     QUICK_ROR_INTERSECT_SELECT::push_quick_back()
1688       alloc Mem root to create auxiliary structures on
1689       quick Quick select to be added. The quick select must return
1690             rows in rowid order.
1691   NOTES
1692     This call can only be made before init() is called.
1693 
1694   RETURN
1695     FALSE OK
1696     TRUE  Out of memory.
1697 */
1698 
1699 bool
push_quick_back(MEM_ROOT * local_alloc,QUICK_RANGE_SELECT * quick)1700 QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *local_alloc,
1701                                             QUICK_RANGE_SELECT *quick)
1702 {
1703   QUICK_SELECT_WITH_RECORD *qr;
1704   if (!(qr= new QUICK_SELECT_WITH_RECORD) ||
1705       !(qr->key_tuple= (uchar*)alloc_root(local_alloc,
1706                                           quick->max_used_key_length)))
1707     return TRUE;
1708   qr->quick= quick;
1709   return quick_selects.push_back(qr);
1710 }
1711 
1712 
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  /* The merged child scans are owned by this object. */
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  /* End the rnd scan that init_ror_merged_scan() started, if still open. */
  if (need_to_fetch_row && head->file->inited != handler::NONE)
    head->file->ha_rnd_end();
  DBUG_VOID_RETURN;
}
1723 
1724 
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  index= MAX_KEY;   /* Not a single-index scan */
  head= table;
  rowid_length= table->file->ref_length;
  record= head->record[0];
  init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
  /*
    NOTE(review): this redirects all subsequent THD allocations to this
    object's mem_root; presumably the caller saves and restores
    thd->mem_root around construction -- confirm at call sites.
  */
  thd_param->mem_root= &alloc;
}
1737 
1738 
1739 /*
1740   Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority
1741   queue.
1742 
  SYNOPSIS
1744     QUICK_ROR_UNION_SELECT_queue_cmp()
1745       arg   Pointer to QUICK_ROR_UNION_SELECT
1746       val1  First merged select
1747       val2  Second merged select
1748 */
1749 
1750 C_MODE_START
1751 
QUICK_ROR_UNION_SELECT_queue_cmp(void * arg,uchar * val1,uchar * val2)1752 static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
1753 {
1754   QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1755   return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
1756                                    ((QUICK_SELECT_I*)val2)->last_rowid);
1757 }
1758 
1759 C_MODE_END
1760 
1761 
1762 /*
1763   Do post-constructor initialization.
1764   SYNOPSIS
1765     QUICK_ROR_UNION_SELECT::init()
1766 
1767   RETURN
1768     0      OK
1769     other  Error code
1770 */
1771 
init()1772 int QUICK_ROR_UNION_SELECT::init()
1773 {
1774   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1775   if (init_queue(&queue, quick_selects.elements, 0,
1776                  FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
1777                  (void*) this, 0, 0))
1778   {
1779     bzero(&queue, sizeof(QUEUE));
1780     DBUG_RETURN(1);
1781   }
1782 
1783   if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
1784     DBUG_RETURN(1);
1785   prev_rowid= cur_rowid + head->file->ref_length;
1786   DBUG_RETURN(0);
1787 }
1788 
1789 
/*
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()

  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I *quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
  have_prev_rowid= FALSE;
  if (!scans_inited)
  {
    /* One-time setup: each merged scan gets its own handler object. */
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      if (quick->init_ror_merged_scan(FALSE, &alloc))
        DBUG_RETURN(1);
    }
    scans_inited= TRUE;
  }
  queue_remove_all(&queue);
  /*
    Initialize scans for merged quick selects and put all merged quick
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
    if (unlikely((error= quick->reset())))
      DBUG_RETURN(error);
    if (unlikely((error= quick->get_next())))
    {
      /* A scan that is empty from the start simply stays out of the queue. */
      if (error == HA_ERR_END_OF_FILE)
        continue;
      DBUG_RETURN(error);
    }
    quick->save_last_pos();
    queue_insert(&queue, (uchar*)quick);
  }
  /* Prepare for ha_rnd_pos calls: restart any rnd scan left from before. */
  if (head->file->inited && unlikely((error= head->file->ha_rnd_end())))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
    DBUG_RETURN(error);
  }
  if (unlikely((error= head->file->ha_rnd_init(false))))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(error);
  }

  DBUG_RETURN(0);
}
1849 
1850 
1851 bool
push_quick_back(QUICK_SELECT_I * quick_sel_range)1852 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
1853 {
1854   return quick_selects.push_back(quick_sel_range);
1855 }
1856 
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  delete_queue(&queue);
  /* Merged scans were handed over via push_quick_back() and are owned here. */
  quick_selects.delete_elements();
  /* End the rnd scan started in reset(), if still open. */
  if (head->file->inited != handler::NONE)
    head->file->ha_rnd_end();
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1867 
1868 
/* Default range: unbounded on both ends, i.e. the whole index. */
QUICK_RANGE::QUICK_RANGE()
  :min_key(0),max_key(0),min_length(0),max_length(0),
   flag(NO_MIN_RANGE | NO_MAX_RANGE),
  min_keypart_map(0), max_keypart_map(0)
{}
1874 
/*
  Shallow copy of a SEL_ARG node: shares the min/max value buffers and the
  next_key_part chain with the source (bumping its use_count), but does NOT
  copy the red-black tree links (left/right) or the prev link.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
{
  type=arg.type;
  min_flag=arg.min_flag;
  max_flag=arg.max_flag;
  maybe_flag=arg.maybe_flag;
  maybe_null=arg.maybe_null;
  part=arg.part;
  field=arg.field;
  min_value=arg.min_value;
  max_value=arg.max_value;
  next_key_part=arg.next_key_part;
  max_part_no= arg.max_part_no;
  use_count=1; elements=1;
  weight=1;
  next= 0;
  if (next_key_part)
  {
    /* The shared next-key-part tree gains one more referrer. */
    ++next_key_part->use_count;
    weight += next_key_part->weight;
  }
}
1897 
1898 
/* Turn this node into a fresh, unreferenced single-node tree root. */
inline void SEL_ARG::make_root()
{
  left=right= &null_element;
  color=BLACK;
  next=prev=0;
  use_count=0; elements=1;
}
1906 
/*
  Create a closed [min_value_arg, max_value_arg] interval over field f,
  as key part 0. The value buffers are shared, not copied.
*/
SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
                 const uchar *max_value_arg)
  :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
   elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
   max_value((uchar*) max_value_arg), next(0),prev(0),
   next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1)
{
  left=right= &null_element;
  max_part_no= 1;
}
1917 
/*
  Create an interval for key part 'part_' with explicit min/max flags
  (NEAR_MIN, NO_MAX_RANGE, ...). The value buffers are shared, not copied.
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
                 uchar *min_value_, uchar *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
   part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
   field(field_), min_value(min_value_), max_value(max_value_),
   next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1)
{
  max_part_no= part+1;
  left=right= &null_element;
}
1929 
1930 
1931 /*
1932   A number of helper classes:
1933     SEL_ARG_LE, SEL_ARG_LT, SEL_ARG_GT, SEL_ARG_GE,
1934   to share the code between:
1935     Field::stored_field_make_mm_leaf()
1936     Field::stored_field_make_mm_leaf_exact()
1937 */
1938 class SEL_ARG_LE: public SEL_ARG
1939 {
1940 public:
SEL_ARG_LE(const uchar * key,Field * field)1941   SEL_ARG_LE(const uchar *key, Field *field)
1942    :SEL_ARG(field, key, key)
1943   {
1944     if (!field->real_maybe_null())
1945       min_flag= NO_MIN_RANGE;     // From start
1946     else
1947     {
1948       min_value= is_null_string;
1949       min_flag= NEAR_MIN;        // > NULL
1950     }
1951   }
1952 };
1953 
1954 
1955 class SEL_ARG_LT: public SEL_ARG_LE
1956 {
1957 public:
1958   /*
1959     Use this constructor if value->save_in_field() went precisely,
1960     without any data rounding or truncation.
1961   */
SEL_ARG_LT(const uchar * key,Field * field)1962   SEL_ARG_LT(const uchar *key, Field *field)
1963    :SEL_ARG_LE(key, field)
1964   { max_flag= NEAR_MAX; }
1965   /*
1966     Use this constructor if value->save_in_field() returned success,
1967     but we don't know if rounding or truncation happened
1968     (as some Field::store() do not report minor data changes).
1969   */
SEL_ARG_LT(THD * thd,const uchar * key,Field * field,Item * value)1970   SEL_ARG_LT(THD *thd, const uchar *key, Field *field, Item *value)
1971    :SEL_ARG_LE(key, field)
1972   {
1973     if (stored_field_cmp_to_item(thd, field, value) == 0)
1974       max_flag= NEAR_MAX;
1975   }
1976 };
1977 
1978 
1979 class SEL_ARG_GT: public SEL_ARG
1980 {
1981 public:
1982   /*
1983     Use this constructor if value->save_in_field() went precisely,
1984     without any data rounding or truncation.
1985   */
SEL_ARG_GT(const uchar * key,const KEY_PART * key_part,Field * field)1986   SEL_ARG_GT(const uchar *key, const KEY_PART *key_part, Field *field)
1987    :SEL_ARG(field, key, key)
1988   {
1989     // Don't use open ranges for partial key_segments
1990     if (!(key_part->flag & HA_PART_KEY_SEG))
1991       min_flag= NEAR_MIN;
1992     max_flag= NO_MAX_RANGE;
1993   }
1994   /*
1995     Use this constructor if value->save_in_field() returned success,
1996     but we don't know if rounding or truncation happened
1997     (as some Field::store() do not report minor data changes).
1998   */
SEL_ARG_GT(THD * thd,const uchar * key,const KEY_PART * key_part,Field * field,Item * value)1999   SEL_ARG_GT(THD *thd, const uchar *key,
2000              const KEY_PART *key_part, Field *field, Item *value)
2001    :SEL_ARG(field, key, key)
2002   {
2003     // Don't use open ranges for partial key_segments
2004     if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
2005         (stored_field_cmp_to_item(thd, field, value) <= 0))
2006       min_flag= NEAR_MIN;
2007     max_flag= NO_MAX_RANGE;
2008   }
2009 };
2010 
2011 
2012 class SEL_ARG_GE: public SEL_ARG
2013 {
2014 public:
2015   /*
2016     Use this constructor if value->save_in_field() went precisely,
2017     without any data rounding or truncation.
2018   */
SEL_ARG_GE(const uchar * key,Field * field)2019   SEL_ARG_GE(const uchar *key, Field *field)
2020    :SEL_ARG(field, key, key)
2021   {
2022     max_flag= NO_MAX_RANGE;
2023   }
2024   /*
2025     Use this constructor if value->save_in_field() returned success,
2026     but we don't know if rounding or truncation happened
2027     (as some Field::store() do not report minor data changes).
2028   */
SEL_ARG_GE(THD * thd,const uchar * key,const KEY_PART * key_part,Field * field,Item * value)2029   SEL_ARG_GE(THD *thd, const uchar *key,
2030              const KEY_PART *key_part, Field *field, Item *value)
2031    :SEL_ARG(field, key, key)
2032   {
2033     // Don't use open ranges for partial key_segments
2034     if ((!(key_part->flag & HA_PART_KEY_SEG)) &&
2035         (stored_field_cmp_to_item(thd, field, value) < 0))
2036       min_flag= NEAR_MIN;
2037     max_flag= NO_MAX_RANGE;
2038   }
2039 };
2040 
2041 
/*
  Recursively clone this node and its left/right subtrees, linking the
  clones into a new next/prev interval list via *next_arg. Shares (does not
  copy) next_key_part, incrementing its use_count via increment_use_count().
  Returns NULL on out-of-memory or when MAX_SEL_ARGS is exceeded.
*/
SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
                        SEL_ARG **next_arg)
{
  SEL_ARG *tmp;

  /* Bail out if we have already generated too many SEL_ARGs */
  if (++param->alloced_sel_args > MAX_SEL_ARGS)
    return 0;

  if (type != KEY_RANGE)
  {
    /* Special node (MAYBE_KEY etc.): no interval data or subtrees to copy. */
    if (!(tmp= new (param->mem_root) SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
    tmp->part= this->part;
  }
  else
  {
    if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
                                             min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    /* In-order traversal: clone left subtree before linking this node. */
    if (left != &null_element)
      if (!(tmp->left=left->clone(param, tmp, next_arg)))
	return 0;				// OOM

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(param, tmp, next_arg)))
	return 0;				// OOM
  }
  increment_use_count(1);
  tmp->color= color;
  tmp->elements= this->elements;
  tmp->max_part_no= max_part_no;
  tmp->weight= weight;
  return tmp;
}
2086 
2087 /**
2088   This gives the first SEL_ARG in the interval list, and the minimal element
2089   in the red-black tree
2090 
2091   @return
2092   SEL_ARG   first SEL_ARG in the interval list
2093 */
first()2094 SEL_ARG *SEL_ARG::first()
2095 {
2096   SEL_ARG *next_arg=this;
2097   if (!next_arg->left)
2098     return 0;					// MAYBE_KEY
2099   while (next_arg->left != &null_element)
2100     next_arg=next_arg->left;
2101   return next_arg;
2102 }
2103 
/* Const overload: delegates to the non-const first(). */
const SEL_ARG *SEL_ARG::first() const
{
  return const_cast<SEL_ARG*>(this)->first();
}
2108 
last()2109 SEL_ARG *SEL_ARG::last()
2110 {
2111   SEL_ARG *next_arg=this;
2112   if (!next_arg->right)
2113     return 0;					// MAYBE_KEY
2114   while (next_arg->right != &null_element)
2115     next_arg=next_arg->right;
2116   return next_arg;
2117 }
2118 
2119 
/*
  Check if a compare is ok, when one takes ranges in account
  Returns -2 or 2 if the ranges were 'joined' like  < 2 and >= 2
*/

int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                     uint8 b_flag)
{
  int cmp;
  /* First check if there was a compare to a min or max element */
  if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
  {
    if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
	(b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
      return 0;
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  if (field->real_maybe_null())			// If null is part of key
  {
    /* The first byte of a nullable key value is the NULL indicator. */
    if (*a != *b)
    {
      return *a ? -1 : 1;
    }
    if (*a)
      goto end;					// NULL where equal
    a++; b++;					// Skip NULL marker
  }
  cmp=field->key_cmp(a , b);
  if (cmp) return cmp < 0 ? -1 : 1;		// The values differed

  // Check if the compared equal arguments was defined with open/closed range
 end:
  if (a_flag & (NEAR_MIN | NEAR_MAX))
  {
    if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
      return 0;
    if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
      return (a_flag & NEAR_MIN) ? 2 : -2;
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_flag & (NEAR_MIN | NEAR_MAX))
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements were equal
}
2167 
2168 
clone_tree(RANGE_OPT_PARAM * param)2169 SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2170 {
2171   SEL_ARG tmp_link,*next_arg,*root;
2172   next_arg= &tmp_link;
2173   if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2174     return 0;
2175   next_arg->next=0;				// Fix last link
2176   tmp_link.next->prev=0;			// Fix first link
2177   if (root)					// If not OOM
2178     root->use_count= 0;
2179   return root;
2180 }
2181 
2182 
/*
  Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
  objects from table read plans.
*/
class TABLE_READ_PLAN
{
public:
  /*
    Plan read cost, with or without cost of full row retrieval, depending
    on plan creation parameters.
  */
  double read_cost;
  ha_rows records; /* estimate of #rows to be examined */

  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
    scans that can be both ROR and non-ROR.
  */
  bool is_ror;

  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
       retrieve_full_rows  If TRUE, created quick select will do full record
                           retrieval.
       parent_alloc        Memory pool to use, if any.

    NOTES
      retrieve_full_rows is ignored by some implementations.

    RETURN
      created quick select
      NULL on any error.
  */
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

  /* Table read plans are allocated on MEM_ROOT and are never deleted */
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return (void*) alloc_root(mem_root, (uint) size); }
  /* TRASH_FREE marks the memory; the storage itself lives on the MEM_ROOT. */
  static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); }
  /* Placement-delete counterpart of the placement new above. */
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
  virtual ~TABLE_READ_PLAN() {}               /* Remove gcc warning */
  /**
     Add basic info for this TABLE_READ_PLAN to the optimizer trace.

     @param param        Parameters for range analysis of this table
     @param trace_object The optimizer trace object the info is appended to
  */
  virtual void trace_basic_info(PARAM *param,
                                Json_writer_object *trace_object) const= 0;

};
2239 
2240 class TRP_ROR_INTERSECT;
2241 class TRP_ROR_UNION;
2242 class TRP_INDEX_MERGE;
2243 
2244 
/*
  Plan for a QUICK_RANGE_SELECT scan.
  TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
  QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
  record retrieval scans.
*/

class TRP_RANGE : public TABLE_READ_PLAN
{
public:
  SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
  uint     key_idx; /* key number in PARAM::key */
  uint     mrr_flags;
  /* NOTE(review): not set by the constructor; presumably filled in by the
     plan's creator before make_quick() -- confirm at call sites. */
  uint     mrr_buf_size;

  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
   : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
  {}
  virtual ~TRP_RANGE() {}                     /* Remove gcc warning */

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc)
  {
    DBUG_ENTER("TRP_RANGE::make_quick");
    QUICK_RANGE_SELECT *quick;
    /* Copy the plan's cost estimates into the created quick select. */
    if ((quick= get_quick_select(param, key_idx, key,  mrr_flags,
                                 mrr_buf_size, parent_alloc)))
    {
      quick->records= records;
      quick->read_time= read_cost;
    }
    DBUG_RETURN(quick);
  }
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;
};
2281 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const2282 void TRP_RANGE::trace_basic_info(PARAM *param,
2283                                  Json_writer_object *trace_object) const
2284 {
2285   DBUG_ASSERT(trace_object->trace_started());
2286   DBUG_ASSERT(param->using_real_indexes);
2287   const uint keynr_in_table= param->real_keynr[key_idx];
2288 
2289   const KEY &cur_key= param->table->key_info[keynr_in_table];
2290   const KEY_PART_INFO *key_part= cur_key.key_part;
2291 
2292   trace_object->add("type", "range_scan")
2293                .add("index", cur_key.name)
2294                .add("rows", records);
2295 
2296   Json_writer_array trace_range(param->thd, "ranges");
2297 
2298   // TRP_RANGE should not be created if there are no range intervals
2299   DBUG_ASSERT(key);
2300 
2301   trace_ranges(&trace_range, param, key_idx, key, key_part);
2302 }
2303 
2304 
/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */

class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
  TRP_ROR_INTERSECT() {}                      /* Remove gcc warning */
  virtual ~TRP_ROR_INTERSECT() {}             /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);

  /* Members below are filled in by the plan's creator, not the ctor. */
  /* Array of pointers to ROR range scans used in this intersection */
  struct st_ror_scan_info **first_scan;
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
  double index_scan_costs; /* SUM(cost(index_scan)) */
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;
};
2324 
2325 
2326 
/*
  Plan for QUICK_ROR_UNION_SELECT scan.
  QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
  is ignored by make_quick.
*/

class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
  TRP_ROR_UNION() {}                          /* Remove gcc warning */
  virtual ~TRP_ROR_UNION() {}                 /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  /* Members below are filled in by the plan's creator, not the ctor. */
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;
};
2345 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const2346 void TRP_ROR_UNION::trace_basic_info(PARAM *param,
2347                                      Json_writer_object *trace_object) const
2348 {
2349   THD *thd= param->thd;
2350   DBUG_ASSERT(trace_object->trace_started());
2351   trace_object->add("type", "index_roworder_union");
2352   Json_writer_array smth_trace(thd, "union_of");
2353   for (TABLE_READ_PLAN **current= first_ror; current != last_ror; current++)
2354   {
2355     Json_writer_object trp_info(thd);
2356     (*current)->trace_basic_info(param, &trp_info);
2357   }
2358 }
2359 
/*
  Plan for QUICK_INDEX_INTERSECT_SELECT scan.
  QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
  is ignored by make_quick.
*/

class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN
{
public:
  TRP_INDEX_INTERSECT() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_INTERSECT() {}               /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  /* Members below are filled in by the plan's creator, not the ctor. */
  TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
  TRP_RANGE **range_scans_end; /* end of the array */
  /* keys whose scans are to be filtered by cpk conditions */
  key_map filtered_scans;
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;

};
2381 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const2382 void TRP_INDEX_INTERSECT::trace_basic_info(PARAM *param,
2383                                        Json_writer_object *trace_object) const
2384 {
2385   THD *thd= param->thd;
2386   DBUG_ASSERT(trace_object->trace_started());
2387   trace_object->add("type", "index_sort_intersect");
2388   Json_writer_array smth_trace(thd, "index_sort_intersect_of");
2389   for (TRP_RANGE **current= range_scans; current != range_scans_end;
2390                                                           current++)
2391   {
2392     Json_writer_object trp_info(thd);
2393     (*current)->trace_basic_info(param, &trp_info);
2394   }
2395 }
2396 
/*
  Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
  is ignored by make_quick.
*/
2402 
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
  TRP_INDEX_MERGE() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_MERGE() {}               /* Remove gcc warning */
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  /* Members below are filled in by the plan's creator, not the ctor. */
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;
};
2415 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const2416 void TRP_INDEX_MERGE::trace_basic_info(PARAM *param,
2417                                        Json_writer_object *trace_object) const
2418 {
2419   THD *thd= param->thd;
2420   DBUG_ASSERT(trace_object->trace_started());
2421   trace_object->add("type", "index_merge");
2422   Json_writer_array smth_trace(thd, "index_merge_of");
2423   for (TRP_RANGE **current= range_scans; current != range_scans_end; current++)
2424   {
2425     Json_writer_object trp_info(thd);
2426     (*current)->trace_basic_info(param, &trp_info);
2427   }
2428 }
2429 
/*
  Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
*/

class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min, have_max, have_agg_distinct;
  KEY_PART_INFO *min_max_arg_part; /* May be NULL (no MIN/MAX argument). */
  uint group_prefix_len;
  uint used_key_parts;
  uint group_key_parts;
  KEY *index_info;
  uint index;
  uint key_infix_len;
  /* Only the first key_infix_len bytes are meaningful. */
  uchar key_infix[MAX_KEY_LENGTH];
  SEL_TREE *range_tree; /* Represents all range predicates in the query. */
  SEL_ARG  *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
  uint param_idx; /* Index of used key in param->key. */
  bool is_index_scan; /* Use index_next() instead of random read */
public:
  /* Number of records selected by the ranges in index_tree. */
  ha_rows quick_prefix_records;
public:
  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    bool have_agg_distinct_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
                    uint index_arg, uint key_infix_len_arg,
                    uchar *key_infix_arg,
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    have_agg_distinct(have_agg_distinct_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
    quick_prefix_records(quick_prefix_records_arg)
    {
      /* The infix bytes are copied into this object; key_infix_arg may die. */
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
  virtual ~TRP_GROUP_MIN_MAX() {}             /* Remove gcc warning */

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
  void use_index_scan() { is_index_scan= TRUE; }
  void trace_basic_info(PARAM *param,
                        Json_writer_object *trace_object) const;
};
2483 
2484 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const2485 void TRP_GROUP_MIN_MAX::trace_basic_info(PARAM *param,
2486                                 Json_writer_object *trace_object) const
2487 {
2488   THD *thd= param->thd;
2489   DBUG_ASSERT(trace_object->trace_started());
2490 
2491   trace_object->add("type", "index_group").add("index", index_info->name);
2492 
2493   if (min_max_arg_part)
2494     trace_object->add("min_max_arg", min_max_arg_part->field->field_name);
2495   else
2496     trace_object->add_null("min_max_arg");
2497 
2498   trace_object->add("min_aggregate", have_min)
2499                .add("max_aggregate", have_max)
2500                .add("distinct_aggregate", have_agg_distinct)
2501                .add("rows", records)
2502                .add("cost", read_cost);
2503 
2504   const KEY_PART_INFO *key_part= index_info->key_part;
2505   {
2506     Json_writer_array trace_keyparts(thd, "key_parts_used_for_access");
2507     for (uint partno= 0; partno < used_key_parts; partno++)
2508     {
2509       const KEY_PART_INFO *cur_key_part= key_part + partno;
2510       trace_keyparts.add(cur_key_part->field->field_name);
2511     }
2512   }
2513 
2514   Json_writer_array trace_range(thd, "ranges");
2515 
2516   // can have group quick without ranges
2517   if (index_tree)
2518   {
2519     trace_ranges(&trace_range, param, param_idx,
2520                  index_tree, key_part);
2521   }
2522 }
2523 
2524 
/*
  Descriptor of a single index scan considered for index intersection.
*/
typedef struct st_index_scan_info
{
  uint      idx;      /* # of used key in param->keys */
  uint      keynr;    /* # of used key in table */
  uint      range_count;  /* # of ranges built for this scan */
  ha_rows   records;  /* estimate of # records this scan will return */

  /* Set of intervals over key fields that will be used for row retrieval. */
  SEL_ARG   *sel_arg;

  KEY *key_info;          /* description of the used index */
  uint used_key_parts;    /* # of leading key parts used by the scan */

  /* Estimate of # records filtered out by intersection with cpk */
  ha_rows   filtered_out;
  /* Bitmap of fields used in index intersection */
  MY_BITMAP used_fields;

  /* Fields used in the query and covered by ROR scan. */
  MY_BITMAP covered_fields;
  uint      used_fields_covered; /* # of set bits in covered_fields */
  int       key_rec_length; /* length of key record (including rowid) */

  /*
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
  */
  double    index_read_cost;
  uint      first_uncovered_field; /* first unused bit in covered_fields */
  uint      key_components; /* # of parts in the key */
} INDEX_SCAN_INFO;
2556 
2557 /*
2558   Fill param->needed_fields with bitmap of fields used in the query.
2559   SYNOPSIS
2560     fill_used_fields_bitmap()
2561       param Parameter from test_quick_select function.
2562 
2563   NOTES
2564     Clustered PK members are not put into the bitmap as they are implicitly
2565     present in all keys (and it is impossible to avoid reading them).
2566   RETURN
2567     0  Ok
2568     1  Out of memory.
2569 */
2570 
fill_used_fields_bitmap(PARAM * param)2571 static int fill_used_fields_bitmap(PARAM *param)
2572 {
2573   TABLE *table= param->table;
2574   my_bitmap_map *tmp;
2575   uint pk;
2576   param->tmp_covered_fields.bitmap= 0;
2577   param->fields_bitmap_size= table->s->column_bitmap_size;
2578   if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2579                                   param->fields_bitmap_size)) ||
2580       my_bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2581     return 1;
2582 
2583   bitmap_copy(&param->needed_fields, table->read_set);
2584   bitmap_union(&param->needed_fields, table->write_set);
2585 
2586   pk= param->table->s->primary_key;
2587   if (param->table->file->pk_is_clustering_key(pk))
2588   {
2589     /* The table uses clustered PK and it is not internally generated */
2590     KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2591     KEY_PART_INFO *key_part_end= key_part +
2592                                  param->table->key_info[pk].user_defined_key_parts;
2593     for (;key_part != key_part_end; ++key_part)
2594       bitmap_clear_bit(&param->needed_fields, key_part->fieldnr-1);
2595   }
2596   return 0;
2597 }
2598 
2599 
2600 /*
2601   Test if a key can be used in different ranges
2602 
2603   SYNOPSIS
2604     SQL_SELECT::test_quick_select()
2605       thd               Current thread
2606       keys_to_use       Keys to use for range retrieval
2607       prev_tables       Tables assumed to be already read when the scan is
2608                         performed (but not read at the moment of this call)
2609       limit             Query limit
2610       force_quick_range Prefer to use range (instead of full table scan) even
2611                         if it is more expensive.
2612       remove_false_parts_of_where  Remove parts of OR-clauses for which range
2613                                    analysis produced SEL_TREE(IMPOSSIBLE)
2614       only_single_index_range_scan Evaluate only single index range scans
2615 
2616   NOTES
2617     Updates the following in the select parameter:
      needed_reg - Bits for keys which may be used if all prev regs are read
2619       quick      - Parameter to use when reading records.
2620 
2621     In the table struct the following information is updated:
2622       quick_keys           - Which keys can be used
2623       quick_rows           - How many rows the key matches
2624       opt_range_condition_rows - E(# rows that will satisfy the table condition)
2625 
2626   IMPLEMENTATION
2627     opt_range_condition_rows value is obtained as follows:
2628 
2629       It is a minimum of E(#output rows) for all considered table access
2630       methods (range and index_merge accesses over various indexes).
2631 
2632     The obtained value is not a true E(#rows that satisfy table condition)
2633     but rather a pessimistic estimate. To obtain a true E(#...) one would
2634     need to combine estimates of various access methods, taking into account
2635     correlations between sets of rows they will return.
2636 
2637     For example, if values of tbl.key1 and tbl.key2 are independent (a right
2638     assumption if we have no information about their correlation) then the
2639     correct estimate will be:
2640 
2641       E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
2642       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2)
2643 
2644     which is smaller than
2645 
2646        MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2)))
2647 
2648     which is currently produced.
2649 
2650   TODO
2651    * Change the value returned in opt_range_condition_rows from a pessimistic
2652      estimate to true E(#rows that satisfy table condition).
     (we can re-use some of E(#rows) calculation code from
2654      index_merge/intersection for this)
2655 
2656    * Check if this function really needs to modify keys_to_use, and change the
2657      code to pass it by reference if it doesn't.
2658 
   * In addition to force_quick_range other means can be (and usually are) used
2660      to make this function prefer range over full table scan. Figure out if
2661      force_quick_range is really needed.
2662 
2663   RETURN
2664    -1 if error or impossible select (i.e. certainly no rows will be selected)
2665     0 if can't use quick_select
2666     1 if found usable ranges and quick select has been successfully created.
2667 */
2668 
test_quick_select(THD * thd,key_map keys_to_use,table_map prev_tables,ha_rows limit,bool force_quick_range,bool ordered_output,bool remove_false_parts_of_where,bool only_single_index_range_scan)2669 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
2670 				  table_map prev_tables,
2671 				  ha_rows limit, bool force_quick_range,
2672                                   bool ordered_output,
2673                                   bool remove_false_parts_of_where,
2674                                   bool only_single_index_range_scan)
2675 {
2676   uint idx;
2677   double scan_time;
2678   Item *notnull_cond= NULL;
2679   TABLE_READ_PLAN *best_trp= NULL;
2680   SEL_ARG **backup_keys= 0;
2681   DBUG_ENTER("SQL_SELECT::test_quick_select");
2682   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
2683 		      (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2684 		      (ulong) const_tables));
2685   DBUG_PRINT("info", ("records: %lu", (ulong) head->stat_records()));
2686   delete quick;
2687   quick=0;
2688   needed_reg.clear_all();
2689   quick_keys.clear_all();
2690   head->with_impossible_ranges.clear_all();
2691   DBUG_ASSERT(!head->is_filled_at_execution());
2692   if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
2693     DBUG_RETURN(0);
2694   records= head->stat_records();
2695   notnull_cond= head->notnull_cond;
2696   if (!records)
2697     records++;					/* purecov: inspected */
2698   if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
2699     only_single_index_range_scan= 1;
2700 
2701   if (head->force_index || force_quick_range)
2702     scan_time= read_time= DBL_MAX;
2703   else
2704   {
2705     scan_time= rows2double(records) / TIME_FOR_COMPARE;
2706     /*
2707       The 2 is there to prefer range scans to full table scans.
2708       This is mainly to make the test suite happy as many tests has
2709       very few rows. In real life tables has more than a few rows and the
2710       +2 has no practical effect.
2711     */
2712     read_time= (double) head->file->scan_time() + scan_time + 2;
2713     if (limit < records && read_time < (double) records + scan_time + 1 )
2714     {
2715       read_time= (double) records + scan_time + 1; // Force to use index
2716       notnull_cond= NULL;
2717     }
2718   }
2719 
2720   possible_keys.clear_all();
2721 
2722   DBUG_PRINT("info",("Time to scan table: %g", read_time));
2723 
2724   Json_writer_object table_records(thd);
2725   table_records.add_table_name(head);
2726 
2727   Json_writer_object trace_range(thd, "range_analysis");
2728   {
2729     Json_writer_object table_rec(thd, "table_scan");
2730     table_rec.add("rows", records).add("cost", read_time);
2731   }
2732 
2733   keys_to_use.intersect(head->keys_in_use_for_query);
2734   if (!keys_to_use.is_clear_all())
2735   {
2736     uchar buff[STACK_BUFF_ALLOC];
2737     MEM_ROOT alloc;
2738     SEL_TREE *tree= NULL;
2739     SEL_TREE *notnull_cond_tree= NULL;
2740     KEY_PART *key_parts;
2741     KEY *key_info;
2742     PARAM param;
2743     bool force_group_by = false;
2744 
2745     if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff))
2746       DBUG_RETURN(0);                           // Fatal error flag is set
2747 
2748     /* set up parameter that is passed to all functions */
2749     param.thd= thd;
2750     param.baseflag= head->file->ha_table_flags();
2751     param.prev_tables=prev_tables | const_tables;
2752     param.read_tables=read_tables;
2753     param.current_table= head->map;
2754     param.table=head;
2755     param.keys=0;
2756     param.mem_root= &alloc;
2757     param.old_root= thd->mem_root;
2758     param.needed_reg= &needed_reg;
2759     param.imerge_cost_buff_size= 0;
2760     param.using_real_indexes= TRUE;
2761     param.remove_jump_scans= TRUE;
2762     param.max_key_parts= 0;
2763     param.remove_false_where_parts= remove_false_parts_of_where;
2764     param.force_default_mrr= ordered_output;
2765     param.possible_keys.clear_all();
2766 
2767     thd->no_errors=1;				// Don't warn about NULL
2768     init_sql_alloc(key_memory_quick_range_select_root, &alloc,
2769                    thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
2770     if (!(param.key_parts=
2771            (KEY_PART*) alloc_root(&alloc,
2772                                   sizeof(KEY_PART) *
2773 	                          head->s->actual_n_key_parts(thd))) ||
2774         fill_used_fields_bitmap(&param))
2775     {
2776       thd->no_errors=0;
2777       free_root(&alloc,MYF(0));			// Return memory & allocator
2778       DBUG_RETURN(-1);				// Error
2779     }
2780     key_parts= param.key_parts;
2781 
2782     /*
2783       Make an array with description of all key parts of all table keys.
2784       This is used in get_mm_parts function.
2785     */
2786     key_info= head->key_info;
2787     uint max_key_len= 0;
2788 
2789     Json_writer_array trace_idx(thd, "potential_range_indexes");
2790 
2791     for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
2792     {
2793       Json_writer_object trace_idx_details(thd);
2794       trace_idx_details.add("index", key_info->name);
2795       KEY_PART_INFO *key_part_info;
2796       uint n_key_parts= head->actual_n_key_parts(key_info);
2797 
2798       if (!keys_to_use.is_set(idx))
2799       {
2800         trace_idx_details.add("usable", false)
2801                          .add("cause", "not applicable");
2802         continue;
2803       }
2804       if (key_info->flags & HA_FULLTEXT)
2805       {
2806         trace_idx_details.add("usable", false).add("cause", "fulltext");
2807         continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG
2808       }
2809 
2810       trace_idx_details.add("usable", true);
2811       param.key[param.keys]=key_parts;
2812       key_part_info= key_info->key_part;
2813       uint cur_key_len= 0;
2814       Json_writer_array trace_keypart(thd, "key_parts");
2815       for (uint part= 0 ; part < n_key_parts ;
2816            part++, key_parts++, key_part_info++)
2817       {
2818 	key_parts->key=		 param.keys;
2819 	key_parts->part=	 part;
2820 	key_parts->length=       key_part_info->length;
2821 	key_parts->store_length= key_part_info->store_length;
2822         cur_key_len += key_part_info->store_length;
2823 	key_parts->field=	 key_part_info->field;
2824 	key_parts->null_bit=	 key_part_info->null_bit;
2825         key_parts->image_type =
2826           (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2827         /* Only HA_PART_KEY_SEG is used */
2828         key_parts->flag=         (uint8) key_part_info->key_part_flag;
2829         trace_keypart.add(key_parts->field->field_name);
2830       }
2831       param.real_keynr[param.keys++]=idx;
2832       if (cur_key_len > max_key_len)
2833         max_key_len= cur_key_len;
2834     }
2835     trace_idx.end();
2836 
2837     param.key_parts_end=key_parts;
2838     param.alloced_sel_args= 0;
2839 
2840     max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
2841     if (!(param.min_key= (uchar*)alloc_root(&alloc,max_key_len)) ||
2842         !(param.max_key= (uchar*)alloc_root(&alloc,max_key_len)))
2843     {
2844       thd->no_errors=0;
2845       free_root(&alloc,MYF(0));			// Return memory & allocator
2846       DBUG_RETURN(-1);				// Error
2847     }
2848 
2849     thd->mem_root= &alloc;
2850     /* Calculate cost of full index read for the shortest covering index */
2851     if (!force_quick_range && !head->covering_keys.is_clear_all())
2852     {
2853       int key_for_use= find_shortest_key(head, &head->covering_keys);
2854       double key_read_time= (head->file->key_scan_time(key_for_use) +
2855                              rows2double(records) / TIME_FOR_COMPARE);
2856       DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
2857                            "read time %g", key_for_use, key_read_time));
2858 
2859       Json_writer_object trace_cov(thd, "best_covering_index_scan");
2860       bool chosen= FALSE;
2861       if (key_read_time < read_time)
2862       {
2863         read_time= key_read_time;
2864         chosen= TRUE;
2865       }
2866       trace_cov.add("index", head->key_info[key_for_use].name)
2867                .add("cost", key_read_time).add("chosen", chosen);
2868       if (!chosen)
2869         trace_cov.add("cause", "cost");
2870     }
2871 
2872     double best_read_time= read_time;
2873 
2874     if (notnull_cond)
2875       notnull_cond_tree= notnull_cond->get_mm_tree(&param, &notnull_cond);
2876 
2877     if (cond || notnull_cond_tree)
2878     {
2879       {
2880         Json_writer_array trace_range_summary(thd,
2881                                               "setup_range_conditions");
2882         if (cond)
2883           tree= cond->get_mm_tree(&param, &cond);
2884         if (notnull_cond_tree)
2885           tree= tree_and(&param, tree, notnull_cond_tree);
2886       }
2887       if (tree)
2888       {
2889         if (tree->type == SEL_TREE::IMPOSSIBLE)
2890         {
2891           records=0L;                      /* Return -1 from this function. */
2892           read_time= (double) HA_POS_ERROR;
2893           trace_range.add("impossible_range", true);
2894           goto free_mem;
2895         }
2896         /*
2897           If the tree can't be used for range scans, proceed anyway, as we
2898           can construct a group-min-max quick select
2899         */
2900         if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
2901         {
2902           trace_range.add("range_scan_possible", false);
2903           tree= NULL;
2904         }
2905       }
2906       else if (thd->is_error())
2907       {
2908         thd->no_errors=0;
2909         thd->mem_root= param.old_root;
2910         free_root(&alloc, MYF(0));
2911         DBUG_RETURN(-1);
2912       }
2913     }
2914 
2915     if (tree)
2916     {
2917       /*
2918         It is possible to use a range-based quick select (but it might be
2919         slower than 'all' table scan).
2920       */
2921       TRP_ROR_INTERSECT *rori_trp;
2922       TRP_INDEX_INTERSECT *intersect_trp;
2923       bool can_build_covering= FALSE;
2924       Json_writer_object trace_range(thd, "analyzing_range_alternatives");
2925 
2926       backup_keys= (SEL_ARG**) alloca(sizeof(backup_keys[0])*param.keys);
2927       memcpy(&backup_keys[0], &tree->keys[0],
2928              sizeof(backup_keys[0])*param.keys);
2929 
2930       remove_nonrange_trees(&param, tree);
2931 
2932       /* Get best 'range' plan and prepare data for making other plans */
2933       if (auto range_trp= get_key_scans_params(&param, tree,
2934                                                only_single_index_range_scan,
2935                                                true, best_read_time))
2936       {
2937         best_trp= range_trp;
2938         best_read_time= best_trp->read_cost;
2939       }
2940 
2941       /*
2942         Simultaneous key scans and row deletes on several handler
2943         objects are not allowed so don't use ROR-intersection for
2944         table deletes.
2945       */
2946       if ((thd->lex->sql_command != SQLCOM_DELETE) &&
2947            optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2948           !only_single_index_range_scan)
2949       {
2950         /*
2951           Get best non-covering ROR-intersection plan and prepare data for
2952           building covering ROR-intersection.
2953         */
2954         if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
2955                                               &can_build_covering)))
2956         {
2957           best_trp= rori_trp;
2958           best_read_time= best_trp->read_cost;
2959           /*
2960             Try constructing covering ROR-intersect only if it looks possible
2961             and worth doing.
2962           */
2963           if (!rori_trp->is_covering && can_build_covering &&
2964               (rori_trp= get_best_covering_ror_intersect(&param, tree,
2965                                                          best_read_time)))
2966             best_trp= rori_trp;
2967         }
2968       }
2969       /*
2970         Do not look for an index intersection  plan if there is a covering
2971         index. The scan by this covering index will be always cheaper than
2972         any index intersection.
2973       */
2974       if (param.table->covering_keys.is_clear_all() &&
2975           optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2976           optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT) &&
2977           !only_single_index_range_scan)
2978       {
2979         if ((intersect_trp= get_best_index_intersect(&param, tree,
2980                                                     best_read_time)))
2981         {
2982           best_trp= intersect_trp;
2983           best_read_time= best_trp->read_cost;
2984           set_if_smaller(param.table->opt_range_condition_rows,
2985                          intersect_trp->records);
2986         }
2987       }
2988 
2989       if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2990           head->stat_records() != 0 && !only_single_index_range_scan)
2991       {
2992         /* Try creating index_merge/ROR-union scan. */
2993         SEL_IMERGE *imerge;
2994         TABLE_READ_PLAN *best_conj_trp= NULL,
2995           *UNINIT_VAR(new_conj_trp); /* no empty index_merge lists possible */
2996         DBUG_PRINT("info",("No range reads possible,"
2997                            " trying to construct index_merge"));
2998         List_iterator_fast<SEL_IMERGE> it(tree->merges);
2999         Json_writer_array trace_idx_merge(thd, "analyzing_index_merge_union");
3000         while ((imerge= it++))
3001         {
3002           new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
3003           if (new_conj_trp)
3004             set_if_smaller(param.table->opt_range_condition_rows,
3005                            new_conj_trp->records);
3006           if (new_conj_trp &&
3007               (!best_conj_trp ||
3008                new_conj_trp->read_cost < best_conj_trp->read_cost))
3009           {
3010             best_conj_trp= new_conj_trp;
3011             best_read_time= best_conj_trp->read_cost;
3012           }
3013         }
3014         if (best_conj_trp)
3015           best_trp= best_conj_trp;
3016       }
3017     }
3018 
3019     /*
3020       Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
3021       Notice that it can be constructed no matter if there is a range tree.
3022     */
3023     DBUG_EXECUTE_IF("force_group_by", force_group_by = true; );
3024     if (!only_single_index_range_scan)
3025     {
3026       TRP_GROUP_MIN_MAX *group_trp;
3027       if (tree)
3028         restore_nonrange_trees(&param, tree, backup_keys);
3029       if ((group_trp= get_best_group_min_max(&param, tree, read_time)))
3030       {
3031         param.table->opt_range_condition_rows= MY_MIN(group_trp->records,
3032                                                   head->stat_records());
3033         Json_writer_object grp_summary(thd, "best_group_range_summary");
3034 
3035         if (unlikely(thd->trace_started()))
3036           group_trp->trace_basic_info(&param, &grp_summary);
3037 
3038         if (group_trp->read_cost < best_read_time || force_group_by)
3039         {
3040           grp_summary.add("chosen", true);
3041           best_trp= group_trp;
3042           best_read_time= best_trp->read_cost;
3043         }
3044         else
3045           grp_summary.add("chosen", false).add("cause", "cost");
3046       }
3047       if (tree)
3048         remove_nonrange_trees(&param, tree);
3049     }
3050 
3051     thd->mem_root= param.old_root;
3052 
3053     /* If we got a read plan, create a quick select from it. */
3054     if (best_trp)
3055     {
3056       records= best_trp->records;
3057       if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
3058       {
3059         delete quick;
3060         quick= NULL;
3061       }
3062     }
3063     possible_keys= param.possible_keys;
3064 
3065   free_mem:
3066     if (unlikely(quick && best_trp && thd->trace_started()))
3067     {
3068       Json_writer_object trace_range_summary(thd,
3069                                            "chosen_range_access_summary");
3070       {
3071         Json_writer_object trace_range_plan(thd, "range_access_plan");
3072         best_trp->trace_basic_info(&param, &trace_range_plan);
3073       }
3074       trace_range_summary.add("rows_for_plan", quick->records)
3075                          .add("cost_for_plan", quick->read_time)
3076                          .add("chosen", true);
3077     }
3078 
3079     free_root(&alloc,MYF(0));			// Return memory & allocator
3080     thd->mem_root= param.old_root;
3081     thd->no_errors=0;
3082   }
3083 
3084   DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
3085 
3086   /*
3087     Assume that if the user is using 'limit' we will only need to scan
3088     limit rows if we are using a key
3089   */
3090   DBUG_RETURN(records ? MY_TEST(quick) : -1);
3091 }
3092 
3093 /****************************************************************************
3094  * Condition selectivity module
3095  ****************************************************************************/
3096 
3097 
3098 /*
3099   Build descriptors of pseudo-indexes over columns to perform range analysis
3100 
3101   SYNOPSIS
3102     create_key_parts_for_pseudo_indexes()
3103       param       IN/OUT data structure for the descriptors to be built
3104       used_fields bitmap of columns for which the descriptors are to be built
3105 
3106   DESCRIPTION
3107     For each column marked in the bitmap used_fields the function builds
3108     a descriptor of a single-component pseudo-index over this column that
3109     can be used for the range analysis of the predicates over this columns.
3110     The descriptors are created in the memory of param->mem_root.
3111 
3112   RETURN
3113     FALSE  in the case of success
3114     TRUE   otherwise
3115 */
3116 
3117 static
create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM * param,MY_BITMAP * used_fields)3118 bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
3119                                          MY_BITMAP *used_fields)
3120 {
3121   Field **field_ptr;
3122   TABLE *table= param->table;
3123   uint parts= 0;
3124 
3125   for (field_ptr= table->field; *field_ptr; field_ptr++)
3126   {
3127     Field *field= *field_ptr;
3128     if (bitmap_is_set(used_fields, field->field_index) &&
3129         is_eits_usable(field))
3130       parts++;
3131   }
3132 
3133   KEY_PART *key_part;
3134   uint keys= 0;
3135 
3136   if (!parts)
3137     return TRUE;
3138 
3139   if (!(key_part= (KEY_PART *)  alloc_root(param->mem_root,
3140                                            sizeof(KEY_PART) * parts)))
3141     return TRUE;
3142 
3143   param->key_parts= key_part;
3144   uint max_key_len= 0;
3145   for (field_ptr= table->field; *field_ptr; field_ptr++)
3146   {
3147     Field *field= *field_ptr;
3148     if (bitmap_is_set(used_fields, field->field_index))
3149     {
3150       if (!is_eits_usable(field))
3151         continue;
3152 
3153       uint16 store_length;
3154       uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
3155       key_part->key= keys;
3156       key_part->part= 0;
3157       if (field->flags & BLOB_FLAG)
3158         key_part->length= max_key_part_length;
3159       else
3160       {
3161         key_part->length= (uint16) field->key_length();
3162         set_if_smaller(key_part->length, max_key_part_length);
3163       }
3164       store_length= key_part->length;
3165       if (field->real_maybe_null())
3166         store_length+= HA_KEY_NULL_LENGTH;
3167       if (field->real_type() == MYSQL_TYPE_VARCHAR)
3168         store_length+= HA_KEY_BLOB_LENGTH;
3169       if (max_key_len < store_length)
3170         max_key_len= store_length;
3171       key_part->store_length= store_length;
3172       key_part->field= field;
3173       key_part->image_type= Field::itRAW;
3174       key_part->flag= 0;
3175       param->key[keys]= key_part;
3176       keys++;
3177       key_part++;
3178     }
3179   }
3180 
3181   max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
3182   if (!(param->min_key= (uchar*)alloc_root(param->mem_root, max_key_len)) ||
3183       !(param->max_key= (uchar*)alloc_root(param->mem_root, max_key_len)))
3184   {
3185     return true;
3186   }
3187   param->keys= keys;
3188   param->key_parts_end= key_part;
3189 
3190   return FALSE;
3191 }
3192 
3193 
3194 /*
3195   Estimate the number of rows in all ranges built for a column
3196   by the range optimizer
3197 
3198   SYNOPSIS
3199     records_in_column_ranges()
3200       param      the data structure to access descriptors of pseudo indexes
3201                  built over columns used in the condition of the processed query
3202       idx        the index of the descriptor of interest in param
3203       tree       the tree representing ranges built for the interesting column
3204 
3205   DESCRIPTION
3206     This function retrieves the ranges represented by the SEL_ARG 'tree' and
    for each such range r it calls the function get_column_range_cardinality()
3208     that estimates the number of expected rows in r. It is assumed that param
3209     is the data structure containing the descriptors of pseudo-indexes that
3210     has been built to perform range analysis of the range conditions imposed
3211     on the columns used in the processed query, while idx is the index of the
3212     descriptor created in 'param' exactly for the column for which 'tree'
3213     has been built by the range optimizer.
3214 
3215   RETURN
3216     the number of rows in the retrieved ranges
3217 */
3218 
3219 static
records_in_column_ranges(PARAM * param,uint idx,SEL_ARG * tree)3220 double records_in_column_ranges(PARAM *param, uint idx,
3221                                 SEL_ARG *tree)
3222 {
3223   THD *thd= param->thd;
3224   SEL_ARG_RANGE_SEQ seq;
3225   KEY_MULTI_RANGE range;
3226   range_seq_t seq_it;
3227   double rows;
3228   Field *field;
3229   uint flags= 0;
3230   double total_rows= 0;
3231   RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
3232                          sel_arg_range_seq_next, 0, 0};
3233 
3234   /* Handle cases when we don't have a valid non-empty list of range */
3235   if (!tree)
3236     return DBL_MAX;
3237   if (tree->type == SEL_ARG::IMPOSSIBLE)
3238     return (0L);
3239 
3240   field= tree->field;
3241 
3242   seq.keyno= idx;
3243   seq.real_keyno= MAX_KEY;
3244   seq.param= param;
3245   seq.start= tree;
3246   seq.is_ror_scan= FALSE;
3247 
3248   seq_it= seq_if.init((void *) &seq, 0, flags);
3249 
3250   Json_writer_array range_trace(thd, "ranges");
3251 
3252   while (!seq_if.next(seq_it, &range))
3253   {
3254     key_range *min_endp, *max_endp;
3255     min_endp= range.start_key.length? &range.start_key : NULL;
3256     max_endp= range.end_key.length? &range.end_key : NULL;
3257     int range_flag= range.range_flag;
3258 
3259     if (!range.start_key.length)
3260       range_flag |= NO_MIN_RANGE;
3261     if (!range.end_key.length)
3262       range_flag |= NO_MAX_RANGE;
3263     if (range.start_key.flag == HA_READ_AFTER_KEY)
3264       range_flag |= NEAR_MIN;
3265     if (range.start_key.flag == HA_READ_BEFORE_KEY)
3266       range_flag |= NEAR_MAX;
3267 
3268     if (unlikely(thd->trace_started()))
3269     {
3270       StringBuffer<128> range_info(system_charset_info);
3271       print_range_for_non_indexed_field(&range_info, field, &range);
3272       range_trace.add(range_info.c_ptr_safe(), range_info.length());
3273     }
3274 
3275     rows= get_column_range_cardinality(field, min_endp, max_endp, range_flag);
3276     if (DBL_MAX == rows)
3277     {
3278       total_rows= DBL_MAX;
3279       break;
3280     }
3281     total_rows += rows;
3282   }
3283   return total_rows;
3284 }
3285 
3286 
3287 /*
3288   Compare quick select ranges according to number of found rows
3289   If there is equal amounts of rows, use the long key part.
3290   The idea is that if we have keys (a),(a,b) and (a,b,c) and we have
3291   a query like WHERE a=1 and b=1 and c=1,
3292   it is better to use key (a,b,c) than (a) as it will ensure we don't also
3293   use histograms for columns b and c
3294 */
3295 
3296 static
cmp_quick_ranges(TABLE * table,uint * a,uint * b)3297 int cmp_quick_ranges(TABLE *table, uint *a, uint *b)
3298 {
3299   int tmp= CMP_NUM(table->opt_range[*a].rows, table->opt_range[*b].rows);
3300   if (tmp)
3301     return tmp;
3302   return -CMP_NUM(table->opt_range[*a].key_parts, table->opt_range[*b].key_parts);
3303 }
3304 
3305 
3306 /*
3307   Calculate the selectivity of the condition imposed on the rows of a table
3308 
3309   SYNOPSIS
3310     calculate_cond_selectivity_for_table()
3311       thd        the context handle
3312       table      the table of interest
3313       cond       conditions imposed on the rows of the table
3314 
3315   DESCRIPTION
3316     This function calculates the selectivity of range conditions cond imposed
3317     on the rows of 'table' in the processed query.
3318     The calculated selectivity is assigned to the field table->cond_selectivity.
3319 
3320     Selectivity is calculated as a product of selectivities imposed by:
3321 
3322     1. possible range accesses. (if multiple range accesses use the same
3323        restrictions on the same field, we make adjustments for that)
3324     2. Sargable conditions on fields for which we have column statistics (if
3325        a field is used in a possible range access, we assume that selectivity
3326        is already provided by the range access' estimates)
3327     3. Reading a few records from the table pages and checking the condition
3328        selectivity (this is used for conditions like "column LIKE '%val%'"
3329        where approaches #1 and #2 do not provide selectivity data).
3330 
3331   NOTE
3332     Currently the selectivities of range conditions over different columns are
3333     considered independent.
3334 
3335   RETURN
3336     FALSE  on success
3337     TRUE   otherwise
3338 */
3339 
bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
{
  uint keynr, range_index, ranges;
  /* Bitmap of fields referenced by sargable predicates (table->cond_set) */
  MY_BITMAP *used_fields= &table->cond_set;
  double table_records= (double)table->stat_records();
  /* Key numbers of possible range accesses, sorted by cmp_quick_ranges() */
  uint optimal_key_order[MAX_KEY];
  DBUG_ENTER("calculate_cond_selectivity_for_table");

  table->cond_selectivity= 1.0;

  /* Empty table: keep selectivity 1.0, nothing to estimate */
  if (table_records == 0)
    DBUG_RETURN(FALSE);

  QUICK_SELECT_I *quick;
  if ((quick=table->reginfo.join_tab->quick) &&
      quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
  {
    /*
      GROUP_MIN_MAX quick select: use its own record estimate directly and
      skip per-column analysis.
    */
    table->cond_selectivity*= (quick->records/table_records);
    DBUG_RETURN(FALSE);
  }

  if (!*cond)
    DBUG_RETURN(FALSE);

  /* Information schema tables have no usable statistics */
  if (table->pos_in_table_list->schema_table)
    DBUG_RETURN(FALSE);

  /* Columns whose selectivity has already been covered by range estimates */
  MY_BITMAP handled_columns;
  my_bitmap_map* buf;
  if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size)))
    DBUG_RETURN(TRUE);
  my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE);

  /*
    Calculate the selectivity of the range conditions supported by indexes.

    First, take into account possible range accesses.
    range access estimates are the most precise, we prefer them to any other
    estimate sources.
  */

  Json_writer_object trace_wrapper(thd);
  Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes");

  /*
    Walk through all quick ranges in the order of least found rows.
  */
  for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++)
    if (table->opt_range_keys.is_set(keynr))
      optimal_key_order[ranges++]= keynr;

  my_qsort2(optimal_key_order, ranges,
            sizeof(optimal_key_order[0]),
            (qsort2_cmp) cmp_quick_ranges, table);

  for (range_index= 0 ; range_index < ranges ; range_index++)
  {
    uint keynr= optimal_key_order[range_index];
    /* NOTE(review): the double braces below look like residue of a removed
       loop/condition kept to avoid re-indenting — confirm before flattening */
    {
      {
        uint i;
        uint used_key_parts= table->opt_range[keynr].key_parts;
        double quick_cond_selectivity= (table->opt_range[keynr].rows /
                                        table_records);
        KEY *key_info= table->key_info + keynr;
        KEY_PART_INFO* key_part= key_info->key_part;
        /*
          Suppose, there are range conditions on two keys
            KEY1 (col1, col2)
            KEY2 (col3, col2)

          we don't want to count selectivity of condition on col2 twice.

          First, find the longest key prefix that's made of columns whose
          selectivity wasn't already accounted for.
        */
        for (i= 0; i < used_key_parts; i++, key_part++)
        {
          if (bitmap_is_set(&handled_columns, key_part->fieldnr-1))
	    break;
          bitmap_set_bit(&handled_columns, key_part->fieldnr-1);
        }
        if (i)
        {
          double UNINIT_VAR(selectivity_mult);

          /*
            There is at least 1-column prefix of columns whose selectivity has
            not yet been accounted for.
          */
          table->cond_selectivity*= quick_cond_selectivity;
          Json_writer_object selectivity_for_index(thd);
          selectivity_for_index.add("index_name", key_info->name)
                               .add("selectivity_from_index",
                                    quick_cond_selectivity);
          if (i != used_key_parts)
	  {
            /*
              Range access got us estimate for #used_key_parts.
              We need estimate for #(i-1) key parts.
            */
            double f1= key_info->actual_rec_per_key(i-1);
            double f2= key_info->actual_rec_per_key(i);
            if (f1 > 0 && f2 > 0)
              selectivity_mult= f1 / f2;
            else
            {
              /*
                No statistics available, assume the selectivity is proportional
                to the number of key parts.
                (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1)
                NOTE(review): i >= 1 in this branch (guarded by "if (i)"),
                so the i=0 case in the note above cannot occur here.
              */
              selectivity_mult= ((double)(i+1)) / i;
            }
            table->cond_selectivity*= selectivity_mult;
            selectivity_for_index.add("selectivity_multiplier",
                                      selectivity_mult);
          }
          /*
            We need to set selectivity for fields supported by indexes.
            For single-component indexes and for some first components
            of other indexes we do it here. For the remaining fields
            we do it later in this function, in the same way as for the
            fields not used in any indexes.
	  */
	  if (i == 1)
	  {
            uint fieldnr= key_info->key_part[0].fieldnr;
            table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity;
            if (i != used_key_parts)
	      table->field[fieldnr-1]->cond_selectivity*= selectivity_mult;
            /* Field fully handled here: exclude it from the histogram pass */
            bitmap_clear_bit(used_fields, fieldnr-1);
	  }
        }
      }
    }
  }
  selectivity_for_indexes.end();

  /*
    Second step: calculate the selectivity of the range conditions not
    supported by any index and selectivity of the range condition
    over the fields whose selectivity has not been set yet.
  */
  Json_writer_array selectivity_for_columns(thd, "selectivity_for_columns");

  if (thd->variables.optimizer_use_condition_selectivity > 2 &&
      !bitmap_is_clear_all(used_fields) &&
      thd->variables.use_stat_tables > 0 && table->stats_is_read)
  {
    PARAM param;
    MEM_ROOT alloc;
    SEL_TREE *tree;
    double rows;

    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
    param.thd= thd;
    param.mem_root= &alloc;
    param.old_root= thd->mem_root;
    param.table= table;
    param.remove_false_where_parts= true;

    /* Build one pseudo "index" per remaining field so the range analyzer
       can produce per-column interval lists */
    if (create_key_parts_for_pseudo_indexes(&param, used_fields))
      goto free_alloc;

    param.prev_tables= param.read_tables= 0;
    param.current_table= table->map;
    param.using_real_indexes= FALSE;
    param.real_keynr[0]= 0;
    param.alloced_sel_args= 0;
    param.max_key_parts= 0;

    /* Suppress errors/warnings while evaluating the condition over ranges */
    thd->no_errors=1;

    tree= cond[0]->get_mm_tree(&param, cond);

    if (!tree)
      goto free_alloc;

    table->reginfo.impossible_range= 0;
    if (tree->type == SEL_TREE::IMPOSSIBLE)
    {
      /* Condition can never be true: no rows will match */
      rows= 0;
      table->reginfo.impossible_range= 1;
      goto free_alloc;
    }
    else if (tree->type == SEL_TREE::ALWAYS)
    {
      rows= table_records;
      goto free_alloc;
    }
    else if (tree->type == SEL_TREE::MAYBE)
    {
      rows= table_records;
      goto free_alloc;
    }

    /* One SEL_ARG per pseudo-index == one per analyzed column */
    for (uint idx= 0; idx < param.keys; idx++)
    {
      SEL_ARG *key= tree->keys[idx];
      if (key)
      {
        Json_writer_object selectivity_for_column(thd);
        selectivity_for_column.add("column_name", key->field->field_name);
        if (key->type == SEL_ARG::IMPOSSIBLE)
        {
          rows= 0;
          table->reginfo.impossible_range= 1;
          selectivity_for_column.add("selectivity_from_histogram", rows);
          selectivity_for_column.add("cause", "impossible range");
          goto free_alloc;
        }
        else
        {
          rows= records_in_column_ranges(&param, idx, key);
          /* DBL_MAX signals "no estimate available" - leave selectivity 1.0 */
          if (rows != DBL_MAX)
          {
            key->field->cond_selectivity= rows/table_records;
            selectivity_for_column.add("selectivity_from_histogram",
                                       key->field->cond_selectivity);
          }
        }
      }
    }

    /* Fold per-column selectivities into the table selectivity, skipping
       columns already accounted for by range access estimates */
    for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
    {
      Field *table_field= *field_ptr;
      if (bitmap_is_set(used_fields, table_field->field_index) &&
          table_field->cond_selectivity < 1.0)
      {
        if (!bitmap_is_set(&handled_columns, table_field->field_index))
          table->cond_selectivity*= table_field->cond_selectivity;
      }
    }

  free_alloc:
    thd->no_errors= 0;
    thd->mem_root= param.old_root;
    free_root(&alloc, MYF(0));

  }
  selectivity_for_columns.end();

  if (quick && (quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
     quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
  {
    table->cond_selectivity*= (quick->records/table_records);
  }

  bitmap_union(used_fields, &handled_columns);

  /* Check if we can improve selectivity estimates by using sampling */
  ulong check_rows=
    MY_MIN(thd->variables.optimizer_selectivity_sampling_limit,
        (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
  if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD &&
      thd->variables.optimizer_use_condition_selectivity > 4)
  {
    find_selective_predicates_list_processor_data *dt=
      (find_selective_predicates_list_processor_data *)
      alloc_root(thd->mem_root,
                 sizeof(find_selective_predicates_list_processor_data));
    if (!dt)
      DBUG_RETURN(TRUE);
    dt->list.empty();
    dt->table= table;
    if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, dt))
      DBUG_RETURN(TRUE);
    if (dt->list.elements > 0)
    {
      check_rows= check_selectivity(thd, check_rows, table, &dt->list);
      if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD)
      {
        COND_STATISTIC *stat;
        List_iterator_fast<COND_STATISTIC> it(dt->list);
        double examined_rows= check_rows;
        while ((stat= it++))
        {
          if (!stat->positive)
          {
            DBUG_PRINT("info", ("To avoid 0 assigned 1 to the counter"));
            stat->positive= 1; // avoid 0
          }
          DBUG_PRINT("info", ("The predicate selectivity : %g",
                              (double)stat->positive / examined_rows));
          double selectivity= ((double)stat->positive) / examined_rows;
          table->cond_selectivity*= selectivity;
          /*
            If a field is involved then we register its selectivity in case
            there in an equality with the field.
            For example in case
            t1.a LIKE "%bla%" and t1.a = t2.b
            the selectivity we have found could be used also for t2.
          */
          if (stat->field_arg)
          {
            stat->field_arg->cond_selectivity*= selectivity;

            if (stat->field_arg->next_equal_field)
            {
              /* Propagate along the circular list of equal fields */
              for (Field *next_field= stat->field_arg->next_equal_field;
                   next_field != stat->field_arg;
                   next_field= next_field->next_equal_field)
              {
                next_field->cond_selectivity*= selectivity;
                next_field->table->cond_selectivity*= selectivity;
              }
            }
          }
        }

      }
      /* This list and its elements put to mem_root so should not be freed */
      table->cond_selectivity_sampling_explain= &dt->list;
    }
  }
  trace_wrapper.add("cond_selectivity", table->cond_selectivity);
  DBUG_RETURN(FALSE);
}
3661 
3662 /****************************************************************************
3663  * Condition selectivity code ends
3664  ****************************************************************************/
3665 
3666 /****************************************************************************
3667  * Partition pruning module
3668  ****************************************************************************/
3669 
3670 /*
3671   Store field key image to table record
3672 
3673   SYNOPSIS
3674     store_key_image_to_rec()
3675       field  Field which key image should be stored
3676       ptr    Field value in key format
3677       len    Length of the value, in bytes
3678 
3679   ATTENTION
3680     len is the length of the value not counting the NULL-byte (at the same
3681     time, ptr points to the key image, which starts with NULL-byte for
3682     nullable columns)
3683 
3684   DESCRIPTION
3685     Copy the field value from its key image to the table record. The source
3686     is the value in key image format, occupying len bytes in buffer pointed
3687     by ptr. The destination is table record, in "field value in table record"
3688     format.
3689 */
3690 
store_key_image_to_rec(Field * field,uchar * ptr,uint len)3691 void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3692 {
3693   /* Do the same as print_key() does */
3694 
3695   if (field->real_maybe_null())
3696   {
3697     if (*ptr)
3698     {
3699       field->set_null();
3700       return;
3701     }
3702     field->set_notnull();
3703     ptr++;
3704   }
3705   MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
3706                                     &field->table->write_set);
3707   field->set_key_image(ptr, len);
3708   dbug_tmp_restore_column_map(&field->table->write_set, old_map);
3709 }
3710 
3711 #ifdef WITH_PARTITION_STORAGE_ENGINE
3712 
3713 /*
3714   PartitionPruningModule
3715 
3716   This part of the code does partition pruning. Partition pruning solves the
3717   following problem: given a query over partitioned tables, find partitions
3718   that we will not need to access (i.e. partitions that we can assume to be
3719   empty) when executing the query.
3720   The set of partitions to prune doesn't depend on which query execution
3721   plan will be used to execute the query.
3722 
3723   HOW IT WORKS
3724 
3725   Partition pruning module makes use of RangeAnalysisModule. The following
3726   examples show how the problem of partition pruning can be reduced to the
3727   range analysis problem:
3728 
3729   EXAMPLE 1
3730     Consider a query:
3731 
3732       SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3733 
3734     where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
3735     way to find the used (i.e. not pruned away) partitions is as follows:
3736 
3737     1. analyze the WHERE clause and extract the list of intervals over t1.a
3738        for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3739 
3740     2. for each interval I
3741        {
3742          find partitions that have non-empty intersection with I;
3743          mark them as used;
3744        }
3745 
3746   EXAMPLE 2
3747     Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3748     we need to:
3749 
3750     1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3751        The list of intervals we'll obtain will look like this:
3752        ((t1.a, t1.b) = (1,'foo')),
3753        ((t1.a, t1.b) = (2,'bar')),
3754        ((t1,a, t1.b) > (10,'zz'))
3755 
3756     2. for each interval I
3757        {
3758          if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3759          {
3760            calculate HASH(part_func(t1.a, t1.b));
3761            find which partition has records with this hash value and mark
3762              it as used;
3763          }
3764          else
3765          {
3766            mark all partitions as used;
3767            break;
3768          }
3769        }
3770 
3771    For both examples the step #1 is exactly what RangeAnalysisModule could
3772    be used to do, if it was provided with appropriate index description
3773    (array of KEY_PART structures).
3774    In example #1, we need to provide it with description of index(t1.a),
3775    in example #2, we need to provide it with description of index(t1.a, t1.b).
3776 
3777    These index descriptions are further called "partitioning index
3778    descriptions". Note that it doesn't matter if such indexes really exist,
3779    as range analysis module only uses the description.
3780 
3781    Putting it all together, partitioning module works as follows:
3782 
3783    prune_partitions() {
3784      call create_partition_index_description();
3785 
3786      call get_mm_tree(); // invoke the RangeAnalysisModule
3787 
3788      // analyze the obtained interval list and get used partitions
3789      call find_used_partitions();
3790   }
3791 
3792 */
3793 
/* Forward declarations for the partition pruning context types below */
struct st_part_prune_param;
struct st_part_opt_info;

/* Callback type: mark a whole partition (given by id) as used */
typedef void (*mark_full_part_func)(partition_info*, uint32);
3798 
3799 /*
3800   Partition pruning operation context
3801 */
typedef struct st_part_prune_param
{
  RANGE_OPT_PARAM range_param; /* Range analyzer parameters */

  /***************************************************************
   Following fields are filled in based solely on partitioning
   definition and not modified after that:
   **************************************************************/
  partition_info *part_info; /* Copy of table->part_info */
  /* Function to get partition id from partitioning fields only */
  get_part_id_func get_top_partition_id_func;
  /* Function to mark a partition as used (w/all subpartitions if they exist)*/
  mark_full_part_func mark_full_partition_used;

  /* Partitioning 'index' description, array of key parts */
  KEY_PART *key;

  /*
    Number of fields in partitioning 'index' definition created for
    partitioning (0 if partitioning 'index' doesn't include partitioning
    fields)
  */
  uint part_fields;
  uint subpart_fields; /* Same as above for subpartitioning */

  /*
    Number of the last partitioning field keypart in the index, or -1 if
    partitioning index definition doesn't include partitioning fields.
  */
  int last_part_partno;
  int last_subpart_partno; /* Same as above for subpartitioning */

  /*
    is_part_keypart[i] == MY_TEST(keypart #i in partitioning index is a member
                                  used in partitioning)
    Used to maintain current values of cur_part_fields and cur_subpart_fields
  */
  my_bool *is_part_keypart;
  /* Same as above for subpartitioning */
  my_bool *is_subpart_keypart;

  my_bool ignore_part_fields; /* Ignore rest of partitioning fields */

  /***************************************************************
   Following fields form find_used_partitions() recursion context:
   **************************************************************/
  SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
  SEL_ARG **arg_stack_end; /* Top of the stack    */
  /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
  uint cur_part_fields;
  /* Same as cur_part_fields, but for subpartitioning */
  uint cur_subpart_fields;

  /* Iterator to be used to obtain the "current" set of used partitions */
  PARTITION_ITERATOR part_iter;

  /* Initialized bitmap of num_subparts size */
  MY_BITMAP subparts_bitmap;

  /* Current write positions in range_param.min_key / range_param.max_key
     while descending the SEL_ARG graph (see prune_partitions()) */
  uchar *cur_min_key;
  uchar *cur_max_key;

  /* Range-bound flags accumulated for the current min/max key prefixes;
     both start at 0 (see prune_partitions()) */
  uint cur_min_flag, cur_max_flag;
} PART_PRUNE_PARAM;
3866 
/* Forward declarations of the partition pruning implementation */
static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
                                       SEL_IMERGE *imerge);
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges);
static void mark_all_partitions_as_used(partition_info *part_info);

/* Debug-build-only printout helpers */
#ifndef DBUG_OFF
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
static void dbug_print_field(Field *field);
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
#endif
3881 
3882 
3883 /**
3884   Perform partition pruning for a given table and condition.
3885 
3886   @param      thd            Thread handle
3887   @param      table          Table to perform partition pruning for
3888   @param      pprune_cond    Condition to use for partition pruning
3889 
3890   @note This function assumes that lock_partitions are setup when it
3891   is invoked. The function analyzes the condition, finds partitions that
3892   need to be used to retrieve the records that match the condition, and
3893   marks them as used by setting appropriate bit in part_info->read_partitions
3894   In the worst case all partitions are marked as used. If the table is not
3895   yet locked, it will also unset bits in part_info->lock_partitions that is
3896   not set in read_partitions.
3897 
3898   This function returns promptly if called for non-partitioned table.
3899 
3900   @return Operation status
3901     @retval true  Failure
3902     @retval false Success
3903 */
3904 
bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
{
  bool retval= FALSE;
  partition_info *part_info = table->part_info;
  DBUG_ENTER("prune_partitions");

  if (!part_info)
    DBUG_RETURN(FALSE); /* not a partitioned table */

  if (!pprune_cond)
  {
    /* No condition to analyze: every partition may contain matching rows */
    mark_all_partitions_as_used(part_info);
    DBUG_RETURN(FALSE);
  }

  PART_PRUNE_PARAM prune_param;
  MEM_ROOT alloc;
  RANGE_OPT_PARAM  *range_par= &prune_param.range_param;
  MY_BITMAP *old_sets[2];

  prune_param.part_info= part_info;
  /* Local memory root for all range analysis allocations; freed at exit */
  init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
  range_par->mem_root= &alloc;
  range_par->old_root= thd->mem_root;

  if (create_partition_index_description(&prune_param))
  {
    /* Can't build the partitioning 'index': fall back to using everything */
    mark_all_partitions_as_used(part_info);
    free_root(&alloc,MYF(0));		// Return memory & allocator
    DBUG_RETURN(FALSE);
  }

  dbug_tmp_use_all_columns(table, old_sets,
                           &table->read_set, &table->write_set);
  range_par->thd= thd;
  range_par->table= table;
  /* range_par->cond doesn't need initialization */
  range_par->prev_tables= range_par->read_tables= 0;
  range_par->current_table= table->map;
  /* It should be possible to switch the following ON: */
  range_par->remove_false_where_parts= false;

  range_par->keys= 1; // one index
  range_par->using_real_indexes= FALSE;
  range_par->remove_jump_scans= FALSE;
  range_par->real_keynr[0]= 0;
  range_par->alloced_sel_args= 0;

  thd->no_errors=1;				// Don't warn about NULL
  thd->mem_root=&alloc;

  /* Start from "no partitions used"; pruning sets bits for needed ones */
  bitmap_clear_all(&part_info->read_partitions);

  prune_param.key= prune_param.range_param.key_parts;
  SEL_TREE *tree;
  int res;

  /* Invoke the range analyzer over the partitioning 'index' */
  tree= pprune_cond->get_mm_tree(range_par, &pprune_cond);
  if (!tree)
    goto all_used;

  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    /* Condition is always false: all partitions pruned away */
    retval= TRUE;
    goto end;
  }

  if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
    goto all_used;

  if (tree->merges.is_empty())
  {
    /* Range analysis has produced a single list of intervals. */
    prune_param.arg_stack_end= prune_param.arg_stack;
    prune_param.cur_part_fields= 0;
    prune_param.cur_subpart_fields= 0;

    prune_param.cur_min_key= prune_param.range_param.min_key;
    prune_param.cur_max_key= prune_param.range_param.max_key;
    prune_param.cur_min_flag= prune_param.cur_max_flag= 0;

    init_all_partitions_iterator(part_info, &prune_param.part_iter);
    if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
                                                            tree->keys[0]))))
      goto all_used;
  }
  else
  {
    if (tree->merges.elements == 1)
    {
      /*
        Range analysis has produced a "merge" of several intervals lists, a
        SEL_TREE that represents an expression in form
          sel_imerge = (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when
        partitioning index has several keyparts and the condition is OR of
        conditions that refer to different key parts. For example, we'll get
        here for "partitioning_field=const1 OR subpartitioning_field=const2"
      */
      if (-1 == (res= find_used_partitions_imerge(&prune_param,
                                                  tree->merges.head())))
        goto all_used;
    }
    else
    {
      /*
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in form
        imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that range analyzer
        can't really analyze properly.
      */
      if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
                                                       tree->merges)))
        goto all_used;
    }
  }

  /*
    res == 0 => no used partitions => retval=TRUE
    res == 1 => some used partitions => retval=FALSE
    res == -1 - we jump over this line to all_used:
  */
  retval= MY_TEST(!res);
  goto end;

all_used:
  retval= FALSE; // some partitions are used
  mark_all_partitions_as_used(prune_param.part_info);
end:
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
  thd->no_errors=0;
  thd->mem_root= range_par->old_root;
  free_root(&alloc,MYF(0));			// Return memory & allocator
  /*
    Must be a subset of the locked partitions.
    lock_partitions contains the partitions marked by explicit partition
    selection (... t PARTITION (pX) ...) and we must only use partitions
    within that set.
  */
  bitmap_intersect(&prune_param.part_info->read_partitions,
                   &prune_param.part_info->lock_partitions);
  /*
    If not yet locked, also prune partitions to lock if not UPDATEing
    partition key fields. This will also prune lock_partitions if we are under
    LOCK TABLES, so prune away calls to start_stmt().
    TODO: enhance this prune locking to also allow pruning of
    'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
    a lock for part_key partition.
  */
  if (table->file->get_lock_type() == F_UNLCK &&
      !partition_key_modified(table, table->write_set))
  {
    bitmap_copy(&prune_param.part_info->lock_partitions,
                &prune_param.part_info->read_partitions);
  }
  if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
  {
    /* Nothing left to read: report "all pruned" to the caller */
    table->all_partitions_pruned_away= true;
    retval= TRUE;
  }
  DBUG_RETURN(retval);
}
4069 
4070 
4071 /*
4072   For SEL_ARG* array, store sel_arg->min values into table record buffer
4073 
4074   SYNOPSIS
4075     store_selargs_to_rec()
4076       ppar   Partition pruning context
4077       start  Array of SEL_ARG* for which the minimum values should be stored
4078       num    Number of elements in the array
4079 
4080   DESCRIPTION
4081     For each SEL_ARG* interval in the specified array, store the left edge
4082     field value (sel_arg->min, key image format) into the table record.
4083 */
4084 
store_selargs_to_rec(PART_PRUNE_PARAM * ppar,SEL_ARG ** start,int num)4085 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
4086                                  int num)
4087 {
4088   KEY_PART *parts= ppar->range_param.key_parts;
4089   for (SEL_ARG **end= start + num; start != end; start++)
4090   {
4091     SEL_ARG *sel_arg= (*start);
4092     store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
4093                            parts[sel_arg->part].length);
4094   }
4095 }
4096 
4097 
/* Mark a partition as used in the case when there are no subpartitions */
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  DBUG_ENTER("mark_full_partition_used_no_parts");
  DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
  /* Without subpartitions, part_id directly indexes read_partitions */
  bitmap_set_bit(&part_info->read_partitions, part_id);
  DBUG_VOID_RETURN;
}
4107 
4108 
4109 /* Mark a partition as used in the case when there are subpartitions */
mark_full_partition_used_with_parts(partition_info * part_info,uint32 part_id)4110 static void mark_full_partition_used_with_parts(partition_info *part_info,
4111                                                 uint32 part_id)
4112 {
4113   uint32 start= part_id * part_info->num_subparts;
4114   uint32 end=   start + part_info->num_subparts;
4115   DBUG_ENTER("mark_full_partition_used_with_parts");
4116 
4117   for (; start != end; start++)
4118   {
4119     DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
4120     bitmap_set_bit(&part_info->read_partitions, start);
4121   }
4122   DBUG_VOID_RETURN;
4123 }
4124 
4125 /*
4126   Find the set of used partitions for List<SEL_IMERGE>
4127   SYNOPSIS
4128     find_used_partitions_imerge_list
4129       ppar      Partition pruning context.
4130       key_tree  Intervals tree to perform pruning for.
4131 
4132   DESCRIPTION
4133     List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
4134     The set of used partitions is an intersection of used partitions sets
4135     for imerge_{i}.
4136     We accumulate this intersection in a separate bitmap.
4137 
4138   RETURN
4139     See find_used_partitions()
4140 */
4141 
find_used_partitions_imerge_list(PART_PRUNE_PARAM * ppar,List<SEL_IMERGE> & merges)4142 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
4143                                             List<SEL_IMERGE> &merges)
4144 {
4145   MY_BITMAP all_merges;
4146   uint bitmap_bytes;
4147   my_bitmap_map *bitmap_buf;
4148   uint n_bits= ppar->part_info->read_partitions.n_bits;
4149   bitmap_bytes= bitmap_buffer_size(n_bits);
4150   if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
4151                                                 bitmap_bytes)))
4152   {
4153     /*
4154       Fallback, process just the first SEL_IMERGE. This can leave us with more
4155       partitions marked as used then actually needed.
4156     */
4157     return find_used_partitions_imerge(ppar, merges.head());
4158   }
4159   my_bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
4160   bitmap_set_prefix(&all_merges, n_bits);
4161 
4162   List_iterator<SEL_IMERGE> it(merges);
4163   SEL_IMERGE *imerge;
4164   while ((imerge=it++))
4165   {
4166     int res= find_used_partitions_imerge(ppar, imerge);
4167     if (!res)
4168     {
4169       /* no used partitions on one ANDed imerge => no used partitions at all */
4170       return 0;
4171     }
4172 
4173     if (res != -1)
4174       bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
4175 
4176 
4177     if (bitmap_is_clear_all(&all_merges))
4178       return 0;
4179 
4180     bitmap_clear_all(&ppar->part_info->read_partitions);
4181   }
4182   memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
4183          bitmap_bytes);
4184   return 1;
4185 }
4186 
4187 
4188 /*
4189   Find the set of used partitions for SEL_IMERGE structure
4190   SYNOPSIS
4191     find_used_partitions_imerge()
4192       ppar      Partition pruning context.
4193       key_tree  Intervals tree to perform pruning for.
4194 
4195   DESCRIPTION
4196     SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
4197     trivial - just use mark used partitions for each tree and bail out early
4198     if for some tree_{i} all partitions are used.
4199 
4200   RETURN
4201     See find_used_partitions().
4202 */
4203 
4204 static
find_used_partitions_imerge(PART_PRUNE_PARAM * ppar,SEL_IMERGE * imerge)4205 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
4206 {
4207   int res= 0;
4208   for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
4209   {
4210     ppar->arg_stack_end= ppar->arg_stack;
4211     ppar->cur_part_fields= 0;
4212     ppar->cur_subpart_fields= 0;
4213 
4214     ppar->cur_min_key= ppar->range_param.min_key;
4215     ppar->cur_max_key= ppar->range_param.max_key;
4216     ppar->cur_min_flag= ppar->cur_max_flag= 0;
4217 
4218     init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4219     SEL_ARG *key_tree= (*ptree)->keys[0];
4220     if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
4221       return -1;
4222   }
4223   return res;
4224 }
4225 
4226 
4227 /*
4228   Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
4229 
4230   SYNOPSIS
4231     find_used_partitions()
4232       ppar      Partition pruning context.
4233       key_tree  SEL_ARG range tree to perform pruning for
4234 
4235   DESCRIPTION
4236     This function
4237       * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
4238       * finds the partitions one needs to use to get rows in these intervals
4239       * marks these partitions as used.
    The next section describes the process in greater detail.
4241 
4242   IMPLEMENTATION
4243     TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
4244     We can find out which [sub]partitions to use if we obtain restrictions on
4245     [sub]partitioning fields in the following form:
4246     1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
4247     1.1  Same as (1) but for subpartition fields
4248 
4249     If partitioning supports interval analysis (i.e. partitioning is a
4250     function of a single table field, and partition_info::
4251     get_part_iter_for_interval != NULL), then we can also use condition in
4252     this form:
4253     2.  "const1 <=? partition_field <=? const2"
4254     2.1  Same as (2) but for subpartition_field
4255 
4256     INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
4257 
4258     The below is an example of what SEL_ARG tree may represent:
4259 
4260     (start)
4261      |                           $
4262      |   Partitioning keyparts   $  subpartitioning keyparts
4263      |                           $
4264      |     ...          ...      $
4265      |      |            |       $
4266      | +---------+  +---------+  $  +-----------+  +-----------+
4267      \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
4268        +---------+  +---------+  $  +-----------+  +-----------+
4269             |                    $        |             |
4270             |                    $        |        +-----------+
4271             |                    $        |        | subpar2=c6|
4272             |                    $        |        +-----------+
4273             |                    $        |
4274             |                    $  +-----------+  +-----------+
4275             |                    $  | subpar1=c4|--| subpar2=c8|
4276             |                    $  +-----------+  +-----------+
4277             |                    $
4278             |                    $
4279        +---------+               $  +------------+  +------------+
4280        | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
4281        +---------+               $  +------------+  +------------+
4282             |                    $
4283            ...                   $
4284 
4285     The up-down connections are connections via SEL_ARG::left and
4286     SEL_ARG::right. A horizontal connection to the right is the
4287     SEL_ARG::next_key_part connection.
4288 
4289     find_used_partitions() traverses the entire tree via recursion on
4290      * SEL_ARG::next_key_part (from left to right on the picture)
4291      * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
4292        performed for each depth level.
4293 
4294     Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
4295     ppar->arg_stack) constraints on partitioning and subpartitioning fields.
4296     For the example in the above picture, one of stack states is:
4297       in find_used_partitions(key_tree = "subpar2=c5") (***)
4298       in find_used_partitions(key_tree = "subpar1=c3")
4299       in find_used_partitions(key_tree = "par2=c2")   (**)
4300       in find_used_partitions(key_tree = "par1=c1")
4301       in prune_partitions(...)
4302     We apply partitioning limits as soon as possible, e.g. when we reach the
4303     depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
4304     and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond to
    "subpar1=c3 AND subpar2=c5", and then mark the appropriate subpartitions
    in the appropriate partitions as used.
4308 
4309     It is possible that constraints on some partitioning fields are missing.
4310     For the above example, consider this stack state:
4311       in find_used_partitions(key_tree = "subpar2=c12") (***)
4312       in find_used_partitions(key_tree = "subpar1=c10")
4313       in find_used_partitions(key_tree = "par1=c2")
4314       in prune_partitions(...)
4315     Here we don't have constraints for all partitioning fields. Since we've
4316     never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value.  We get  subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
    partition.
4320 
4321     The inverse is also possible: we may get constraints on partitioning
4322     fields, but not constraints on subpartitioning fields. In that case,
4323     calls to find_used_partitions() with depth below (**) will return -1,
4324     and we will mark entire partition as used.
4325 
4326   TODO
4327     Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
4328 
4329   RETURN
4330     1   OK, one or more [sub]partitions are marked as used.
4331     0   The passed condition doesn't match any partitions
4332    -1   Couldn't infer any partition pruning "intervals" from the passed
4333         SEL_ARG* tree (which means that all partitions should be marked as
4334         used) Marking partitions as used is the responsibility of the caller.
4335 */
4336 
static
int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
{
  int res, left_res=0, right_res=0;
  int key_tree_part= (int)key_tree->part;
  bool set_full_part_if_bad_ret= FALSE;
  bool ignore_part_fields= ppar->ignore_part_fields;
  bool did_set_ignore_part_fields= FALSE;
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);

  /* The tree is walked recursively; bail out before the stack overflows */
  if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
    return -1;

  /* First process intervals with smaller key values (SEL_ARG::left) */
  if (key_tree->left != &null_element)
  {
    if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
      return -1;
  }

  /* Push SEL_ARG's to stack to enable looking backwards as well */
  ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
  *(ppar->arg_stack_end++)= key_tree;

  if (ignore_part_fields)
  {
    /*
      We come here when a condition on the first partitioning
      fields led to evaluating the partitioning condition
      (due to finding a condition of the type a < const or
      b > const). Thus we must ignore the rest of the
      partitioning fields but we still want to analyse the
      subpartitioning fields.
    */
    if (key_tree->next_key_part)
      res= find_used_partitions(ppar, key_tree->next_key_part);
    else
      res= -1;
    goto pop_and_go_right;
  }

  if (key_tree->type == SEL_ARG::KEY_RANGE)
  {
    /*
      Interval-analysis path: the partitioning function supports mapping
      "const1 <=? field <=? const2" intervals to partition sets.
    */
    if (ppar->part_info->get_part_iter_for_interval &&
        key_tree->part <= ppar->last_part_partno)
    {
      /* Collect left and right bound, their lengths and flags */
      uchar *min_key= ppar->cur_min_key;
      uchar *max_key= ppar->cur_max_key;
      uchar *tmp_min_key= min_key;
      uchar *tmp_max_key= max_key;
      key_tree->store_min(ppar->key[key_tree->part].store_length,
                          &tmp_min_key, ppar->cur_min_flag);
      key_tree->store_max(ppar->key[key_tree->part].store_length,
                          &tmp_max_key, ppar->cur_max_flag);
      uint flag;
      if (key_tree->next_key_part &&
          key_tree->next_key_part->part == key_tree->part+1 &&
          key_tree->next_key_part->part <= ppar->last_part_partno &&
          key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      {
        /*
          There are more key parts for partition pruning to handle
          This mainly happens when the condition is an equality
          condition.
        */
        if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
            (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
            !key_tree->min_flag && !key_tree->max_flag)
        {
          /*
            This key part is a single point (min == max, no open bounds):
            descend to the next key part with extended min/max buffers.
          */
          /* Set 'parameters' */
          ppar->cur_min_key= tmp_min_key;
          ppar->cur_max_key= tmp_max_key;
          uint save_min_flag= ppar->cur_min_flag;
          uint save_max_flag= ppar->cur_max_flag;

          ppar->cur_min_flag|= key_tree->min_flag;
          ppar->cur_max_flag|= key_tree->max_flag;

          res= find_used_partitions(ppar, key_tree->next_key_part);

          /* Restore 'parameters' back */
          ppar->cur_min_key= min_key;
          ppar->cur_max_key= max_key;

          ppar->cur_min_flag= save_min_flag;
          ppar->cur_max_flag= save_max_flag;
          goto pop_and_go_right;
        }
        /* We have arrived at the last field in the partition pruning */
        uint tmp_min_flag= key_tree->min_flag,
             tmp_max_flag= key_tree->max_flag;
        if (!tmp_min_flag)
          key_tree->next_key_part->store_min_key(ppar->key,
                                                 &tmp_min_key,
                                                 &tmp_min_flag,
                                                 ppar->last_part_partno);
        if (!tmp_max_flag)
          key_tree->next_key_part->store_max_key(ppar->key,
                                                 &tmp_max_key,
                                                 &tmp_max_flag,
                                                 ppar->last_part_partno);
        flag= tmp_min_flag | tmp_max_flag;
      }
      else
        flag= key_tree->min_flag | key_tree->max_flag;

      /* Empty key buffers mean "no bound on this side" */
      if (tmp_min_key != range_par->min_key)
        flag&= ~NO_MIN_RANGE;
      else
        flag|= NO_MIN_RANGE;
      if (tmp_max_key != range_par->max_key)
        flag&= ~NO_MAX_RANGE;
      else
        flag|= NO_MAX_RANGE;

      /*
        We need to call the interval mapper if we have a condition which
        makes sense to prune on. In the example of COLUMNS on a and
        b it makes sense if we have a condition on a, or conditions on
        both a and b. If we only have conditions on b it might make sense
        but this is a harder case we will solve later. For the harder case
        this clause then turns into use of all partitions and thus we
        simply set res= -1 as if the mapper had returned that.
        TODO: What to do here is defined in WL#4065.
      */
      if (ppar->arg_stack[0]->part == 0 || ppar->part_info->part_type == VERSIONING_PARTITION)
      {
        uint32 i;
        uint32 store_length_array[MAX_KEY];
        uint32 num_keys= ppar->part_fields;

        for (i= 0; i < num_keys; i++)
          store_length_array[i]= ppar->key[i].store_length;
        res= ppar->part_info->
             get_part_iter_for_interval(ppar->part_info,
                                        FALSE,
                                        store_length_array,
                                        range_par->min_key,
                                        range_par->max_key,
                                        (uint)(tmp_min_key - range_par->min_key),
                                        (uint)(tmp_max_key - range_par->max_key),
                                        flag,
                                        &ppar->part_iter);
        if (!res)
          goto pop_and_go_right; /* res==0 --> no satisfying partitions */
      }
      else
        res= -1;

      if (res == -1)
      {
        /* get a full range iterator */
        init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
      }
      /*
        Save our intent to mark full partition as used if we will not be able
        to obtain further limits on subpartitions
      */
      if (key_tree_part < ppar->last_part_partno)
      {
        /*
          We need to ignore the rest of the partitioning fields in all
          evaluations after this
        */
        did_set_ignore_part_fields= TRUE;
        ppar->ignore_part_fields= TRUE;
      }
      set_full_part_if_bad_ret= TRUE;
      goto process_next_key_part;
    }

    /* Interval analysis on the last subpartitioning key part */
    if (key_tree_part == ppar->last_subpart_partno &&
        (NULL != ppar->part_info->get_subpart_iter_for_interval))
    {
      PARTITION_ITERATOR subpart_iter;
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    range_par->key_parts););
      res= ppar->part_info->
           get_subpart_iter_for_interval(ppar->part_info,
                                         TRUE,
                                         NULL, /* Currently not used here */
                                         key_tree->min_value,
                                         key_tree->max_value,
                                         0, 0, /* Those are ignored here */
                                         key_tree->min_flag |
                                           key_tree->max_flag,
                                         &subpart_iter);
      if (res == 0)
      {
        /*
           The only case where we can get "no satisfying subpartitions"
           returned from the above call is when an error has occurred.
        */
        DBUG_ASSERT(range_par->thd->is_error());
        return 0;
      }

      if (res == -1)
        goto pop_and_go_right; /* all subpartitions satisfy */

      /* Collect the satisfying subpartition ids into a bitmap */
      uint32 subpart_id;
      bitmap_clear_all(&ppar->subparts_bitmap);
      while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
             NOT_A_PARTITION_ID)
        bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);

      /* Mark each partition as used in each subpartition.  */
      uint32 part_id;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
              NOT_A_PARTITION_ID)
      {
        for (uint i= 0; i < ppar->part_info->num_subparts; i++)
          if (bitmap_is_set(&ppar->subparts_bitmap, i))
            bitmap_set_bit(&ppar->part_info->read_partitions,
                           part_id * ppar->part_info->num_subparts + i);
      }
      goto pop_and_go_right;
    }

    if (key_tree->is_singlepoint())
    {
      /* All partitioning fields are restricted to single values */
      if (key_tree_part == ppar->last_part_partno &&
          ppar->cur_part_fields == ppar->part_fields &&
          ppar->part_info->get_part_iter_for_interval == NULL)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
                                                       ppar->part_fields););
        uint32 part_id;
        longlong func_value;
        /* Find in which partition the {const1, ...,constN} tuple goes */
        if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
                                            &func_value))
        {
          res= 0; /* No satisfying partitions */
          goto pop_and_go_right;
        }
        /* Remember the limit we got - single partition #part_id */
        init_single_partition_iterator(part_id, &ppar->part_iter);

        /*
          If there are no subpartitions/we fail to get any limit for them,
          then we'll mark full partition as used.
        */
        set_full_part_if_bad_ret= TRUE;
        goto process_next_key_part;
      }

      /* All subpartitioning fields are restricted to single values */
      if (key_tree_part == ppar->last_subpart_partno &&
          ppar->cur_subpart_fields == ppar->subpart_fields)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
                             ppar->subpart_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
                                                       ppar->subpart_fields,
                                                       ppar->subpart_fields););
        /* Find the subpartition (it's HASH/KEY so we always have one) */
        partition_info *part_info= ppar->part_info;
        uint32 part_id, subpart_id;

        if (part_info->get_subpartition_id(part_info, &subpart_id))
          return 0;

        /* Mark this partition as used in each subpartition. */
        while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
                NOT_A_PARTITION_ID)
        {
          bitmap_set_bit(&part_info->read_partitions,
                         part_id * part_info->num_subparts + subpart_id);
        }
        res= 1; /* Some partitions were marked as used */
        goto pop_and_go_right;
      }
    }
    else
    {
      /*
        Can't handle condition on current key part. If we're that deep that
        we're processing subpartitioning's key parts, this means we'll not be
        able to infer any suitable condition, so bail out.
      */
      if (key_tree_part >= ppar->last_part_partno)
      {
        res= -1;
        goto pop_and_go_right;
      }
      /*
        No meaning in continuing with rest of partitioning key parts.
        Will try to continue with subpartitioning key parts.
      */
      ppar->ignore_part_fields= true;
      did_set_ignore_part_fields= true;
      goto process_next_key_part;
    }
  }

process_next_key_part:
  /* Descend along SEL_ARG::next_key_part to accumulate more constraints */
  if (key_tree->next_key_part)
    res= find_used_partitions(ppar, key_tree->next_key_part);
  else
    res= -1;

  if (did_set_ignore_part_fields)
  {
    /*
      We have returned from processing all key trees linked to our next
      key part. We are ready to be moving down (using right pointers) and
      this tree is a new evaluation requiring its own decision on whether
      to ignore partitioning fields.
    */
    ppar->ignore_part_fields= FALSE;
  }
  if (set_full_part_if_bad_ret)
  {
    if (res == -1)
    {
      /* Got "full range" for subpartitioning fields */
      uint32 part_id;
      bool found= FALSE;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
             NOT_A_PARTITION_ID)
      {
        ppar->mark_full_partition_used(ppar->part_info, part_id);
        found= TRUE;
      }
      res= MY_TEST(found);
    }
    /*
      Restore the "used partitions iterator" to the default setting that
      specifies iteration over all partitions.
    */
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
  }

pop_and_go_right:
  /* Pop this key part info off the "stack" */
  ppar->arg_stack_end--;
  ppar->cur_part_fields-=    ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];

  if (res == -1)
    return -1;
  /* Finally process intervals with greater key values (SEL_ARG::right) */
  if (key_tree->right != &null_element)
  {
    if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
      return -1;
  }
  return (left_res || right_res || res);
}
4695 
4696 
mark_all_partitions_as_used(partition_info * part_info)4697 static void mark_all_partitions_as_used(partition_info *part_info)
4698 {
4699   bitmap_copy(&(part_info->read_partitions),
4700               &(part_info->lock_partitions));
4701 }
4702 
4703 
4704 /*
4705   Check if field types allow to construct partitioning index description
4706 
4707   SYNOPSIS
4708     fields_ok_for_partition_index()
4709       pfield  NULL-terminated array of pointers to fields.
4710 
4711   DESCRIPTION
4712     For an array of fields, check if we can use all of the fields to create
4713     partitioning index description.
4714 
4715     We can't process GEOMETRY fields - for these fields singlepoint intervals
4716     cant be generated, and non-singlepoint are "special" kinds of intervals
4717     to which our processing logic can't be applied.
4718 
4719     It is not known if we could process ENUM fields, so they are disabled to be
4720     on the safe side.
4721 
4722   RETURN
4723     TRUE   Yes, fields can be used in partitioning index
4724     FALSE  Otherwise
4725 */
4726 
fields_ok_for_partition_index(Field ** pfield)4727 static bool fields_ok_for_partition_index(Field **pfield)
4728 {
4729   if (!pfield)
4730     return FALSE;
4731   for (; (*pfield); pfield++)
4732   {
4733     enum_field_types ftype= (*pfield)->real_type();
4734     if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4735       return FALSE;
4736   }
4737   return TRUE;
4738 }
4739 
4740 
4741 /*
4742   Create partition index description and fill related info in the context
4743   struct
4744 
4745   SYNOPSIS
4746     create_partition_index_description()
4747       prune_par  INOUT Partition pruning context
4748 
4749   DESCRIPTION
4750     Create partition index description. Partition index description is:
4751 
4752       part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4753 
4754     If partitioning/sub-partitioning uses BLOB or Geometry fields, then
4755     corresponding fields_list(...) is not included into index description
4756     and we don't perform partition pruning for partitions/subpartitions.
4757 
4758   RETURN
4759     TRUE   Out of memory or can't do partition pruning at all
4760     FALSE  OK
4761 */
4762 
create_partition_index_description(PART_PRUNE_PARAM * ppar)4763 static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
4764 {
4765   RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4766   partition_info *part_info= ppar->part_info;
4767   uint used_part_fields, used_subpart_fields;
4768 
4769   used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
4770                       part_info->num_part_fields : 0;
4771   used_subpart_fields=
4772     fields_ok_for_partition_index(part_info->subpart_field_array)?
4773       part_info->num_subpart_fields : 0;
4774 
4775   uint total_parts= used_part_fields + used_subpart_fields;
4776 
4777   ppar->ignore_part_fields= FALSE;
4778   ppar->part_fields=      used_part_fields;
4779   ppar->last_part_partno= (int)used_part_fields - 1;
4780 
4781   ppar->subpart_fields= used_subpart_fields;
4782   ppar->last_subpart_partno=
4783     used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;
4784 
4785   if (part_info->is_sub_partitioned())
4786   {
4787     ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
4788     ppar->get_top_partition_id_func= part_info->get_part_partition_id;
4789   }
4790   else
4791   {
4792     ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
4793     ppar->get_top_partition_id_func= part_info->get_partition_id;
4794   }
4795 
4796   KEY_PART *key_part;
4797   MEM_ROOT *alloc= range_par->mem_root;
4798   if (!total_parts ||
4799       !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
4800                                                total_parts)) ||
4801       !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
4802                                                       total_parts)) ||
4803       !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4804                                                            total_parts)) ||
4805       !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4806                                                            total_parts)))
4807     return TRUE;
4808 
4809   if (ppar->subpart_fields)
4810   {
4811     my_bitmap_map *buf;
4812     uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
4813     if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
4814       return TRUE;
4815     my_bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
4816                 FALSE);
4817   }
4818   range_par->key_parts= key_part;
4819   Field **field= (ppar->part_fields)? part_info->part_field_array :
4820                                            part_info->subpart_field_array;
4821   bool in_subpart_fields= FALSE;
4822   uint total_key_len= 0;
4823   for (uint part= 0; part < total_parts; part++, key_part++)
4824   {
4825     key_part->key=          0;
4826     key_part->part=	    part;
4827     key_part->length= (uint16)(*field)->key_length();
4828     key_part->store_length= (uint16)get_partition_field_store_length(*field);
4829     total_key_len += key_part->store_length;
4830 
4831     DBUG_PRINT("info", ("part %u length %u store_length %u", part,
4832                          key_part->length, key_part->store_length));
4833 
4834     key_part->field=        (*field);
4835     key_part->image_type =  Field::itRAW;
4836     /*
4837       We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
4838       in the RangeAnalysisModule.
4839     */
4840     key_part->flag=         0;
4841     /* We don't set key_parts->null_bit as it will not be used */
4842 
4843     ppar->is_part_keypart[part]= !in_subpart_fields;
4844     ppar->is_subpart_keypart[part]= in_subpart_fields;
4845 
4846     /*
4847       Check if this was last field in this array, in this case we
4848       switch to subpartitioning fields. (This will only happens if
4849       there are subpartitioning fields to cater for).
4850     */
4851     if (!*(++field))
4852     {
4853       field= part_info->subpart_field_array;
4854       in_subpart_fields= TRUE;
4855     }
4856   }
4857   range_par->key_parts_end= key_part;
4858 
4859   total_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
4860   if (!(range_par->min_key= (uchar*)alloc_root(alloc,total_key_len)) ||
4861       !(range_par->max_key= (uchar*)alloc_root(alloc,total_key_len)))
4862   {
4863     return true;
4864   }
4865 
4866   DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
4867                                                 range_par->key_parts_end););
4868   return FALSE;
4869 }
4870 
4871 
4872 #ifndef DBUG_OFF
4873 
print_partitioning_index(KEY_PART * parts,KEY_PART * parts_end)4874 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4875 {
4876   DBUG_ENTER("print_partitioning_index");
4877   DBUG_LOCK_FILE;
4878   fprintf(DBUG_FILE, "partitioning INDEX(");
4879   for (KEY_PART *p=parts; p != parts_end; p++)
4880   {
4881     fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name.str);
4882   }
4883   fputs(");\n", DBUG_FILE);
4884   DBUG_UNLOCK_FILE;
4885   DBUG_VOID_RETURN;
4886 }
4887 
4888 /* Print field value into debug trace, in NULL-aware way. */
dbug_print_field(Field * field)4889 static void dbug_print_field(Field *field)
4890 {
4891   if (field->is_real_null())
4892     fprintf(DBUG_FILE, "NULL");
4893   else
4894   {
4895     char buf[256];
4896     String str(buf, sizeof(buf), &my_charset_bin);
4897     str.length(0);
4898     String *pstr;
4899     pstr= field->val_str(&str);
4900     fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
4901   }
4902 }
4903 
4904 
/* Print a "c1 < keypartX < c2" - type interval into debug trace. */
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
{
  DBUG_ENTER("dbug_print_segment_range");
  DBUG_LOCK_FILE;
  if (!(arg->min_flag & NO_MIN_RANGE))
  {
    /* Decode the stored min endpoint back into the field so it can be
       printed with dbug_print_field() */
    store_key_image_to_rec(part->field, arg->min_value, part->length);
    dbug_print_field(part->field);
    /* NEAR_MIN means the endpoint is excluded (strict inequality) */
    if (arg->min_flag & NEAR_MIN)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
  }

  fprintf(DBUG_FILE, "%s", part->field->field_name.str);

  if (!(arg->max_flag & NO_MAX_RANGE))
  {
    /* NEAR_MAX means the endpoint is excluded (strict inequality) */
    if (arg->max_flag & NEAR_MAX)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
    /* Decode the stored max endpoint back into the field for printing */
    store_key_image_to_rec(part->field, arg->max_value, part->length);
    dbug_print_field(part->field);
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}
4935 
4936 
4937 /*
4938   Print a singlepoint multi-keypart range interval to debug trace
4939 
4940   SYNOPSIS
4941     dbug_print_singlepoint_range()
4942       start  Array of SEL_ARG* ptrs representing conditions on key parts
4943       num    Number of elements in the array.
4944 
4945   DESCRIPTION
4946     This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4947     interval to debug trace.
4948 */
4949 
dbug_print_singlepoint_range(SEL_ARG ** start,uint num)4950 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4951 {
4952   DBUG_ENTER("dbug_print_singlepoint_range");
4953   DBUG_LOCK_FILE;
4954   SEL_ARG **end= start + num;
4955 
4956   for (SEL_ARG **arg= start; arg != end; arg++)
4957   {
4958     Field *field= (*arg)->field;
4959     fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name.str);
4960     dbug_print_field(field);
4961   }
4962   fputs("\n", DBUG_FILE);
4963   DBUG_UNLOCK_FILE;
4964   DBUG_VOID_RETURN;
4965 }
4966 #endif
4967 
4968 /****************************************************************************
4969  * Partition pruning code ends
4970  ****************************************************************************/
4971 #endif
4972 
4973 
4974 /*
4975   Get cost of 'sweep' full records retrieval.
4976   SYNOPSIS
4977     get_sweep_read_cost()
4978       param            Parameter from test_quick_select
4979       records          # of records to be retrieved
4980   RETURN
4981     cost of sweep
4982 */
4983 
get_sweep_read_cost(const PARAM * param,ha_rows records)4984 double get_sweep_read_cost(const PARAM *param, ha_rows records)
4985 {
4986   double result;
4987   uint pk= param->table->s->primary_key;
4988   DBUG_ENTER("get_sweep_read_cost");
4989   if (param->table->file->pk_is_clustering_key(pk) ||
4990       param->table->file->stats.block_size == 0 /* HEAP */)
4991   {
4992     /*
4993       We are using the primary key to find the rows.
4994       Calculate the cost for this.
4995     */
4996     result= param->table->file->read_time(pk, (uint)records, records);
4997   }
4998   else
4999   {
5000     /*
5001       Rows will be retreived with rnd_pos(). Caluclate the expected
5002       cost for this.
5003     */
5004     double n_blocks=
5005       ceil(ulonglong2double(param->table->file->stats.data_file_length) /
5006            IO_SIZE);
5007     double busy_blocks=
5008       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
5009     if (busy_blocks < 1.0)
5010       busy_blocks= 1.0;
5011     DBUG_PRINT("info",("sweep: nblocks: %g, busy_blocks: %g", n_blocks,
5012                        busy_blocks));
5013     /*
5014       Disabled: Bail out if # of blocks to read is bigger than # of blocks in
5015       table data file.
5016     if (max_cost != DBL_MAX  && (busy_blocks+index_reads_cost) >= n_blocks)
5017       return 1;
5018     */
5019     JOIN *join= param->thd->lex->first_select_lex()->join;
5020     if (!join || join->table_count == 1)
5021     {
5022       /* No join, assume reading is done in one 'sweep' */
5023       result= busy_blocks*(DISK_SEEK_BASE_COST +
5024                           DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
5025     }
5026     else
5027     {
5028       /*
5029         Possibly this is a join with source table being non-last table, so
5030         assume that disk seeks are random here.
5031       */
5032       result= busy_blocks;
5033     }
5034   }
5035   DBUG_PRINT("return",("cost: %g", result));
5036   DBUG_RETURN(result);
5037 }
5038 
5039 
5040 /*
5041   Get best plan for a SEL_IMERGE disjunctive expression.
5042   SYNOPSIS
5043     get_best_disjunct_quick()
5044       param     Parameter from check_quick_select function
5045       imerge    Expression to use
5046       read_time Don't create scans with cost > read_time
5047 
5048   NOTES
5049     index_merge cost is calculated as follows:
5050     index_merge_cost =
5051       cost(index_reads) +         (see #1)
5052       cost(rowid_to_row_scan) +   (see #2)
5053       cost(unique_use)            (see #3)
5054 
5055     1. cost(index_reads) =SUM_i(cost(index_read_i))
5056        For non-CPK scans,
5057          cost(index_read_i) = {cost of ordinary 'index only' scan}
5058        For CPK scan,
5059          cost(index_read_i) = {cost of non-'index only' scan}
5060 
5061     2. cost(rowid_to_row_scan)
5062       If table PK is clustered then
5063         cost(rowid_to_row_scan) =
5064           {cost of ordinary clustered PK scan with n_ranges=n_rows}
5065 
5066       Otherwise, we use the following model to calculate costs:
5067       We need to retrieve n_rows rows from file that occupies n_blocks blocks.
5068       We assume that offsets of rows we need are independent variates with
5069       uniform distribution in [0..max_file_offset] range.
5070 
5071       We'll denote block as "busy" if it contains row(s) we need to retrieve
5072       and "empty" if doesn't contain rows we need.
5073 
5074       Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
5075       applies to any block in file). Let x_i be a variate taking value 1 if
5076       block #i is empty and 0 otherwise.
5077 
5078       Then E(x_i) = (1 - 1/n_blocks)^n_rows;
5079 
5080       E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
5081         = n_blocks * ((1 - 1/n_blocks)^n_rows) =
5082        ~= n_blocks * exp(-n_rows/n_blocks).
5083 
5084       E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
5085        ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
5086 
5087       Average size of "hole" between neighbor non-empty blocks is
5088            E(hole_size) = n_blocks/E(n_busy_blocks).
5089 
5090       The total cost of reading all needed blocks in one "sweep" is:
5091 
5092       E(n_busy_blocks)*
5093        (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
5094 
5095     3. Cost of Unique use is calculated in Unique::get_use_cost function.
5096 
5097   ROR-union cost is calculated in the same way index_merge, but instead of
5098   Unique a priority queue is used.
5099 
5100   RETURN
5101     Created read plan
5102     NULL - Out of memory or no read scan could be built.
5103 */
5104 
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time, bool named_trace)
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  TRP_RANGE **cpk_scan= NULL;
  bool imerge_too_expensive= FALSE;
  double imerge_cost= 0.0;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool all_scans_ror_able= TRUE;
  bool all_scans_rors= TRUE;
  uint unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  double roru_index_costs;
  ha_rows roru_total_records;
  double roru_intersect_part= 1.0;
  size_t n_child_scans;
  /* Keep the caller's cost cap: read_time is lowered below when an
     index_merge plan is found */
  double limit_read_time= read_time;
  THD *thd= param->thd;
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost: %g", read_time));

  /*
    In every tree of imerge remove SEL_ARG trees that do not make ranges.
    If after this removal some SEL_ARG tree becomes empty discard imerge.
  */
  for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++)
  {
    if (remove_nonrange_trees(param, *ptree))
    {
      imerge->trees_next= imerge->trees;
      break;
    }
  }

  /* Number of disjuncts that survived the pruning above */
  n_child_scans= imerge->trees_next - imerge->trees;

  if (!n_child_scans)
    DBUG_RETURN(NULL);

  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);

  const char* trace_best_disjunct_obj_name= named_trace ? "best_disjunct_quick" : nullptr;
  Json_writer_object trace_best_disjunct(thd, trace_best_disjunct_obj_name);
  Json_writer_array to_merge(thd, "indexes_to_merge");
  /*
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
  */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++)
  {
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
    Json_writer_object trace_idx(thd);
    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE,
                                           read_time)))
    {
      /*
        One of index scans in this index_merge is more expensive than entire
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
      */
      imerge_too_expensive= TRUE;
    }
    if (imerge_too_expensive)
    {
      trace_idx.add("chosen", false).add("cause", "cost");
      continue;
    }
    const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx];
    imerge_cost += (*cur_child)->read_cost;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
    /* A scan over the clustered PK is remembered separately: its rows
       need no rowid-to-row lookup in the sweep phase below */
    if (param->table->file->is_clustering_key(param->real_keynr[(*cur_child)->key_idx]))
    {
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
    }
    else
      non_cpk_scan_records += (*cur_child)->records;
    trace_idx.add("index_to_merge",
                  param->table->key_info[keynr_in_table].name)
             .add("cumulated_cost", imerge_cost);
  }

  to_merge.end();

  DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
  trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);

  if (imerge_too_expensive || (imerge_cost > read_time) ||
      ((non_cpk_scan_records+cpk_scan_records >=
        param->table->stat_records()) &&
       read_time != DBL_MAX))
  {
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
      more expensive
    */
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
    trace_best_disjunct.add("chosen", false).add("cause", "cost");
    DBUG_RETURN(NULL);
  }

  /*
    If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
    guaranteed to be cheaper than non-ROR union), unless ROR-unions are
    disabled in @@optimizer_switch
  */
  if (all_scans_rors &&
      optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
  {
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    trace_best_disjunct.add("use_roworder_union", true)
                       .add("cause",
                            "always cheaper than non roworder retrieval");
    goto skip_to_ror_scan;
  }

  if (cpk_scan)
  {
    /*
      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
      is done in QUICK_RANGE_SELECT::row_in_ranges)
     */
    double rid_comp_cost= (rows2double(non_cpk_scan_records) /
                           TIME_FOR_COMPARE_ROWID);
    imerge_cost+= rid_comp_cost;
    trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
                            rid_comp_cost);
  }

  /* Calculate cost(rowid_to_row_scan) */
  {
    double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records);
    imerge_cost+= sweep_cost;
    trace_best_disjunct.add("cost_sort_rowid_and_read_disk", sweep_cost);
  }
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
                     imerge_cost));
  if (imerge_cost > read_time ||
      !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
  {
    trace_best_disjunct.add("use_roworder_index_merge", true);
    trace_best_disjunct.add("cause", "cost");
    goto build_ror_index_merge;
  }

  /* Add Unique operations cost */
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
                                    param->table->file->ref_length,
                                    (size_t)param->thd->variables.sortbuff_size);
  /* Grow the shared cost-calculation buffer if this merge needs more room */
  if (param->imerge_cost_buff_size < unique_calc_buff_size)
  {
    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                     unique_calc_buff_size)))
      DBUG_RETURN(NULL);
    param->imerge_cost_buff_size= unique_calc_buff_size;
  }

  {
    const double dup_removal_cost= Unique::get_use_cost(
                           param->imerge_cost_buff, (uint)non_cpk_scan_records,
                           param->table->file->ref_length,
                           (size_t)param->thd->variables.sortbuff_size,
                           TIME_FOR_COMPARE_ROWID,
                           FALSE, NULL);
    imerge_cost+= dup_removal_cost;
    trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost)
                       .add("total_cost", imerge_cost);
  }

  DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
                     imerge_cost, read_time));
  if (imerge_cost < read_time)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->read_cost= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
      imerge_trp->records= MY_MIN(imerge_trp->records,
                               param->table->stat_records());
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      read_time= imerge_cost;
    }
    if (imerge_trp)
    {
      /* Try collapsing several scans over the same index into one scan;
         this may produce a better (different) plan */
      TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp,
                                                   limit_read_time);
      if (trp != imerge_trp)
        DBUG_RETURN(trp);
    }
  }

build_ror_index_merge:
  if (!all_scans_ror_able ||
      param->thd->lex->sql_command == SQLCOM_DELETE ||
      !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
    DBUG_RETURN(imerge_trp);

  /* Ok, it is possible to build a ROR-union, try it. */
  bool dummy;
  if (!(roru_read_plans=
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);

skip_to_ror_scan:
  roru_index_costs= 0.0;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  Json_writer_array trace_analyze_ror(thd, "analyzing_roworder_scans");

  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
  {
    Json_writer_object trp_info(thd);
    if (unlikely(thd->trace_started()))
      (*cur_child)->trace_basic_info(param, &trp_info);
    /*
      Assume the best ROR scan is the one that has cheapest full-row-retrieval
      scan cost.
      Also accumulate index_only scan costs as we'll need them to calculate
      overall index_intersection cost.
    */
    double cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      cost= param->table->file->
              read_time(param->real_keynr[(*cur_child)->key_idx], 1,
                        (*cur_child)->records) +
              rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
    }
    else
      cost= read_time;

    TABLE_READ_PLAN *prev_plan= *cur_child;
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost,
                                                 &dummy)))
    {
      /* No ROR-intersection cheaper than 'cost': fall back to the range
         scan if it is ROR itself, otherwise a ROR-union is impossible */
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_costs += (*cur_roru_plan)->read_cost;
    }
    else
      roru_index_costs +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
    roru_total_records += (*cur_roru_plan)->records;
    roru_intersect_part *= (*cur_roru_plan)->records /
                           param->table->stat_records();
  }
  trace_analyze_ror.end();
  /*
    rows to retrieve=
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
    This is valid because index_merge construction guarantees that conditions
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
                                  param->table->stat_records());
  /* ok, got a ROR read plan for each of the disjuncts
    Calculate cost:
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */

  double roru_total_cost;
  roru_total_cost= roru_index_costs +
                   rows2double(roru_total_records)*log((double)n_child_scans) /
                   (TIME_FOR_COMPARE_ROWID * M_LN2) +
                   get_sweep_read_cost(param, roru_total_records);

  DBUG_PRINT("info", ("ROR-union: cost %g, %zu members",
                      roru_total_cost, n_child_scans));
  trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost)
                     .add("members", n_child_scans);
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_time)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      trace_best_disjunct.add("chosen", true);
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->read_cost= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  else
    trace_best_disjunct.add("chosen", false);
  DBUG_RETURN(imerge_trp);
}
5423 
5424 
5425 /*
5426   Merge index scans for the same indexes in an index merge plan
5427 
5428   SYNOPSIS
5429     merge_same_index_scans()
5430       param           Context info for the operation
5431       imerge   IN/OUT SEL_IMERGE from which imerge_trp has been extracted
5432       imerge_trp      The index merge plan where index scans for the same
                      indexes are to be merged
      read_time       The upper bound for the cost of the plan to be evaluated

  DESCRIPTION
    For the given index merge plan imerge_trp extracted from the SEL_IMERGE
5438     imerge the function looks for range scans with the same indexes and merges
5439     them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function
5440     creates a range tree rt_i that contains only r_i. All rt_i are joined
5441     into one index merge that replaces the original index merge imerge.
5442     The function calls get_best_disjunct_quick for the new index merge to
5443     get a new index merge plan that contains index scans only for different
5444     indexes.
5445     If there are no index scans for the same index in the original index
5446     merge plan the function does not change the original imerge and returns
5447     imerge_trp as its result.
5448 
5449   RETURN
    The original or improved index merge plan
5451 */
5452 
static
TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
                                        TRP_INDEX_MERGE *imerge_trp,
                                        double read_time)
{
  /* first_scan_tree_idx[k] is the 1-based index of the first tree scanning
     index k, or 0 if no scan over index k has been seen yet */
  uint16 first_scan_tree_idx[MAX_KEY];
  SEL_TREE **tree;
  TRP_RANGE **cur_child;
  uint removed_cnt= 0;

  DBUG_ENTER("merge_same_index_scans");

  bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys);

  for (tree= imerge->trees, cur_child= imerge_trp->range_scans;
       tree != imerge->trees_next;
       tree++, cur_child++)
  {
    DBUG_ASSERT(tree);
    uint key_idx= (*cur_child)->key_idx;
    uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx];
    if (!*tree_idx_ptr)
      *tree_idx_ptr= (uint16) (tree-imerge->trees+1);
    else
    {
      /*
        A second scan over the same index: OR its SEL_ARG tree into the
        tree where the index was first seen (which is reduced to only
        this index), then mark the current tree for removal.
      */
      SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1);
      SEL_ARG *key= (*changed_tree)->keys[key_idx];
      for (uint i= 0; i < param->keys; i++)
        (*changed_tree)->keys[i]= NULL;
      (*changed_tree)->keys_map.clear_all();
      /* key_or_with_limit may consume its arguments: take extra refs */
      if (key)
        key->incr_refs();
      if ((*tree)->keys[key_idx])
        (*tree)->keys[key_idx]->incr_refs();
      if (((*changed_tree)->keys[key_idx]=
             key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx])))
        (*changed_tree)->keys_map.set_bit(key_idx);
      *tree= NULL;
      removed_cnt++;
    }
  }
  if (!removed_cnt)
    DBUG_RETURN(imerge_trp);

  /* Compact the tree array, squeezing out the NULL-ed entries */
  TABLE_READ_PLAN *trp= NULL;
  SEL_TREE **new_trees_next= imerge->trees;
  for (tree= new_trees_next; tree != imerge->trees_next; tree++)
  {
    if (!*tree)
      continue;
    if (tree > new_trees_next)
      *new_trees_next= *tree;
    new_trees_next++;
  }
  imerge->trees_next= new_trees_next;

  DBUG_ASSERT(imerge->trees_next>imerge->trees);

  if (imerge->trees_next-imerge->trees > 1)
    trp= get_best_disjunct_quick(param, imerge, read_time, true);
  else
  {
    /*
      This alternative theoretically can be reached when the cost
      of the index merge for such a formula as
        (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR
        (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3
      is estimated as being cheaper than the cost of index scan for
      the formula
        (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4)

      In the current code this may happen for two reasons:
      1. for a single index range scan data records are accessed in
         a random order
      2. the functions that estimate the cost of a range scan and an
         index merge retrievals are not well calibrated

      As the best range access has been already chosen it does not
      make sense to evaluate the one obtained from a degenerated
      index merge.
    */
    trp= 0;
  }

  DBUG_RETURN(trp);
}
5539 
5540 
5541 /*
5542   This structure contains the info common for all steps of a partial
  index intersection plan. Moreover it contains also the info common
5544   for index intersect plans. This info is filled in by the function
5545   prepare_search_best just before searching for the best index
5546   intersection plan.
5547 */
5548 
typedef struct st_common_index_intersect_info
{
  PARAM *param;           /* context info for range optimizations            */
  uint key_size;          /* size of a ROWID element stored in Unique object */
  double compare_factor;  /* 1/compare - cost to compare two ROWIDs     */
  size_t max_memory_size;   /* maximum space allowed for Unique objects   */
  ha_rows table_cardinality;   /* estimate of the number of records in table */
  double cutoff_cost;        /* discard index intersects with greater costs  */
  INDEX_SCAN_INFO *cpk_scan;  /* clustered primary key used in intersection  */

  bool in_memory;  /* unique object for intersection is completely in memory */

  INDEX_SCAN_INFO **search_scans;    /* scans possibly included in intersect */
  uint n_search_scans;               /* number of elements in search_scans   */

  bool best_uses_cpk;   /* current best intersect uses clustered primary key */
  double best_cost;       /* cost of the current best index intersection     */
  /* estimate of the number of records in the current best intersection      */
  ha_rows best_records;
  uint best_length;    /* number of indexes in the current best intersection */
  INDEX_SCAN_INFO **best_intersect;  /* the current best index intersection  */
  /* scans from the best intersect to be filtered by cpk conditions          */
  key_map filtered_scans;

  uint *buff_elems;        /* buffer to calculate cost of index intersection */

} COMMON_INDEX_INTERSECT_INFO;
5576 
5577 
5578 /*
5579   This structure contains the info specific for one step of an index
5580   intersection plan. The structure is filled in by the function
5581    check_index_intersect_extension.
5582 */
5583 
typedef struct st_partial_index_intersect_info
{
  COMMON_INDEX_INTERSECT_INFO *common_info;    /* shared by index intersects */
  uint length;         /* number of index scans in the partial intersection  */
  ha_rows records;     /* estimate of the number of records in intersection  */
  double cost;         /* cost of the partial index intersection             */

  /* estimate of total number of records of all scans of the partial index
     intersect sent to the Unique object used for the intersection  */
  ha_rows records_sent_to_unique;

  /* total cost of the scans of indexes from the partial index intersection  */
  double index_read_cost;

  bool use_cpk_filter;      /* cpk filter is to be used for this       scan  */
  bool in_memory;            /* uses unique object in memory                 */
  double in_memory_cost;     /* cost of using unique object in memory        */

  key_map filtered_scans;    /* scans to be filtered by cpk conditions       */

  MY_BITMAP *intersect_fields;     /* bitmap of fields used in intersection  */

  /* Reset all fields to describe an empty (zero-step) intersection */
  void init()
  {
    common_info= NULL;
    intersect_fields= NULL;
    records_sent_to_unique= records= length= in_memory= use_cpk_filter= 0;
    cost= index_read_cost= in_memory_cost= 0.0;
    filtered_scans.clear_all();
  }
} PARTIAL_INDEX_INTERSECT_INFO;
5615 
5616 
5617 /* Check whether two indexes have the same first n components */
5618 
5619 static
same_index_prefix(KEY * key1,KEY * key2,uint used_parts)5620 bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts)
5621 {
5622   KEY_PART_INFO *part1= key1->key_part;
5623   KEY_PART_INFO *part2= key2->key_part;
5624   for(uint i= 0; i < used_parts; i++, part1++, part2++)
5625   {
5626     if (part1->fieldnr != part2->fieldnr)
5627       return FALSE;
5628   }
5629   return TRUE;
5630 }
5631 
5632 
5633 /* Create a bitmap for all fields of a table */
5634 
5635 static
create_fields_bitmap(PARAM * param,MY_BITMAP * fields_bitmap)5636 bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap)
5637 {
5638   my_bitmap_map *bitmap_buf;
5639 
5640   if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root,
5641                                                  param->fields_bitmap_size)))
5642     return TRUE;
5643   if (my_bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields, FALSE))
5644     return TRUE;
5645 
5646   return FALSE;
5647 }
5648 
5649 /* Compare two indexes scans for sort before search for the best intersection */
5650 
5651 static
cmp_intersect_index_scan(INDEX_SCAN_INFO ** a,INDEX_SCAN_INFO ** b)5652 int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b)
5653 {
5654   return (*a)->records < (*b)->records ?
5655           -1 : (*a)->records == (*b)->records ? 0 : 1;
5656 }
5657 
5658 
5659 static inline
set_field_bitmap_for_index_prefix(MY_BITMAP * field_bitmap,KEY_PART_INFO * key_part,uint used_key_parts)5660 void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap,
5661                                        KEY_PART_INFO *key_part,
5662                                        uint used_key_parts)
5663 {
5664   bitmap_clear_all(field_bitmap);
5665   for (KEY_PART_INFO *key_part_end= key_part+used_key_parts;
5666        key_part < key_part_end; key_part++)
5667   {
5668     bitmap_set_bit(field_bitmap, key_part->fieldnr-1);
5669   }
5670 }
5671 
5672 
5673 /*
5674   Round up table cardinality read from statistics provided by engine.
5675   This function should go away when mysql test will allow to handle
5676   more or less easily in the test suites deviations of InnoDB
5677   statistical data.
5678 */
5679 
5680 static inline
get_table_cardinality_for_index_intersect(TABLE * table)5681 ha_rows get_table_cardinality_for_index_intersect(TABLE *table)
5682 {
5683   if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)
5684     return table->stat_records();
5685   else
5686   {
5687     ha_rows d;
5688     double q;
5689     for (q= (double)table->stat_records(), d= 1 ; q >= 10; q/= 10, d*= 10 ) ;
5690     return (ha_rows) (floor(q+0.5) * d);
5691   }
5692 }
5693 
5694 static
print_keyparts(THD * thd,KEY * key,uint key_parts)5695 void print_keyparts(THD *thd, KEY *key, uint key_parts)
5696 {
5697   DBUG_ASSERT(thd->trace_started());
5698 
5699   KEY_PART_INFO *part= key->key_part;
5700   Json_writer_array keyparts(thd, "keyparts");
5701   for(uint i= 0; i < key_parts; i++, part++)
5702     keyparts.add(part->field->field_name);
5703 }
5704 
5705 
/* Forward declaration; the definition follows the discussion comment below */
static
ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
                                             INDEX_SCAN_INFO *ext_index_scan);
5709 
5710 /*
5711   Prepare to search for the best index intersection
5712 
5713   SYNOPSIS
5714     prepare_search_best_index_intersect()
5715       param         common info about index ranges
5716       tree          tree of ranges for indexes than can be intersected
5717       common    OUT info needed for search to be filled by the function
5718       init      OUT info for an initial pseudo step of the intersection plans
5719       cutoff_cost   cut off cost of the interesting index intersection
5720 
5721   DESCRIPTION
5722     The function initializes all fields of the structure 'common' to be used
5723     when searching for the best intersection plan. It also allocates
5724     memory to store the most cheap index intersection.
5725 
5726   NOTES
5727     When selecting candidates for index intersection we always take only
5728     one representative out of any set of indexes that share the same range
5729     conditions. These indexes always have the same prefixes and the
5730     components of this prefixes are exactly those used in these range
5731     conditions.
5732     Range conditions over clustered primary key (cpk) is always used only
5733     as the condition that filters out some rowids retrieved by the scans
5734     for secondary indexes. The cpk index will be handled in special way by
5735     the function that search for the best index intersection.
5736 
5737   RETURN
5738     FALSE  in the case of success
5739     TRUE   otherwise
5740 */
5741 
static
bool prepare_search_best_index_intersect(PARAM *param,
                                         SEL_TREE *tree,
                                         COMMON_INDEX_INTERSECT_INFO *common,
                                         PARTIAL_INDEX_INTERSECT_INFO *init,
                                         double cutoff_cost)
{
  uint i;
  uint n_search_scans;
  double cost;
  INDEX_SCAN_INFO **index_scan;
  INDEX_SCAN_INFO **scan_ptr;
  INDEX_SCAN_INFO *cpk_scan= NULL;
  TABLE *table= param->table;
  uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans);
  THD *thd= param->thd;

  /* An intersection makes sense only when at least two scans are available */
  if (n_index_scans <= 1)
    return 1;

  /* The initial pseudo step of the plan search starts at the cut off cost */
  init->init();
  init->common_info= common;
  init->cost= cutoff_cost;

  common->param= param;
  common->key_size= table->file->ref_length;   /* length of a rowid */
  common->compare_factor= TIME_FOR_COMPARE_ROWID;
  common->max_memory_size= (size_t)param->thd->variables.sortbuff_size;
  common->cutoff_cost= cutoff_cost;
  common->cpk_scan= NULL;
  common->table_cardinality=
    get_table_cardinality_for_index_intersect(table);

  /*
    If the table is stored in a clustered primary key (cpk), a range scan
    over the cpk is never intersected directly; it is remembered here and
    later used only as a rowid filter for the secondary index scans.
  */
  if (table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
  {
    INDEX_SCAN_INFO **index_scan_end;
    index_scan= tree->index_scans;
    index_scan_end= index_scan+n_index_scans;
    for ( ; index_scan < index_scan_end; index_scan++)
    {
      if (table->file->is_clustering_key((*index_scan)->keynr))
      {
        common->cpk_scan= cpk_scan= *index_scan;
        break;
      }
    }
  }

  /* Number of candidate scans (cpk excluded) plus 1 for a NULL terminator */
  i= n_index_scans - MY_TEST(cpk_scan != NULL) + 1;

  if (!(common->search_scans =
	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
                                         sizeof(INDEX_SCAN_INFO *) * i)))
    return TRUE;
  bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i);

  /* Select the candidate scans, one representative per index prefix */
  INDEX_SCAN_INFO **selected_index_scans= common->search_scans;
  Json_writer_array potential_idx_scans(thd, "potential_index_scans");
  for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++)
  {
    Json_writer_object idx_scan(thd);
    uint used_key_parts= (*index_scan)->used_key_parts;
    KEY *key_info= (*index_scan)->key_info;
    idx_scan.add("index", key_info->name);

    /* The cpk scan itself is only used for filtering, never intersected */
    if (*index_scan == cpk_scan)
    {
      idx_scan.add("chosen", "false")
              .add("cause", "clustered index used for filtering");
      continue;
    }
    /*
      A secondary index whose used prefix coincides with a prefix of the
      cpk gives no extra filtering: the cpk filter subsumes it.
    */
    if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts &&
        same_index_prefix(cpk_scan->key_info, key_info, used_key_parts))
    {
      idx_scan.add("chosen", "false")
              .add("cause", "clustered index used for filtering");
      continue;
    }

    cost= table->opt_range[(*index_scan)->keynr].index_only_cost;

    idx_scan.add("cost", cost);

    /* A scan already costlier than the whole cutoff can never help */
    if (cost >= cutoff_cost)
    {
      idx_scan.add("chosen", false);
      idx_scan.add("cause", "cost");
      continue;
    }

    for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++)
    {
      /*
        When we have range conditions for two different indexes with the same
        beginning it does not make sense to consider both of them for index
        intersection if the range conditions are covered by common initial
        components of the indexes. Actually in this case the indexes are
        guaranteed to have the same range conditions.
      */
      if ((*scan_ptr)->used_key_parts == used_key_parts &&
          same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts))
        break;
    }
    /* Keep only the cheapest representative of each same-prefix group */
    if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost)
    {
      idx_scan.add("chosen", true);
      if (!*scan_ptr)
        idx_scan.add("cause", "first occurrence of index prefix");
      else
        idx_scan.add("cause", "better cost for same idx prefix");
      *scan_ptr= *index_scan;
      (*scan_ptr)->index_read_cost= cost;
    }
    else
    {
      idx_scan.add("chosen", false).add("cause", "cost");
    }
  }
  potential_idx_scans.end();

  ha_rows records_in_scans= 0;

  /* Allocate a field bitmap for each selected scan; sum up their records */
  for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++)
  {
    if (create_fields_bitmap(param, &(*scan_ptr)->used_fields))
      return TRUE;
    records_in_scans+= (*scan_ptr)->records;
  }

  n_search_scans= i;

  if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields))
    return TRUE;

  /* Nothing left to intersect after the filtering above */
  if (!(common->n_search_scans= n_search_scans))
    return TRUE;

  common->best_uses_cpk= FALSE;
  common->best_cost= cutoff_cost + COST_EPS;
  common->best_length= 0;

  if (!(common->best_intersect=
	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
                                         sizeof(INDEX_SCAN_INFO *) *
                                         (i + MY_TEST(cpk_scan != NULL)))))
    return TRUE;

  /* Scratch buffer used by Unique cost estimation during the search */
  size_t calc_cost_buff_size=
         Unique::get_cost_calc_buff_size((size_t)records_in_scans,
                                         common->key_size,
				         common->max_memory_size);
  if (!(common->buff_elems= (uint *) alloc_root(param->mem_root,
                                                calc_cost_buff_size)))
    return TRUE;

  /* Consider scans in ascending order of the number of records they read */
  my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *),
           (qsort_cmp) cmp_intersect_index_scan);

  Json_writer_array selected_idx_scans(thd, "selected_index_scans");
  if (cpk_scan)
  {
    /*
      Pre-compute for every selected scan how many of its records the cpk
      range condition is expected to filter out (stored in ->filtered_out).
      A temporary length-1 "intersection" holding just the cpk scan is
      used to get the estimates.
    */
    PARTIAL_INDEX_INTERSECT_INFO curr;
    set_field_bitmap_for_index_prefix(&cpk_scan->used_fields,
                                      cpk_scan->key_info->key_part,
                                      cpk_scan->used_key_parts);
    curr.common_info= common;
    curr.intersect_fields= &cpk_scan->used_fields;
    curr.records= cpk_scan->records;
    curr.length= 1;
    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
    {
      KEY *key_info= (*scan_ptr)->key_info;
      ha_rows scan_records= (*scan_ptr)->records;
      ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr);
      (*scan_ptr)->filtered_out= records >= scan_records ?
                                   0 : scan_records-records;
      if (thd->trace_started())
      {
        Json_writer_object selected_idx(thd);
        selected_idx.add("index", key_info->name);
        print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
        selected_idx.add("records", (*scan_ptr)->records)
                    .add("filtered_records", (*scan_ptr)->filtered_out);
      }
    }
  }
  else
  {
    /* No cpk filter: nothing is filtered out of any scan */
    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
    {
      KEY *key_info= (*scan_ptr)->key_info;
      (*scan_ptr)->filtered_out= 0;
      if (thd->trace_started())
      {
        Json_writer_object selected_idx(thd);
        selected_idx.add("index", key_info->name);
        print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
        selected_idx.add("records", (*scan_ptr)->records)
                    .add("filtered_records", (*scan_ptr)->filtered_out);
      }
    }
  }

  return FALSE;
}
5947 
5948 
5949 /*
5950   On Estimation of the Number of Records in an Index Intersection
5951   ===============================================================
5952 
5953   Consider query Q over table t. Let C be the WHERE condition of  this query,
5954   and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined
5955   on table t.
5956   Let rt1 and rt2 be the range trees extracted by the range optimizer from C
5957   for idx1 and idx2 respectively.
5958   Let #t be the estimate of the number of records in table t provided for the
5959   optimizer.
5960   Let #r1 and #r2 be the estimates of the number of records in the range trees
5961   rt1 and rt2, respectively, obtained by the range optimizer.
5962 
5963   We need to get an estimate for the number of records in the index
5964   intersection of rt1 and rt2. In other words, we need to estimate the
5965   cardinality of the set of records that are in both trees. Let's designate
5966   this number by #r.
5967 
5968   If we do not make any assumptions then we can only state that
5969      #r<=MY_MIN(#r1,#r2).
5970   With this estimate we can't say that the index intersection scan will be
5971   cheaper than the cheapest index scan.
5972 
  Let Rt1 and Rt2 be AND/OR conditions representing rt1 and rt2 respectively.
5974   The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t.
5975   The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t.
5976 
5977   If we assume that the values in columns of idx1 and idx2 are independent
5978   then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t).
5979   So in this case we have: #r=#r1*#r2/#t.
5980 
5981   The above assumption of independence of the columns in idx1 and idx2 means
5982   that:
5983   - all columns are different
5984   - values from one column do not correlate with values from any other column.
5985 
5986   We can't help with the case when column correlate with each other.
5987   Yet, if they are assumed to be uncorrelated the value of #r theoretically can
  be evaluated. Unfortunately this evaluation, in general, is rather complex.
5989 
5990   Let's consider two indexes idx1:(dept, manager),  idx2:(dept, building)
5991   over table 'employee' and two range conditions over these indexes:
5992     Rt1: dept=10 AND manager LIKE 'S%'
5993     Rt2: dept=10 AND building LIKE 'L%'.
5994   We can state that:
5995     sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%')
5996     =sel(Rt1)*sel(Rt2)/sel(dept=10).
5997   sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #r1_0 of
5998   the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality
5999   #rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2.
6000   The current code does not make an estimate either for #rt1_0, or for #rt2_0,
6001   but it can be adjusted to provide those numbers.
6002   Alternatively, MY_MIN(rec_per_key) for (dept) could be used to get an upper
6003   bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
6004   now.
6005 
6006   Let's consider two other indexes idx1:(dept, last_name),
6007   idx2:(first_name, last_name) and two range conditions over these indexes:
6008     Rt1: dept=5 AND last_name='Sm%'
6009     Rt2: first_name='Robert' AND last_name='Sm%'.
6010 
  sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm%')*sel(first_name='Robert')
6012   =sel(Rt2)*sel(dept=5)
6013   Here MY_MAX(rec_per_key) for (dept) could be used to get an upper bound for
6014   the value of sel(Rt1&Rt2).
6015 
6016   When the intersected indexes have different major columns, but some
6017   minor column are common the picture may be more complicated.
6018 
6019   Let's consider the following range conditions for the same indexes as in
6020   the previous example:
6021     Rt1: (Rt11: dept=5 AND last_name='So%')
6022          OR
6023          (Rt12: dept=7 AND last_name='Saw%')
6024     Rt2: (Rt21: first_name='Robert' AND last_name='Saw%')
6025          OR
6026          (Rt22: first_name='Bob' AND last_name='So%')
6027   Here we have:
6028   sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) +
6029                 sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22)
6030   Now consider the range condition:
6031     Rt1_0: (dept=5 OR dept=7)
6032   For this condition we can state that:
6033   sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))=
6034   sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+
6035   sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)=
6036   sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+
6037   sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) >
6038   sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+
6039   sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) >
6040   sel(Rt1 & Rt2)
6041 
6042  We've just demonstrated for an example what is intuitively almost obvious
 in general. We can remove the ending parts from range trees getting less
6044  selective range conditions for sub-indexes.
6045  So if not a most major component with the number k of an index idx is
6046  encountered in the index with which we intersect we can use the sub-index
6047  idx_k-1 that includes the components of idx up to the i-th component and
6048  the range tree for idx_k-1 to make an upper bound estimate for the number
6049   of records in the index intersection.
6050  The range tree for idx_k-1 we use here is the subtree of the original range
6051   tree for idx that contains only parts from the first k-1 components.
6052 
6053   As it was mentioned above the range optimizer currently does not provide
6054   an estimate for the number of records in the ranges for sub-indexes.
6055   However, some reasonable upper bound estimate can be obtained.
6056 
6057   Let's consider the following range tree:
6058     Rt: (first_name='Robert' AND last_name='Saw%')
6059         OR
6060         (first_name='Bob' AND last_name='So%')
6061   Let #r be the number of records in Rt. Let f_1 be the fan-out of column
6062   last_name:
6063     f_1 = rec_per_key[first_name]/rec_per_key[last_name].
  Then the number of records in the range tree:
6065     Rt_0:  (first_name='Robert' OR first_name='Bob')
6066   for the sub-index (first_name) is not greater than MY_MAX(#r*f_1, #t).
6067   Strictly speaking, we can state only that it's not greater than
6068   MY_MAX(#r*max_f_1, #t), where
6069     max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
6070   Yet, if #r/#t is big enough (and this is the case of an index intersection,
6071   because using this index range with a single index scan is cheaper than
6072   the cost of the intersection when #r/#t is small) then almost safely we
6073   can use here f_1 instead of max_f_1.
6074 
6075   The above considerations can be used in future development. Now, they are
6076   used partly in the function that provides a rough upper bound estimate for
6077   the number of records in an index intersection that follow below.
6078 */
6079 
6080 /*
  Estimate the number of records selected by an extension of a partial intersection
6082 
6083   SYNOPSIS
6084     records_in_index_intersect_extension()
6085      curr            partial intersection plan to be extended
6086      ext_index_scan  the evaluated extension of this partial plan
6087 
6088   DESCRIPTION
6089     The function provides an estimate for the number of records in the
6090     intersection of the partial index intersection curr with the index
6091     ext_index_scan. If all intersected indexes does not have common columns
6092     then  the function returns an exact estimate (assuming there are no
6093     correlations between values in the columns). If the intersected indexes
6094     have common  columns the function returns an upper bound for the number
6095     of records in the intersection provided that the intersection of curr
    with ext_index_scan is expected to have fewer records than the expected
6097     number of records in the partial intersection curr. In this case the
6098     function also assigns the bitmap of the columns in the extended
6099     intersection to ext_index_scan->used_fields.
6100     If the function cannot expect that the number of records in the extended
6101     intersection is less that the expected number of records #r in curr then
6102     the function returns a number bigger than #r.
6103 
6104   NOTES
   See the comment before the description of the function that explains the
   reasoning used by this function.
6107 
6108   RETURN
6109     The expected number of rows in the extended index intersection
6110 */
6111 
static
ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
                                             INDEX_SCAN_INFO *ext_index_scan)
{
  KEY *key_info= ext_index_scan->key_info;
  KEY_PART_INFO* key_part= key_info->key_part;
  uint used_key_parts= ext_index_scan->used_key_parts;
  MY_BITMAP *used_fields= &ext_index_scan->used_fields;

  if (!curr->length)
  {
    /*
      If this the first index in the intersection just mark the
      fields in the used_fields bitmap and return the expected
      number of records in the range scan for the index provided
      by the range optimizer.
    */
    set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts);
    return ext_index_scan->records;
  }

  uint i;
  bool better_selectivity= FALSE;
  ha_rows records= curr->records;
  MY_BITMAP *curr_intersect_fields= curr->intersect_fields;
  /*
    Find the length i of the leading prefix of the extension index whose
    columns do not already occur in the intersected indexes; only this
    prefix can provide additional filtering.
  */
  for (i= 0; i < used_key_parts; i++, key_part++)
  {
    if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1))
      break;
  }
  if (i)
  {
    ha_rows table_cardinality= curr->common_info->table_cardinality;
    ha_rows ext_records= ext_index_scan->records;
    if (i < used_key_parts)
    {
      /*
        Only a proper prefix of the extension is usable: scale the record
        estimate up by the fan-out f1/f2 of the first unusable component
        (see the upper-bound reasoning in the comment above).
      */
      double f1= key_info->actual_rec_per_key(i-1);
      double f2= key_info->actual_rec_per_key(i);
      ext_records= (ha_rows) ((double) ext_records / f2 * f1);
    }
    if (ext_records < table_cardinality)
    {
      /*
        Independence assumption: sel(curr & ext) = sel(curr) * sel(ext),
        i.e. records * (ext_records / table_cardinality). Also extend the
        bitmap of columns covered by the new, longer intersection.
      */
      better_selectivity= TRUE;
      records= (ha_rows) ((double) records / table_cardinality *
			  ext_records);
      bitmap_copy(used_fields, curr_intersect_fields);
      key_part= key_info->key_part;
      for (uint j= 0; j < used_key_parts; j++, key_part++)
        bitmap_set_bit(used_fields, key_part->fieldnr-1);
    }
  }
  /*
    When no extra selectivity is gained, return a value strictly greater
    than curr->records so that the caller rejects the extension;
    otherwise return at least 1 row.
  */
  return !better_selectivity ? records+1 :
                               !records ? 1 : records;
}
6166 
6167 
6168 /*
6169   Estimate the cost a binary search within disjoint cpk range intervals
6170 
6171   Number of comparisons to check whether a cpk value satisfies
6172   the cpk range condition = log2(cpk_scan->range_count).
6173 */
6174 
6175 static inline
get_cpk_filter_cost(ha_rows filtered_records,INDEX_SCAN_INFO * cpk_scan,double compare_factor)6176 double get_cpk_filter_cost(ha_rows filtered_records,
6177                            INDEX_SCAN_INFO *cpk_scan,
6178                            double compare_factor)
6179 {
6180   return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) *
6181            filtered_records;
6182 }
6183 
6184 
6185 /*
  Check whether a partial index intersection plan can be extended
6187 
6188   SYNOPSIS
6189     check_index_intersect_extension()
6190      curr            partial intersection plan to be extended
6191      ext_index_scan  a possible extension of this plan to be checked
6192      next       OUT  the structure to be filled for the extended plan
6193 
6194   DESCRIPTION
6195     The function checks whether it makes sense to extend the index
6196     intersection plan adding the index ext_index_scan, and, if this
6197     the case, the function fills in the structure for the extended plan.
6198 
6199   RETURN
6200     TRUE      if it makes sense to extend the given plan
6201     FALSE     otherwise
6202 */
6203 
static
bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
                                     INDEX_SCAN_INFO *ext_index_scan,
                                     PARTIAL_INDEX_INTERSECT_INFO *next)
{
  ha_rows records;
  ha_rows records_sent_to_unique;
  double cost;
  ha_rows ext_index_scan_records= ext_index_scan->records;
  ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out;
  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
  double cutoff_cost= common_info->cutoff_cost;
  uint idx= curr->length;
  /* Index read cost only grows with the plan: prune as soon as it exceeds
     the cutoff */
  next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost;
  if (next->index_read_cost > cutoff_cost)
    return FALSE;

  if ((next->in_memory= curr->in_memory))
    next->in_memory_cost= curr->in_memory_cost;

  next->intersect_fields= &ext_index_scan->used_fields;
  next->filtered_scans= curr->filtered_scans;

  records_sent_to_unique= curr->records_sent_to_unique;

  next->use_cpk_filter= FALSE;

  /* Calculate the cost of using a Unique object for index intersection */
  if (idx && next->in_memory)
  {
    /*
      All rowids received from the first scan are expected in one unique tree
    */
    ha_rows elems_in_tree= common_info->search_scans[0]->records-
                           common_info->search_scans[0]->filtered_out ;
    next->in_memory_cost+= Unique::get_search_cost(elems_in_tree,
                                                   common_info->compare_factor)*
                             ext_index_scan_records;
    cost= next->in_memory_cost;
  }
  else
  {
    uint *buff_elems= common_info->buff_elems;
    uint key_size= common_info->key_size;
    double compare_factor= common_info->compare_factor;
    size_t max_memory_size= common_info->max_memory_size;

    records_sent_to_unique+= ext_index_scan_records;
    cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size,
                               max_memory_size, compare_factor, TRUE,
                               &next->in_memory);
    if (records_filtered_out_by_cpk)
    {
      /* Check whether using cpk filter for this scan is beneficial */

      double cost2;
      bool in_memory2;
      ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk;
      cost2=  Unique::get_use_cost(buff_elems, (size_t) records2, key_size,
                                   max_memory_size, compare_factor, TRUE,
                                   &in_memory2);
      /* Add the cost of checking each rowid against the cpk ranges */
      cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan,
                                  compare_factor);
      if (cost > cost2 + COST_EPS)
      {
        cost= cost2;
        next->in_memory= in_memory2;
        next->use_cpk_filter= TRUE;
        records_sent_to_unique= records2;
      }

    }
    if (next->in_memory)
      next->in_memory_cost= cost;
  }

  if (next->use_cpk_filter)
  {
    /* Remember this scan as cpk-filtered and account for the cpk columns */
    next->filtered_scans.set_bit(ext_index_scan->keynr);
    bitmap_union(&ext_index_scan->used_fields,
                 &common_info->cpk_scan->used_fields);
  }
  next->records_sent_to_unique= records_sent_to_unique;

  /* Reject extensions that do not reduce the expected number of records */
  records= records_in_index_intersect_extension(curr, ext_index_scan);
  if (idx && records > curr->records)
    return FALSE;
  if (next->use_cpk_filter && curr->filtered_scans.is_clear_all())
    records-= records_filtered_out_by_cpk;
  next->records= records;

  cost+= next->index_read_cost;
  if (cost >= cutoff_cost)
    return FALSE;

  /* Add the cost of fetching the qualifying rows from the table */
  cost+= get_sweep_read_cost(common_info->param, records);

  next->cost= cost;
  next->length= curr->length+1;

  return TRUE;
}
6306 
6307 
6308 /*
6309   Search for the cheapest extensions of range scans used to access a table
6310 
6311   SYNOPSIS
6312     find_index_intersect_best_extension()
6313       curr        partial intersection to evaluate all possible extension for
6314 
6315   DESCRIPTION
6316     The function tries to extend the partial plan curr in all possible ways
6317     to look for a cheapest index intersection whose cost less than the
6318     cut off value set in curr->common_info.cutoff_cost.
6319 */
6320 
static
void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
{
  PARTIAL_INDEX_INTERSECT_INFO next;
  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
  INDEX_SCAN_INFO **index_scans= common_info->search_scans;
  uint idx= curr->length;
  INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx];
  double cost= curr->cost;

  /* If the current partial plan beats the best plan found so far, keep it
     and lower the cutoff so subsequent branches are pruned earlier */
  if (cost + COST_EPS < common_info->best_cost)
  {
    common_info->best_cost= cost;
    common_info->best_length= curr->length;
    common_info->best_records= curr->records;
    common_info->filtered_scans= curr->filtered_scans;
    /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */
    common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all();
    uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length;
    memcpy(common_info->best_intersect, common_info->search_scans, sz);
    common_info->cutoff_cost= cost;
  }

  /* No scans remain to extend the plan with */
  if (!(*rem_first_index_scan_ptr))
    return;

  next.common_info= common_info;

  /*
    Try each remaining scan as the next element of the intersection.
    A candidate is swapped into position idx of search_scans, the
    extension is explored recursively, and then the swap is undone,
    so search_scans[0..length-1] always holds the current plan prefix.
  */
  INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr;
  for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr;
       *index_scan_ptr; index_scan_ptr++)
  {
    *rem_first_index_scan_ptr= *index_scan_ptr;
    *index_scan_ptr= rem_first_index_scan;
    if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next))
      find_index_intersect_best_extension(&next);
    *index_scan_ptr= *rem_first_index_scan_ptr;
    *rem_first_index_scan_ptr= rem_first_index_scan;
  }
}
6361 
6362 
6363 /*
6364   Get the plan of the best intersection of range scans used to access a table
6365 
6366   SYNOPSIS
6367     get_best_index_intersect()
6368       param         common info about index ranges
6369       tree          tree of ranges for indexes than can be intersected
6370       read_time     cut off value for the evaluated plans
6371 
6372   DESCRIPTION
6373     The function looks for the cheapest index intersection of the range
6374     scans to access a table. The info about the ranges for all indexes
6375     is provided by the range optimizer and is passed through the
6376     parameters param and tree. Any plan whose cost is greater than read_time
6377     is rejected.
6378     After the best index intersection is found the function constructs
6379     the structure that manages the execution by the chosen plan.
6380 
6381   RETURN
6382     Pointer to the generated execution structure if a success,
6383     0 - otherwise.
6384 */
6385 
static
TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
                                              double read_time)
{
  uint i;
  uint count;
  TRP_RANGE **cur_range;
  TRP_RANGE **range_scans;
  INDEX_SCAN_INFO *index_scan;
  COMMON_INDEX_INTERSECT_INFO common;
  PARTIAL_INDEX_INTERSECT_INFO init;
  TRP_INDEX_INTERSECT *intersect_trp= NULL;
  TABLE *table= param->table;
  THD *thd= param->thd;

  DBUG_ENTER("get_best_index_intersect");

  Json_writer_object trace_idx_interect(thd, "analyzing_sort_intersect");

  if (prepare_search_best_index_intersect(param, tree, &common, &init,
                                          read_time))
    DBUG_RETURN(NULL);

  /* Exhaustive search for the cheapest intersection under the cutoff */
  find_index_intersect_best_extension(&init);

  /* A single scan without a cpk filter is not an intersection */
  if (common.best_length <= 1 && !common.best_uses_cpk)
    DBUG_RETURN(NULL);

  if (common.best_uses_cpk)
  {
    /* Put the cpk scan first: it provides the rowid filter at execution */
    memmove((char *) (common.best_intersect+1), (char *) common.best_intersect,
            sizeof(INDEX_SCAN_INFO *) * common.best_length);
    common.best_intersect[0]= common.cpk_scan;
    common.best_length++;
  }

  count= common.best_length;

  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                            sizeof(TRP_RANGE *)*
                                            count)))
    DBUG_RETURN(NULL);

  /* Build a TRP_RANGE plan for each scan participating in the intersection */
  for (i= 0, cur_range= range_scans; i < count; i++)
  {
    index_scan= common.best_intersect[i];
    if ((*cur_range= new (param->mem_root) TRP_RANGE(index_scan->sel_arg,
                                                     index_scan->idx, 0)))
    {
      TRP_RANGE *trp= *cur_range;
      trp->read_cost= index_scan->index_read_cost;
      trp->records= index_scan->records;
      trp->is_ror= FALSE;
      trp->mrr_buf_size= 0;
      table->intersect_keys.set_bit(index_scan->keynr);
      cur_range++;
    }
  }

  /*
    Also mark indexes sharing a prefix with a chosen scan: they carry the
    same range conditions and are covered by the chosen intersection.
  */
  count= (uint)(tree->index_scans_end - tree->index_scans);
  for (i= 0; i < count; i++)
  {
    index_scan= tree->index_scans[i];
    if (!table->intersect_keys.is_set(index_scan->keynr))
    {
      for (uint j= 0; j < common.best_length; j++)
      {
	INDEX_SCAN_INFO *scan= common.best_intersect[j];
        if (same_index_prefix(index_scan->key_info, scan->key_info,
                              scan->used_key_parts))
	{
          table->intersect_keys.set_bit(index_scan->keynr);
          break;
        }
      }
    }
  }

  /* Wrap the best plan found into the execution structure */
  if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT))
  {

    intersect_trp->read_cost= common.best_cost;
    intersect_trp->records= common.best_records;
    intersect_trp->range_scans= range_scans;
    intersect_trp->range_scans_end= cur_range;
    intersect_trp->filtered_scans= common.filtered_scans;
    trace_idx_interect.add("rows", intersect_trp->records)
                      .add("cost", intersect_trp->read_cost)
                      .add("chosen",true);
  }
  DBUG_RETURN(intersect_trp);
}
6478 
6479 
/*
  A ROR (Rowid-ORdered Retrieval) scan descriptor. Structurally identical
  to INDEX_SCAN_INFO; the distinct name documents that the scan returns
  rows in rowid order and is usable for ROR-intersection/union plans.
*/
typedef struct st_ror_scan_info : INDEX_SCAN_INFO
{
} ROR_SCAN_INFO;
6483 
trace_basic_info(PARAM * param,Json_writer_object * trace_object) const6484 void TRP_ROR_INTERSECT::trace_basic_info(PARAM *param,
6485                                          Json_writer_object *trace_object) const
6486 {
6487   THD *thd= param->thd;
6488   DBUG_ASSERT(trace_object->trace_started());
6489 
6490   trace_object->add("type", "index_roworder_intersect");
6491   trace_object->add("rows", records);
6492   trace_object->add("cost", read_cost);
6493   trace_object->add("covering", is_covering);
6494   trace_object->add("clustered_pk_scan", cpk_scan != NULL);
6495 
6496   Json_writer_array smth_trace(thd, "intersect_of");
6497   for (ROR_SCAN_INFO **cur_scan= first_scan; cur_scan != last_scan;
6498                                                          cur_scan++)
6499   {
6500     const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr];
6501     const KEY_PART_INFO *key_part= cur_key.key_part;
6502 
6503     Json_writer_object trace_isect_idx(thd);
6504     trace_isect_idx.add("type", "range_scan");
6505     trace_isect_idx.add("index", cur_key.name);
6506     trace_isect_idx.add("rows", (*cur_scan)->records);
6507 
6508     Json_writer_array trace_range(thd, "ranges");
6509 
6510     trace_ranges(&trace_range, param, (*cur_scan)->idx,
6511                  (*cur_scan)->sel_arg, key_part);
6512   }
6513 }
6514 
6515 
6516 /*
6517   Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
6518   sel_arg set of intervals.
6519 
6520   SYNOPSIS
6521     make_ror_scan()
6522       param    Parameter from test_quick_select function
6523       idx      Index of key in param->keys
6524       sel_arg  Set of intervals for a given key
6525 
6526   RETURN
6527     NULL - out of memory
6528     ROR scan structure containing a scan for {idx, sel_arg}
6529 */
6530 
static
ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
{
  ROR_SCAN_INFO *ror_scan;
  my_bitmap_map *bitmap_buf;
  uint keynr;
  DBUG_ENTER("make_ror_scan");

  /* All allocations live on the optimizer's mem_root; no explicit free */
  if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
                                             sizeof(ROR_SCAN_INFO))))
    DBUG_RETURN(NULL);

  ror_scan->idx= idx;
  ror_scan->keynr= keynr= param->real_keynr[idx];
  /* Length of an index record: key value plus the row reference */
  ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
                             param->table->file->ref_length);
  ror_scan->sel_arg= sel_arg;
  ror_scan->records= param->quick_rows[keynr];

  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root,
                                                param->fields_bitmap_size)))
    DBUG_RETURN(NULL);

  if (my_bitmap_init(&ror_scan->covered_fields, bitmap_buf,
                  param->table->s->fields, FALSE))
    DBUG_RETURN(NULL);
  bitmap_clear_all(&ror_scan->covered_fields);

  /*
    Mark fields covered by this index, restricted to fields the query
    actually needs (param->needed_fields). fieldnr is 1-based, the bitmap
    is 0-based, hence the "- 1".
  */
  KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
  KEY_PART_INFO *key_part_end= key_part +
                               param->table->key_info[keynr].user_defined_key_parts;
  for (;key_part != key_part_end; ++key_part)
  {
    if (bitmap_is_set(&param->needed_fields, key_part->fieldnr-1))
      bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
  }
  /* Cost of reading the estimated rows via index-only access */
  ror_scan->index_read_cost=
    param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records);
  DBUG_RETURN(ror_scan);
}
6571 
6572 
6573 /*
6574   Compare two ROR_SCAN_INFO** by  E(#records_matched) * key_record_length.
6575   SYNOPSIS
6576     cmp_ror_scan_info()
6577       a ptr to first compared value
6578       b ptr to second compared value
6579 
6580   RETURN
6581    -1 a < b
6582     0 a = b
6583     1 a > b
6584 */
6585 
cmp_ror_scan_info(ROR_SCAN_INFO ** a,ROR_SCAN_INFO ** b)6586 static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
6587 {
6588   double val1= rows2double((*a)->records) * (*a)->key_rec_length;
6589   double val2= rows2double((*b)->records) * (*b)->key_rec_length;
6590   return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
6591 }
6592 
6593 /*
6594   Compare two ROR_SCAN_INFO** by
6595    (#covered fields in F desc,
6596     #components asc,
6597     number of first not covered component asc)
6598 
6599   SYNOPSIS
6600     cmp_ror_scan_info_covering()
6601       a ptr to first compared value
6602       b ptr to second compared value
6603 
6604   RETURN
6605    -1 a < b
6606     0 a = b
6607     1 a > b
6608 */
6609 
cmp_ror_scan_info_covering(ROR_SCAN_INFO ** a,ROR_SCAN_INFO ** b)6610 static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
6611 {
6612   if ((*a)->used_fields_covered > (*b)->used_fields_covered)
6613     return -1;
6614   if ((*a)->used_fields_covered < (*b)->used_fields_covered)
6615     return 1;
6616   if ((*a)->key_components < (*b)->key_components)
6617     return -1;
6618   if ((*a)->key_components > (*b)->key_components)
6619     return 1;
6620   if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
6621     return -1;
6622   if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
6623     return 1;
6624   return 0;
6625 }
6626 
6627 
6628 /* Auxiliary structure for incremental ROR-intersection creation */
/* Auxiliary structure for incremental ROR-intersection creation */
typedef struct
{
  /* Optimizer context this intersection is built for (never owned) */
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
  /*
    Fraction of table records that satisfies conditions of all scans.
    This is the number of full records that will be retrieved if a
    non-index_only index intersection will be employed.
  */
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

  ha_rows index_records; /* sum(#records to look in indexes) */
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  /* index_scan_costs plus (for non-covering plans) the row-sweep cost */
  double total_cost;
} ROR_INTERSECT_INFO;
6646 
6647 
6648 /*
6649   Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
6650 
6651   SYNOPSIS
6652     ror_intersect_init()
6653       param         Parameter from test_quick_select
6654 
6655   RETURN
6656     allocated structure
6657     NULL on error
6658 */
6659 
6660 static
ror_intersect_init(const PARAM * param)6661 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
6662 {
6663   ROR_INTERSECT_INFO *info;
6664   my_bitmap_map* buf;
6665   if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
6666                                               sizeof(ROR_INTERSECT_INFO))))
6667     return NULL;
6668   info->param= param;
6669   if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root,
6670                                          param->fields_bitmap_size)))
6671     return NULL;
6672   if (my_bitmap_init(&info->covered_fields, buf, param->table->s->fields,
6673                   FALSE))
6674     return NULL;
6675   info->is_covering= FALSE;
6676   info->index_scan_costs= 0.0;
6677   info->index_records= 0;
6678   info->out_rows= (double) param->table->stat_records();
6679   bitmap_clear_all(&info->covered_fields);
6680   return info;
6681 }
6682 
ror_intersect_cpy(ROR_INTERSECT_INFO * dst,const ROR_INTERSECT_INFO * src)6683 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
6684 {
6685   dst->param= src->param;
6686   memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
6687          no_bytes_in_map(&src->covered_fields));
6688   dst->out_rows= src->out_rows;
6689   dst->is_covering= src->is_covering;
6690   dst->index_records= src->index_records;
6691   dst->index_scan_costs= src->index_scan_costs;
6692   dst->total_cost= src->total_cost;
6693 }
6694 
6695 
6696 /*
6697   Get selectivity of a ROR scan wrt ROR-intersection.
6698 
6699   SYNOPSIS
6700     ror_scan_selectivity()
      info  ROR-intersection
6702       scan  ROR scan
6703 
6704   NOTES
6705     Suppose we have a condition on several keys
6706     cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key
6707          k_21=c_21 AND k_22=c_22 AND ...  // parts of second key
6708           ...
6709          k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //parts of the key used by *scan
6710 
6711     where k_ij may be the same as any k_pq (i.e. keys may have common parts).
6712 
6713     A full row is retrieved if entire condition holds.
6714 
6715     The recursive procedure for finding P(cond) is as follows:
6716 
6717     First step:
6718     Pick 1st part of 1st key and break conjunction (1) into two parts:
6719       cond= (k_11=c_11 AND R)
6720 
6721     Here R may still contain condition(s) equivalent to k_11=c_11.
6722     Nevertheless, the following holds:
6723 
6724       P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
6725 
6726     Mark k_11 as fixed field (and satisfied condition) F, save P(F),
6727     save R to be cond and proceed to recursion step.
6728 
6729     Recursion step:
    We have a set of fixed fields (and satisfied conditions) F, probability P(F),
6731     and remaining conjunction R
6732     Pick next key part on current key and its condition "k_ij=c_ij".
6733     We will add "k_ij=c_ij" into F and update P(F).
6734     Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
6735 
6736      P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
6737 
    (where '|' means conditional probability, not "or")
6739 
6740     Consider the first multiplier in (2). One of the following holds:
6741     a) F contains condition on field used in t (i.e. t AND F = F).
6742       Then P(t|F) = 1
6743 
6744     b) F doesn't contain condition on field used in t. Then F and t are
6745      considered independent.
6746 
6747      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
6748           = P(t|fields_before_t_in_key).
6749 
6750      P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
6751                                    #records(fields_before_t_in_key, t)
6752 
6753     The second multiplier is calculated by applying this step recursively.
6754 
6755   IMPLEMENTATION
6756     This function calculates the result of application of the "recursion step"
6757     described above for all fixed key members of a single key, accumulating set
6758     of covered fields, selectivity, etc.
6759 
6760     The calculation is conducted as follows:
6761     Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
6762 
6763      n_{k1}      n_{k2}
6764     --------- * ---------  * .... (3)
6765      n_{k1-1}    n_{k2-1}
6766 
6767     where k1,k2,... are key parts which fields were not yet marked as fixed
6768     ( this is result of application of option b) of the recursion step for
6769       parts of a single key).
6770     Since it is reasonable to expect that most of the fields are not marked
6771     as fixed, we calculate (3) as
6772 
6773                                   n_{i1}      n_{i2}
6774     (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
6775                                   n_{i1-1}    n_{i2-1}
6776 
6777     where i1,i2, .. are key parts that were already marked as fixed.
6778 
6779     In order to minimize number of expensive records_in_range calls we group
6780     and reduce adjacent fractions.
6781 
6782   RETURN
6783     Selectivity of given ROR scan.
6784 */
6785 
ror_scan_selectivity(const ROR_INTERSECT_INFO * info,const ROR_SCAN_INFO * scan)6786 static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
6787                                    const ROR_SCAN_INFO *scan)
6788 {
6789   double selectivity_mult= 1.0;
6790   KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part;
6791   uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
6792   uchar *key_ptr= key_val;
6793   SEL_ARG *sel_arg, *tuple_arg= NULL;
6794   key_part_map keypart_map= 0;
6795   bool cur_covered;
6796   bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
6797                                            key_part->fieldnr - 1));
6798   key_range min_range;
6799   key_range max_range;
6800   min_range.key= key_val;
6801   min_range.flag= HA_READ_KEY_EXACT;
6802   max_range.key= key_val;
6803   max_range.flag= HA_READ_AFTER_KEY;
6804   ha_rows prev_records= info->param->table->stat_records();
6805   DBUG_ENTER("ror_scan_selectivity");
6806 
6807   for (sel_arg= scan->sel_arg; sel_arg;
6808        sel_arg= sel_arg->next_key_part)
6809   {
6810     DBUG_PRINT("info",("sel_arg step"));
6811     cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
6812                                        key_part[sel_arg->part].fieldnr - 1));
6813     if (cur_covered != prev_covered)
6814     {
6815       /* create (part1val, ..., part{n-1}val) tuple. */
6816       ha_rows records;
6817       page_range pages;
6818       if (!tuple_arg)
6819       {
6820         tuple_arg= scan->sel_arg;
6821         /* Here we use the length of the first key part */
6822         tuple_arg->store_min(key_part->store_length, &key_ptr, 0);
6823         keypart_map= 1;
6824       }
6825       while (tuple_arg->next_key_part != sel_arg)
6826       {
6827         tuple_arg= tuple_arg->next_key_part;
6828         tuple_arg->store_min(key_part[tuple_arg->part].store_length,
6829                              &key_ptr, 0);
6830         keypart_map= (keypart_map << 1) | 1;
6831       }
6832       min_range.length= max_range.length= (uint) (key_ptr - key_val);
6833       min_range.keypart_map= max_range.keypart_map= keypart_map;
6834       records= (info->param->table->file->
6835                 records_in_range(scan->keynr, &min_range, &max_range, &pages));
6836       if (cur_covered)
6837       {
6838         /* uncovered -> covered */
6839         double tmp= rows2double(records)/rows2double(prev_records);
6840         DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
6841         selectivity_mult *= tmp;
6842         prev_records= HA_POS_ERROR;
6843       }
6844       else
6845       {
6846         /* covered -> uncovered */
6847         prev_records= records;
6848       }
6849     }
6850     prev_covered= cur_covered;
6851   }
6852   if (!prev_covered)
6853   {
6854     double tmp= rows2double(info->param->quick_rows[scan->keynr]) /
6855                 rows2double(prev_records);
6856     DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
6857     selectivity_mult *= tmp;
6858   }
6859   DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
6860   DBUG_RETURN(selectivity_mult);
6861 }
6862 
6863 
6864 /*
6865   Check if adding a ROR scan to a ROR-intersection reduces its cost of
6866   ROR-intersection and if yes, update parameters of ROR-intersection,
6867   including its cost.
6868 
6869   SYNOPSIS
6870     ror_intersect_add()
6871       param        Parameter from test_quick_select
6872       info         ROR-intersection structure to add the scan to.
6873       ror_scan     ROR scan info to add.
6874       is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
6875                    from other parameters and is passed separately only to
6876                    avoid duplicating the inference code)
6877 
6878   NOTES
6879     Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
6880     intersection decreases. The cost of ROR-intersection is calculated as
6881     follows:
6882 
6883     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
6884 
6885     When we add a scan the first increases and the second decreases.
6886 
6887     cost_of_full_rows_retrieval=
6888       (union of indexes used covers all needed fields) ?
6889         cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
6890         0
6891 
6892     E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
6893                            ror_scan_selectivity({scan1}, scan2) * ... *
6894                            ror_scan_selectivity({scan1,...}, scanN).
6895   RETURN
6896     TRUE   ROR scan added to ROR-intersection, cost updated.
6897     FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
6898 */
6899 
ror_intersect_add(ROR_INTERSECT_INFO * info,ROR_SCAN_INFO * ror_scan,Json_writer_object * trace_costs,bool is_cpk_scan)6900 static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
6901                               ROR_SCAN_INFO* ror_scan,
6902                               Json_writer_object *trace_costs,
6903                               bool is_cpk_scan)
6904 {
6905   double selectivity_mult= 1.0;
6906 
6907   DBUG_ENTER("ror_intersect_add");
6908   DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
6909   DBUG_PRINT("info", ("Adding scan on %s",
6910                       info->param->table->key_info[ror_scan->keynr].name.str));
6911   DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));
6912 
6913   selectivity_mult = ror_scan_selectivity(info, ror_scan);
6914   if (selectivity_mult == 1.0)
6915   {
6916     /* Don't add this scan if it doesn't improve selectivity. */
6917     DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
6918     DBUG_RETURN(FALSE);
6919   }
6920 
6921   info->out_rows *= selectivity_mult;
6922 
6923   if (is_cpk_scan)
6924   {
6925     /*
6926       CPK scan is used to filter out rows. We apply filtering for
6927       each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
6928       per check this gives us:
6929     */
6930     const double idx_cost= rows2double(info->index_records) /
6931                               TIME_FOR_COMPARE_ROWID;
6932     info->index_scan_costs+= idx_cost;
6933     trace_costs->add("index_scan_cost", idx_cost);
6934   }
6935   else
6936   {
6937     info->index_records += info->param->quick_rows[ror_scan->keynr];
6938     info->index_scan_costs += ror_scan->index_read_cost;
6939     trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
6940     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
6941     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
6942                                                &info->covered_fields))
6943     {
6944       DBUG_PRINT("info", ("ROR-intersect is covering now"));
6945       info->is_covering= TRUE;
6946     }
6947   }
6948 
6949   info->total_cost= info->index_scan_costs;
6950   trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
6951   DBUG_PRINT("info", ("info->total_cost: %g", info->total_cost));
6952   if (!info->is_covering)
6953   {
6954     double sweep_cost= get_sweep_read_cost(info->param,
6955                                           double2rows(info->out_rows));
6956     info->total_cost+= sweep_cost;
6957     trace_costs->add("disk_sweep_cost", sweep_cost);
6958     DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
6959   }
6960   else
6961     trace_costs->add("disk_sweep_cost", 0);
6962 
6963   DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
6964   DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
6965                       info->is_covering?"" : "non-"));
6966   DBUG_RETURN(TRUE);
6967 }
6968 
6969 
6970 /*
6971   Get best ROR-intersection plan using non-covering ROR-intersection search
6972   algorithm. The returned plan may be covering.
6973 
6974   SYNOPSIS
6975     get_best_ror_intersect()
6976       param            Parameter from test_quick_select function.
6977       tree             Transformed restriction condition to be used to look
6978                        for ROR scans.
6979       read_time        Do not return read plans with cost > read_time.
6980       are_all_covering [out] set to TRUE if union of all scans covers all
6981                        fields needed by the query (and it is possible to build
6982                        a covering ROR-intersection)
6983 
6984   NOTES
6985     get_key_scans_params must be called before this function can be called.
6986 
6987     When this function is called by ROR-union construction algorithm it
6988     assumes it is building an uncovered ROR-intersection (and thus # of full
6989     records to be retrieved is wrong here). This is a hack.
6990 
6991   IMPLEMENTATION
6992     The approximate best non-covering plan search algorithm is as follows:
6993 
6994     find_min_ror_intersection_scan()
6995     {
6996       R= select all ROR scans;
6997       order R by (E(#records_matched) * key_record_length).
6998 
6999       S= first(R); -- set of scans that will be used for ROR-intersection
7000       R= R-first(S);
7001       min_cost= cost(S);
7002       min_scan= make_scan(S);
7003       while (R is not empty)
7004       {
7005         firstR= R - first(R);
7006         if (!selectivity(S + firstR < selectivity(S)))
7007           continue;
7008 
7009         S= S + first(R);
7010         if (cost(S) < min_cost)
7011         {
7012           min_cost= cost(S);
7013           min_scan= make_scan(S);
7014         }
7015       }
7016       return min_scan;
7017     }
7018 
7019     See ror_intersect_add function for ROR intersection costs.
7020 
7021     Special handling for Clustered PK scans
7022     Clustered PK contains all table fields, so using it as a regular scan in
7023     index intersection doesn't make sense: a range scan on CPK will be less
7024     expensive in this case.
7025     Clustered PK scan has special handling in ROR-intersection: it is not used
7026     to retrieve rows, instead its condition is used to filter row references
7027     we get from scans on other keys.
7028 
7029   RETURN
7030     ROR-intersection table read plan
7031     NULL if out of memory or no suitable plan found.
7032 */
7033 
7034 static
get_best_ror_intersect(const PARAM * param,SEL_TREE * tree,double read_time,bool * are_all_covering)7035 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
7036                                           double read_time,
7037                                           bool *are_all_covering)
7038 {
7039   uint idx;
7040   double min_cost= DBL_MAX;
7041   DBUG_ENTER("get_best_ror_intersect");
7042   THD *thd= param->thd;
7043   Json_writer_object trace_ror(thd, "analyzing_roworder_intersect");
7044 
7045   if ((tree->n_ror_scans < 2) || !param->table->stat_records() ||
7046       !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
7047     {
7048       if (tree->n_ror_scans < 2)
7049         trace_ror.add("cause", "too few roworder scans");
7050       DBUG_RETURN(NULL);
7051     }
7052 
7053   /*
7054     Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
7055     them. Also find and save clustered PK scan if there is one.
7056   */
7057   ROR_SCAN_INFO **cur_ror_scan;
7058   ROR_SCAN_INFO *cpk_scan= NULL;
7059   uint cpk_no;
7060 
7061   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
7062                                                      sizeof(ROR_SCAN_INFO*)*
7063                                                      param->keys)))
7064     return NULL;
7065   cpk_no= (param->table->file->
7066            pk_is_clustering_key(param->table->s->primary_key) ?
7067            param->table->s->primary_key : MAX_KEY);
7068 
7069   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
7070   {
7071     ROR_SCAN_INFO *scan;
7072     uint key_no;
7073     if (!tree->ror_scans_map.is_set(idx))
7074       continue;
7075     key_no= param->real_keynr[idx];
7076     if (key_no != cpk_no &&
7077         param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
7078     {
7079       /* Ignore clustering keys */
7080       tree->n_ror_scans--;
7081       continue;
7082     }
7083     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
7084       return NULL;
7085     if (key_no == cpk_no)
7086     {
7087       cpk_scan= scan;
7088       tree->n_ror_scans--;
7089     }
7090     else
7091       *(cur_ror_scan++)= scan;
7092   }
7093 
7094   tree->ror_scans_end= cur_ror_scan;
7095   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
7096                                           tree->ror_scans,
7097                                           tree->ror_scans_end););
7098   /*
7099     Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
7100     ROR_SCAN_INFO's.
7101     Step 2: Get best ROR-intersection using an approximate algorithm.
7102   */
7103   my_qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
7104            (qsort_cmp)cmp_ror_scan_info);
7105   DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
7106                                           tree->ror_scans,
7107                                           tree->ror_scans_end););
7108 
7109   ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
7110   ROR_SCAN_INFO **intersect_scans_end;
7111   if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
7112                                                      sizeof(ROR_SCAN_INFO*)*
7113                                                      tree->n_ror_scans)))
7114     return NULL;
7115   intersect_scans_end= intersect_scans;
7116 
7117   /* Create and incrementally update ROR intersection. */
7118   ROR_INTERSECT_INFO *intersect, *intersect_best;
7119   if (!(intersect= ror_intersect_init(param)) ||
7120       !(intersect_best= ror_intersect_init(param)))
7121     return NULL;
7122 
7123   /* [intersect_scans,intersect_scans_best) will hold the best intersection */
7124   ROR_SCAN_INFO **intersect_scans_best;
7125   cur_ror_scan= tree->ror_scans;
7126   intersect_scans_best= intersect_scans;
7127   Json_writer_array trace_isect_idx(thd, "intersecting_indexes");
7128   while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
7129   {
7130     Json_writer_object trace_idx(thd);
7131     trace_idx.add("index",
7132                  param->table->key_info[(*cur_ror_scan)->keynr].name);
7133 
7134     /* S= S + first(R);  R= R - first(R); */
7135     if (!ror_intersect_add(intersect, *cur_ror_scan, &trace_idx, FALSE))
7136     {
7137       trace_idx.add("usable", false)
7138                .add("cause", "does not reduce cost of intersect");
7139       cur_ror_scan++;
7140       continue;
7141     }
7142 
7143     trace_idx.add("cumulative_total_cost", intersect->total_cost)
7144              .add("usable", true)
7145              .add("matching_rows_now", intersect->out_rows)
7146              .add("intersect_covering_with_this_index", intersect->is_covering);
7147 
7148     *(intersect_scans_end++)= *(cur_ror_scan++);
7149 
7150     if (intersect->total_cost < min_cost)
7151     {
7152       /* Local minimum found, save it */
7153       ror_intersect_cpy(intersect_best, intersect);
7154       intersect_scans_best= intersect_scans_end;
7155       min_cost = intersect->total_cost;
7156       trace_idx.add("chosen", true);
7157     }
7158     else
7159     {
7160       trace_idx.add("chosen", false)
7161                .add("cause", "does not reduce cost");
7162     }
7163   }
7164   trace_isect_idx.end();
7165 
7166   if (intersect_scans_best == intersect_scans)
7167   {
7168     DBUG_PRINT("info", ("None of scans increase selectivity"));
7169     trace_ror.add("chosen", false)
7170              .add("cause","does not increase selectivity");
7171     DBUG_RETURN(NULL);
7172   }
7173 
7174   DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
7175                                           "best ROR-intersection",
7176                                           intersect_scans,
7177                                           intersect_scans_best););
7178 
7179   *are_all_covering= intersect->is_covering;
7180   uint best_num= (uint)(intersect_scans_best - intersect_scans);
7181   ror_intersect_cpy(intersect, intersect_best);
7182 
7183   /*
7184     Ok, found the best ROR-intersection of non-CPK key scans.
7185     Check if we should add a CPK scan. If the obtained ROR-intersection is
7186     covering, it doesn't make sense to add CPK scan.
7187   */
7188   Json_writer_object trace_cpk(thd, "clustered_pk");
7189   if (cpk_scan && !intersect->is_covering)
7190   {
7191     if (ror_intersect_add(intersect, cpk_scan, &trace_cpk, TRUE) &&
7192         (intersect->total_cost < min_cost))
7193     {
7194       trace_cpk.add("clustered_pk_scan_added_to_intersect", true)
7195                .add("cumulated_cost", intersect->total_cost);
7196       intersect_best= intersect; //just set pointer here
7197     }
7198     else
7199     {
7200       trace_cpk.add("clustered_pk_added_to_intersect", false)
7201                .add("cause", "cost");
7202       cpk_scan= 0; // Don't use cpk_scan
7203     }
7204   }
7205   else
7206   {
7207     trace_cpk.add("clustered_pk_added_to_intersect", false)
7208              .add("cause", cpk_scan ? "roworder is covering"
7209                                     : "no clustered pk index");
7210     cpk_scan= 0;                                // Don't use cpk_scan
7211   }
7212   trace_cpk.end();
7213 
7214   /* Ok, return ROR-intersect plan if we have found one */
7215   TRP_ROR_INTERSECT *trp= NULL;
7216   if (min_cost < read_time && (cpk_scan || best_num > 1))
7217   {
7218     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
7219       DBUG_RETURN(trp);
7220     if (!(trp->first_scan=
7221            (ROR_SCAN_INFO**)alloc_root(param->mem_root,
7222                                        sizeof(ROR_SCAN_INFO*)*best_num)))
7223       DBUG_RETURN(NULL);
7224     memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
7225     trp->last_scan=  trp->first_scan + best_num;
7226     trp->is_covering= intersect_best->is_covering;
7227     trp->read_cost= intersect_best->total_cost;
7228     /* Prevent divisons by zero */
7229     ha_rows best_rows = double2rows(intersect_best->out_rows);
7230     if (!best_rows)
7231       best_rows= 1;
7232     set_if_smaller(param->table->opt_range_condition_rows, best_rows);
7233     trp->records= best_rows;
7234     trp->index_scan_costs= intersect_best->index_scan_costs;
7235     trp->cpk_scan= cpk_scan;
7236     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
7237                         "cost %g, records %lu",
7238                         trp->read_cost, (ulong) trp->records));
7239     trace_ror.add("rows", trp->records)
7240              .add("cost", trp->read_cost)
7241              .add("covering", trp->is_covering)
7242              .add("chosen", true);
7243   }
7244   else
7245   {
7246     trace_ror.add("chosen", false)
7247              .add("cause", (read_time > min_cost)
7248                             ? "too few indexes to merge"
7249                             : "cost");
7250   }
7251   DBUG_RETURN(trp);
7252 }
7253 
7254 
7255 /*
7256   Get best covering ROR-intersection.
7257   SYNOPSIS
    get_best_covering_ror_intersect()
7259       param     Parameter from test_quick_select function.
7260       tree      SEL_TREE with sets of intervals for different keys.
7261       read_time Don't return table read plans with cost > read_time.
7262 
7263   RETURN
7264     Best covering ROR-intersection plan
7265     NULL if no plan found.
7266 
7267   NOTES
7268     get_best_ror_intersect must be called for a tree before calling this
7269     function for it.
7270     This function invalidates tree->ror_scans member values.
7271 
7272   The following approximate algorithm is used:
7273     I=set of all covering indexes
7274     F=set of all fields to cover
7275     S={}
7276 
7277     do
7278     {
7279       Order I by (#covered fields in F desc,
7280                   #components asc,
7281                   number of first not covered component asc);
7282       F=F-covered by first(I);
7283       S=S+first(I);
7284       I=I-first(I);
7285     } while F is not empty.
7286 */
7287 
static
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
                                                   double read_time)
{
  ROR_SCAN_INFO **ror_scan_mark;
  ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;
  DBUG_ENTER("get_best_covering_ror_intersect");

  /* Covering ROR-intersection is only considered when the switch is on */
  if (!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
    DBUG_RETURN(NULL);

  /* Cache the number of user-defined key parts of each candidate index */
  for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
    (*scan)->key_components=
      param->table->key_info[(*scan)->keynr].user_defined_key_parts;

  /*
    Run covering-ROR-search algorithm.
    Assume set I is [ror_scan .. ror_scans_end)
  */

  /*I=set of all covering indexes */
  ror_scan_mark= tree->ror_scans;

  /*
    Lazily allocate the scratch bitmap of fields covered so far. It lives
    in PARAM so repeated invocations reuse the same buffer.
  */
  MY_BITMAP *covered_fields= &param->tmp_covered_fields;
  if (!covered_fields->bitmap)
    covered_fields->bitmap= (my_bitmap_map*)alloc_root(param->mem_root,
                                               param->fields_bitmap_size);
  if (!covered_fields->bitmap ||
      my_bitmap_init(covered_fields, covered_fields->bitmap,
                  param->table->s->fields, FALSE))
    DBUG_RETURN(0);
  bitmap_clear_all(covered_fields);

  double total_cost= 0.0f;
  ha_rows records=0;                     /* sum of per-scan record counts */
  bool all_covered;

  DBUG_PRINT("info", ("Building covering ROR-intersection"));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "building covering ROR-I",
                                           ror_scan_mark, ror_scans_end););
  do
  {
    /*
      Update changed sorting info:
        #covered fields,
	number of first not covered component
      Calculate and save these values for each of remaining scans.
    */
    for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
    {
      /* Count only fields not already covered by previously chosen scans */
      bitmap_subtract(&(*scan)->covered_fields, covered_fields);
      (*scan)->used_fields_covered=
        bitmap_bits_set(&(*scan)->covered_fields);
      (*scan)->first_uncovered_field=
        bitmap_get_first(&(*scan)->covered_fields);
    }

    /* Greedy step: sort so the most promising remaining scan comes first */
    my_qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*),
             (qsort_cmp)cmp_ror_scan_info_covering);

    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "remaining scans",
                                             ror_scan_mark, ror_scans_end););

    /* I=I-first(I) */
    total_cost += (*ror_scan_mark)->index_read_cost;
    records += (*ror_scan_mark)->records;
    DBUG_PRINT("info", ("Adding scan on %s",
                        param->table->key_info[(*ror_scan_mark)->keynr].name.str));
    /* Abandon the search once we already exceed the best known cost */
    if (total_cost > read_time)
      DBUG_RETURN(NULL);
    /* F=F-covered by first(I) */
    bitmap_union(covered_fields, &(*ror_scan_mark)->covered_fields);
    all_covered= bitmap_is_subset(&param->needed_fields, covered_fields);
  } while ((++ror_scan_mark < ror_scans_end) && !all_covered);

  /* A one-scan "intersection" is just a single range scan - not our business */
  if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1)
    DBUG_RETURN(NULL);

  /*
    Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
    cost total_cost.
  */
  DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "creating covering ROR-intersect",
                                           tree->ror_scans, ror_scan_mark););

  /* Add priority queue use cost. */
  total_cost += rows2double(records)*
                log((double)(ror_scan_mark - tree->ror_scans)) /
                (TIME_FOR_COMPARE_ROWID * M_LN2);
  DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));

  if (total_cost > read_time)
    DBUG_RETURN(NULL);

  /* Build the plan object describing the chosen scans */
  TRP_ROR_INTERSECT *trp;
  if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
    DBUG_RETURN(trp);
  uint best_num= (uint)(ror_scan_mark - tree->ror_scans);
  if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     best_num)))
    DBUG_RETURN(NULL);
  memcpy(trp->first_scan, tree->ror_scans, best_num*sizeof(ROR_SCAN_INFO*));
  trp->last_scan=  trp->first_scan + best_num;
  trp->is_covering= TRUE;
  trp->read_cost= total_cost;
  trp->records= records;
  trp->cpk_scan= NULL;
  set_if_smaller(param->table->opt_range_condition_rows, records);

  DBUG_PRINT("info",
             ("Returning covering ROR-intersect plan: cost %g, records %lu",
              trp->read_cost, (ulong) trp->records));
  DBUG_RETURN(trp);
}
7408 
7409 
7410 /*
7411   Get best "range" table read plan for given SEL_TREE.
7412   Also update PARAM members and store ROR scans info in the SEL_TREE.
7413   SYNOPSIS
7414     get_key_scans_params
7415       param        parameters from test_quick_select
7416       tree         make range select for this SEL_TREE
7417       index_read_must_be_used if TRUE, assume 'index only' option will be set
7418                              (except for clustered PK indexes)
7419       for_range_access     if TRUE the function is called to get the best range
7420                            plan for range access, not for index merge access
7421       read_time    don't create read plans with cost > read_time.
7422   RETURN
7423     Best range read plan
7424     NULL if no plan found or error occurred
7425 */
7426 
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool for_range_access,
                                       double read_time)
{
  uint idx, UNINIT_VAR(best_idx);
  SEL_ARG *key_to_read= NULL;
  ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
  uint    UNINIT_VAR(best_mrr_flags),            /* protected by key_to_read */
          UNINIT_VAR(best_buf_size);             /* protected by key_to_read */
  TRP_RANGE* read_plan= NULL;
  DBUG_ENTER("get_key_scans_params");
  THD *thd= param->thd;
  /*
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
    is defined as "not null".
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
                                      "tree scans"););
  /* Optimizer-trace array: one entry per candidate index below */
  Json_writer_array range_scan_alt(thd, "range_scan_alternatives");

  /* Reset per-tree ROR-scan bookkeeping before re-evaluating all keys */
  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  tree->index_scans= 0;
  if (!tree->keys_map.is_clear_all())
  {
    tree->index_scans=
      (INDEX_SCAN_INFO **) alloc_root(param->mem_root,
                                      sizeof(INDEX_SCAN_INFO *) * param->keys);
  }
  tree->index_scans_end= tree->index_scans;

  /* Evaluate every index that has a SEL_ARG interval list in the tree */
  for (idx= 0; idx < param->keys; idx++)
  {
    SEL_ARG *key= tree->keys[idx];
    if (key)
    {
      ha_rows found_records;
      Cost_estimate cost;
      double found_read_time;
      uint mrr_flags, buf_size;
      bool is_ror_scan= FALSE;
      INDEX_SCAN_INFO *index_scan;
      uint keynr= param->real_keynr[idx];
      /* Remember keys whose usability depends on not-yet-read values */
      if (key->type == SEL_ARG::MAYBE_KEY ||
          key->maybe_flag)
        param->needed_reg->set_bit(keynr);

      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->covering_keys.is_set(keynr);

      Json_writer_object trace_idx(thd);
      trace_idx.add("index", param->table->key_info[keynr].name);

      /* Estimate rows/cost for a range scan over this index */
      found_records= check_quick_select(param, idx, read_index_only, key,
                                        for_range_access, &mrr_flags,
                                        &buf_size, &cost, &is_ror_scan);

      if (!for_range_access && !is_ror_scan &&
          !optimizer_flag(param->thd,OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
      {
        /* The scan is not a ROR-scan, just skip it */
        continue;
      }

      /* Record scan details for later index-merge analysis and tracing */
      if (found_records != HA_POS_ERROR && tree->index_scans &&
          (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root,
                                                     sizeof(INDEX_SCAN_INFO))))
      {
        Json_writer_array trace_range(thd, "ranges");

        const KEY &cur_key= param->table->key_info[keynr];
        const KEY_PART_INFO *key_part= cur_key.key_part;

        index_scan->idx= idx;
        index_scan->keynr= keynr;
        index_scan->key_info= &param->table->key_info[keynr];
        index_scan->used_key_parts= param->max_key_parts;
        index_scan->range_count= param->range_count;
        index_scan->records= found_records;
        index_scan->sel_arg= key;
        *tree->index_scans_end++= index_scan;

        if (unlikely(thd->trace_started()))
          trace_ranges(&trace_range, param, idx, key, key_part);
        trace_range.end();

        trace_idx.add("rowid_ordered", is_ror_scan)
                 .add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL))
                 .add("index_only", read_index_only)
                 .add("rows", found_records)
                 .add("cost", cost.total_cost());
      }
      if ((found_records != HA_POS_ERROR) && is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }
      /* Keep the cheapest usable scan; read_time shrinks as we improve */
      if (found_records != HA_POS_ERROR &&
          read_time > (found_read_time= cost.total_cost()))
      {
        read_time=    found_read_time;
        best_records= found_records;
        key_to_read=  key;
        best_idx= idx;
        best_mrr_flags= mrr_flags;
        best_buf_size=  buf_size;
        trace_idx.add("chosen", true);
      }
      else
      {
        trace_idx.add("chosen", false);
        if (found_records == HA_POS_ERROR)
        {
          if (key->type == SEL_ARG::Type::MAYBE_KEY)
            trace_idx.add("cause", "depends on unread values");
          else
            trace_idx.add("cause", "unknown");
        }
        else
          trace_idx.add("cause", "cost");
      }
    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  /* Build the TRP for the winner, if any index qualified */
  if (key_to_read)
  {
    if ((read_plan= new (param->mem_root) TRP_RANGE(key_to_read, best_idx,
                                                    best_mrr_flags)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(best_idx);
      read_plan->read_cost= read_time;
      read_plan->mrr_buf_size= best_buf_size;
      DBUG_PRINT("info",
                 ("Returning range plan for key %s, cost %g, records %lu",
                  param->table->key_info[param->real_keynr[best_idx]].name.str,
                  read_plan->read_cost, (ulong) read_plan->records));
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}
7575 
7576 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)7577 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
7578                                             bool retrieve_full_rows,
7579                                             MEM_ROOT *parent_alloc)
7580 {
7581   QUICK_INDEX_MERGE_SELECT *quick_imerge;
7582   QUICK_RANGE_SELECT *quick;
7583   /* index_merge always retrieves full rows, ignore retrieve_full_rows */
7584   if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
7585     return NULL;
7586 
7587   quick_imerge->records= records;
7588   quick_imerge->read_time= read_cost;
7589   for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
7590        range_scan++)
7591   {
7592     if (!(quick= (QUICK_RANGE_SELECT*)
7593           ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))||
7594         quick_imerge->push_quick_back(quick))
7595     {
7596       delete quick;
7597       delete quick_imerge;
7598       return NULL;
7599     }
7600   }
7601   return quick_imerge;
7602 }
7603 
7604 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)7605 QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param,
7606                                                 bool retrieve_full_rows,
7607                                                 MEM_ROOT *parent_alloc)
7608 {
7609   QUICK_INDEX_INTERSECT_SELECT *quick_intersect;
7610   QUICK_RANGE_SELECT *quick;
7611   /* index_merge always retrieves full rows, ignore retrieve_full_rows */
7612   if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd, param->table)))
7613     return NULL;
7614 
7615   quick_intersect->records= records;
7616   quick_intersect->read_time= read_cost;
7617   quick_intersect->filtered_scans= filtered_scans;
7618   for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
7619        range_scan++)
7620   {
7621     if (!(quick= (QUICK_RANGE_SELECT*)
7622           ((*range_scan)->make_quick(param, FALSE, &quick_intersect->alloc)))||
7623         quick_intersect->push_quick_back(quick))
7624     {
7625       delete quick;
7626       delete quick_intersect;
7627       return NULL;
7628     }
7629   }
7630   return quick_intersect;
7631 }
7632 
7633 
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  /*
    Full rows only need to be fetched when the caller asks for them AND
    the intersection is not covering (a covering intersection already has
    all needed fields in its indexes).
  */
  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    /* Children are allocated on the parent's mem_root when one is supplied */
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (ROR_SCAN_INFO **curr_scan= first_scan; curr_scan != last_scan;
                                                          ++curr_scan)
    {
      if (!(quick= get_quick_select(param, (*curr_scan)->idx,
                                    (*curr_scan)->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(alloc, quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    /* Optionally attach the clustered-PK scan used to filter the rowids */
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /*
        NOTE(review): file is reset to NULL, presumably because the CPK
        quick select is only used for rowid/condition checks and must not
        open the handler itself - confirm against
        QUICK_ROR_INTERSECT_SELECT::cpk_quick usage.
      */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}
7684 
7685 
make_quick(PARAM * param,bool retrieve_full_rows,MEM_ROOT * parent_alloc)7686 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
7687                                           bool retrieve_full_rows,
7688                                           MEM_ROOT *parent_alloc)
7689 {
7690   QUICK_ROR_UNION_SELECT *quick_roru;
7691   TABLE_READ_PLAN **scan;
7692   QUICK_SELECT_I *quick;
7693   DBUG_ENTER("TRP_ROR_UNION::make_quick");
7694   /*
7695     It is impossible to construct a ROR-union that will not retrieve full
7696     rows, ignore retrieve_full_rows parameter.
7697   */
7698   if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
7699   {
7700     for (scan= first_ror; scan != last_ror; scan++)
7701     {
7702       if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
7703           quick_roru->push_quick_back(quick))
7704       {
7705         delete quick_roru;
7706         DBUG_RETURN(NULL);
7707       }
7708     }
7709     quick_roru->records= records;
7710     quick_roru->read_time= read_cost;
7711   }
7712   DBUG_RETURN(quick_roru);
7713 }
7714 
7715 
7716 /*
7717   Build a SEL_TREE for <> or NOT BETWEEN predicate
7718 
7719   SYNOPSIS
7720     get_ne_mm_tree()
7721       param       PARAM from SQL_SELECT::test_quick_select
7722       cond_func   item for the predicate
7723       field       field in the predicate
      lt_value    constant that the field should be smaller than
      gt_value    constant that the field should be greater than
7726 
7727   RETURN
    #  Pointer to the built tree
7729     0  on error
7730 */
7731 
get_ne_mm_tree(RANGE_OPT_PARAM * param,Field * field,Item * lt_value,Item * gt_value)7732 SEL_TREE *Item_bool_func::get_ne_mm_tree(RANGE_OPT_PARAM *param,
7733                                          Field *field,
7734                                          Item *lt_value, Item *gt_value)
7735 {
7736   SEL_TREE *tree;
7737   tree= get_mm_parts(param, field, Item_func::LT_FUNC, lt_value);
7738   if (tree)
7739     tree= tree_or(param, tree, get_mm_parts(param, field, Item_func::GT_FUNC,
7740 					    gt_value));
7741   return tree;
7742 }
7743 
7744 
get_func_mm_tree(RANGE_OPT_PARAM * param,Field * field,Item * value)7745 SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
7746                                          Field *field, Item *value)
7747 {
7748   DBUG_ENTER("Item_func_ne::get_func_mm_tree");
7749   /*
7750     If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
7751     do not construct a SEL_TREE from it. A condition that excludes just one
7752     row in the table is not selective (unless there are only a few rows)
7753   */
7754   if (is_field_an_unique_index(param, field))
7755     DBUG_RETURN(NULL);
7756   DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
7757 }
7758 
7759 
get_func_mm_tree(RANGE_OPT_PARAM * param,Field * field,Item * value)7760 SEL_TREE *Item_func_between::get_func_mm_tree(RANGE_OPT_PARAM *param,
7761                                               Field *field, Item *value)
7762 {
7763   SEL_TREE *tree;
7764   DBUG_ENTER("Item_func_between::get_func_mm_tree");
7765   if (!value)
7766   {
7767     if (negated)
7768     {
7769       tree= get_ne_mm_tree(param, field, args[1], args[2]);
7770     }
7771     else
7772     {
7773       tree= get_mm_parts(param, field, Item_func::GE_FUNC, args[1]);
7774       if (tree)
7775       {
7776         tree= tree_and(param, tree, get_mm_parts(param, field,
7777                                                  Item_func::LE_FUNC,
7778                                                  args[2]));
7779       }
7780     }
7781   }
7782   else
7783   {
7784     tree= get_mm_parts(param, field,
7785                        (negated ?
7786                         (value == (Item*)1 ? Item_func::GT_FUNC :
7787                                              Item_func::LT_FUNC):
7788                         (value == (Item*)1 ? Item_func::LE_FUNC :
7789                                              Item_func::GE_FUNC)),
7790                        args[0]);
7791   }
7792   DBUG_RETURN(tree);
7793 }
7794 
7795 
SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
                                         Field *field, Item *value)
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("Item_func_in::get_func_mm_tree");
  /*
    Array for IN() is constructed when all values have the same result
    type. Tree won't be built for values with different result types,
    so we check it here to avoid unnecessary work.
  */
  if (!arg_types_compatible)
    DBUG_RETURN(0);

  if (negated)
  {
    if (array && array->type_handler()->result_type() != ROW_RESULT)
    {
      /*
        We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
        where c{i} are constants. Our goal is to produce a SEL_TREE that
        represents intervals:

        ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)

        where $MIN is either "-inf" or NULL.

        The most straightforward way to produce it is to convert NOT IN
        into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
        analyzer to build SEL_TREE from that. The problem is that the
        range analyzer will use O(N^2) memory (which is probably a bug),
        and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
        will run out of memory.

        Another problem with big lists like (*) is that a big list is
        unlikely to produce a good "range" access, while considering that
        range access will require expensive CPU calculations (and for
        MyISAM even index accesses). In short, big NOT IN lists are rarely
        worth analyzing.

        Considering the above, we'll handle NOT IN as follows:
        * if the number of entries in the NOT IN list is less than
          NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
        * Otherwise, don't produce a SEL_TREE.
      */
#define NOT_IN_IGNORE_THRESHOLD 1000
      MEM_ROOT *tmp_root= param->mem_root;
      param->thd->mem_root= param->old_root;
      /*
        Create one Item_type constant object. We'll need it as
        get_mm_parts only accepts constant values wrapped in Item_Type
        objects.
        We create the Item on param->mem_root which points to
        per-statement mem_root (while thd->mem_root is currently pointing
        to mem_root local to range optimizer).
      */
      Item *value_item= array->create_item(param->thd);
      param->thd->mem_root= tmp_root;

      if (array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
        DBUG_RETURN(0);

      /*
        if this is a "col1 NOT IN (...)", and there is a UNIQUE KEY(col1), do
        not construct a SEL_TREE from it. The rationale is as follows:
         - if there are only a few constants, this condition is not selective
           (unless the table is also very small in which case we won't gain
           anything)
         - if there are a lot of constants, the overhead of building and
           processing enormous range list is not worth it.
      */
      if (is_field_an_unique_index(param, field))
        DBUG_RETURN(0);

      /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
      uint i=0;
      do
      {
        /* Skip over leading constants that yield an IMPOSSIBLE interval */
        array->value_to_item(i, value_item);
        tree= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
        if (!tree)
          break;
        i++;
      } while (i < array->count && tree->type == SEL_TREE::IMPOSSIBLE);

      if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
      {
        /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
        DBUG_RETURN(NULL);
      }
      SEL_TREE *tree2;
      for (; i < array->used_count; i++)
      {
        /* Skip duplicate constants: they would produce empty intervals */
        if (array->compare_elems(i, i-1))
        {
          /* Get a SEL_TREE for "-inf < X < c_i" interval */
          array->value_to_item(i, value_item);
          tree2= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
          if (!tree2)
          {
            tree= NULL;
            break;
          }

          /* Change all intervals to be "c_{i-1} < X < c_i" */
          for (uint idx= 0; idx < param->keys; idx++)
          {
            SEL_ARG *new_interval, *last_val;
            if (((new_interval= tree2->keys[idx])) &&
                (tree->keys[idx]) &&
                ((last_val= tree->keys[idx]->last())))
            {
              new_interval->min_value= last_val->max_value;
              new_interval->min_flag= NEAR_MIN;

              /*
                If the interval is over a partial keypart, the
                interval must be "c_{i-1} <= X < c_i" instead of
                "c_{i-1} < X < c_i". Reason:

                Consider a table with a column "my_col VARCHAR(3)",
                and an index with definition
                "INDEX my_idx my_col(1)". If the table contains rows
                with my_col values "f" and "foo", the index will not
                distinguish the two rows.

                Note that tree_or() below will effectively merge
                this range with the range created for c_{i-1} and
                we'll eventually end up with only one range:
                "NULL < X".

                Partitioning indexes are never partial.
              */
              if (param->using_real_indexes)
              {
                const KEY key=
                  param->table->key_info[param->real_keynr[idx]];
                const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                if (kpi->key_part_flag & HA_PART_KEY_SEG)
                  new_interval->min_flag= 0;
              }
            }
          }
          /*
            The following doesn't try to allocate memory so no need to
            check for NULL.
          */
          tree= tree_or(param, tree, tree2);
        }
      }

      if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
      {
        /*
          Get the SEL_TREE for the last "c_last < X < +inf" interval
          (value_item contains c_last already)
        */
        tree2= get_mm_parts(param, field, Item_func::GT_FUNC, value_item);
        tree= tree_or(param, tree, tree2);
      }
    }
    else
    {
      /* No constant array: expand NOT IN into ANDed "!=" subtrees */
      tree= get_ne_mm_tree(param, field, args[1], args[1]);
      if (tree)
      {
        Item **arg, **end;
        for (arg= args + 2, end= arg + arg_count - 2; arg < end ; arg++)
        {
          tree=  tree_and(param, tree, get_ne_mm_tree(param, field,
                                                      *arg, *arg));
        }
      }
    }
  }
  else
  {
    /* Plain IN: OR together one equality interval per list element */
    tree= get_mm_parts(param, field, Item_func::EQ_FUNC, args[1]);
    if (tree)
    {
      Item **arg, **end;
      for (arg= args + 2, end= arg + arg_count - 2;
           arg < end ; arg++)
      {
        tree= tree_or(param, tree, get_mm_parts(param, field,
                                                Item_func::EQ_FUNC, *arg));
      }
    }
  }
  DBUG_RETURN(tree);
}
7987 
7988 
/*
  The structure Key_col_info is purely auxiliary and is used
  only in the method Item_func_in::get_func_row_mm_tree.
  It describes, for one component of the left-hand row of an IN
  predicate, whether that component can take part in range analysis
  and/or be evaluated.
*/
struct Key_col_info {
  Field *field;         /* If != NULL the column can be used for keys */
  cmp_item *comparator; /* If != 0 the column can be evaluated        */
};
7997 
7998 /**
7999     Build SEL_TREE for the IN predicate whose arguments are rows
8000 
8001     @param param          PARAM from SQL_SELECT::test_quick_select
8002     @param key_row        First operand of the IN predicate
8003 
8004   @note
    The function builds a SEL_TREE for an IN predicate in the case
    when the predicate uses row arguments. First the function
    detects among the components of the key_row (c[1],...,c[n]) taken
    from the left part of the predicate those that are usable
8009     for building SEL_TREE (c[i1],...,c[ik]).  They have to contain
8010     items whose real items are  field items referring to the current
8011     table or equal to the items referring to the current table.
8012     For the remaining components of the row it checks whether they
8013     can be evaluated. The result of the analysis is put into the
    array of structures of the type Key_col_info.
8015 
8016     After this the function builds the SEL_TREE for the following
8017     formula that can be inferred from the given IN predicate:
8018       c[i11]=a[1][i11] AND ... AND c[i1k1]=a[1][i1k1]
8019       OR
8020       ...
8021       OR
8022       c[im1]=a[m][im1] AND ... AND c[imkm]=a[m][imkm].
8023     Here a[1],...,a[m] are all arguments of the IN predicate from
8024     the right part and for each j ij1,...,ijkj is a subset of
8025     i1,...,ik such that a[j][ij1],...,a[j][ijkj] can be evaluated.
8026 
8027     If for some j there no a[j][i1],...,a[j][ik] can be evaluated
8028     then no SEL_TREE can be built for this predicate and the
8029     function immediately returns 0.
8030 
8031     If for some j by using evaluated values of key_row it can be
8032     proven that c[ij1]=a[j][ij1] AND ... AND c[ijkj]=a[j][ijkj]
8033     is always FALSE then this disjunct is omitted.
8034 
8035   @returns
8036     the built SEL_TREE if it can be constructed
8037     0 - otherwise.
8038 */
8039 
get_func_row_mm_tree(RANGE_OPT_PARAM * param,Item_row * key_row)8040 SEL_TREE *Item_func_in::get_func_row_mm_tree(RANGE_OPT_PARAM *param,
8041                                              Item_row *key_row)
8042 {
8043   DBUG_ENTER("Item_func_in::get_func_row_mm_tree");
8044 
8045   if (negated)
8046     DBUG_RETURN(0);
8047 
8048   SEL_TREE *res_tree= 0;
8049   uint used_key_cols= 0;
8050   uint col_comparators= 0;
8051   table_map param_comp= ~(param->prev_tables | param->read_tables |
8052                           param->current_table);
8053   uint row_cols= key_row->cols();
8054   Dynamic_array <Key_col_info> key_cols_info(row_cols);
8055   cmp_item_row *row_cmp_item;
8056 
8057   if (array)
8058   {
8059     in_row *row= static_cast<in_row*>(array);
8060     row_cmp_item= static_cast<cmp_item_row*>(row->get_cmp_item());
8061   }
8062   else
8063   {
8064     DBUG_ASSERT(get_comparator_type_handler(0) == &type_handler_row);
8065     row_cmp_item= static_cast<cmp_item_row*>(get_comparator_cmp_item(0));
8066   }
8067   DBUG_ASSERT(row_cmp_item);
8068 
8069   Item **key_col_ptr= key_row->addr(0);
8070   for(uint i= 0; i < row_cols;  i++, key_col_ptr++)
8071   {
8072     Key_col_info key_col_info= {0, NULL};
8073     Item *key_col= *key_col_ptr;
8074     if (key_col->real_item()->type() == Item::FIELD_ITEM)
8075     {
8076       /*
8077         The i-th component of key_row can be used for key access if
8078         key_col->real_item() points to a field of the current table or
8079         if it is equal to a field item pointing to such a field.
8080       */
8081       Item_field *col_field_item= (Item_field *) (key_col->real_item());
8082       Field *key_col_field= col_field_item->field;
8083       if (key_col_field->table->map != param->current_table)
8084       {
8085         Item_equal *item_equal= col_field_item->item_equal;
8086         if (item_equal)
8087         {
8088           Item_equal_fields_iterator it(*item_equal);
8089           while (it++)
8090 	  {
8091             key_col_field= it.get_curr_field();
8092             if (key_col_field->table->map == param->current_table)
8093               break;
8094           }
8095         }
8096       }
8097       if (key_col_field->table->map == param->current_table)
8098       {
8099         key_col_info.field= key_col_field;
8100         used_key_cols++;
8101       }
8102     }
8103     else if (!(key_col->used_tables() & (param_comp | param->current_table))
8104              && !key_col->is_expensive())
8105     {
8106       /* The i-th component of key_row can be evaluated */
8107 
8108       /* See the comment in Item::get_mm_tree_for_const */
8109       MEM_ROOT *tmp_root= param->mem_root;
8110       param->thd->mem_root= param->old_root;
8111 
8112       key_col->bring_value();
8113       key_col_info.comparator= row_cmp_item->get_comparator(i);
8114       DBUG_ASSERT(key_col_info.comparator);
8115       key_col_info.comparator->store_value(key_col);
8116       col_comparators++;
8117 
8118       param->thd->mem_root= tmp_root;
8119     }
8120     key_cols_info.push(key_col_info);
8121   }
8122 
8123   if (!used_key_cols)
8124     DBUG_RETURN(0);
8125 
8126   uint omitted_tuples= 0;
8127   Item **arg_start= arguments() + 1;
8128   Item **arg_end= arg_start + argument_count() - 1;
8129   for (Item **arg= arg_start ; arg < arg_end; arg++)
8130   {
8131     uint i;
8132 
8133     /*
8134       First check whether the disjunct constructed for *arg
8135       is really needed
8136     */
8137     Item_row *arg_tuple= (Item_row *) (*arg);
8138     if (col_comparators)
8139     {
8140       MEM_ROOT *tmp_root= param->mem_root;
8141       param->thd->mem_root= param->old_root;
8142       for (i= 0; i < row_cols; i++)
8143       {
8144         Key_col_info *key_col_info= &key_cols_info.at(i);
8145         if (key_col_info->comparator)
8146 	{
8147           Item *arg_col= arg_tuple->element_index(i);
8148           if (!(arg_col->used_tables() & (param_comp | param->current_table)) &&
8149 	      !arg_col->is_expensive() &&
8150               key_col_info->comparator->cmp(arg_col))
8151 	  {
8152             omitted_tuples++;
8153             break;
8154           }
8155         }
8156       }
8157       param->thd->mem_root= tmp_root;
8158       if (i < row_cols)
8159         continue;
8160     }
8161 
8162     /* The disjunct for *arg is needed: build it. */
8163     SEL_TREE *and_tree= 0;
8164     Item **arg_col_ptr= arg_tuple->addr(0);
8165     for (uint i= 0; i < row_cols; i++, arg_col_ptr++)
8166     {
8167       Key_col_info *key_col_info= &key_cols_info.at(i);
8168       if (!key_col_info->field)
8169         continue;
8170       Item *arg_col= *arg_col_ptr;
8171       if (!(arg_col->used_tables() & (param_comp | param->current_table)) &&
8172 	  !arg_col->is_expensive())
8173       {
8174         and_tree= tree_and(param, and_tree,
8175                            get_mm_parts(param,
8176                                         key_col_info->field,
8177                                         Item_func::EQ_FUNC,
8178                                         arg_col->real_item()));
8179       }
8180     }
8181     if (!and_tree)
8182     {
8183       res_tree= 0;
8184       break;
8185     }
8186     /* Join the disjunct the the OR tree that is being constructed */
8187     res_tree= !res_tree ? and_tree : tree_or(param, res_tree, and_tree);
8188   }
8189   if (omitted_tuples == argument_count() - 1)
8190   {
8191     /* It's turned out that all disjuncts are always FALSE */
8192     res_tree= new (param->mem_root) SEL_TREE(SEL_TREE::IMPOSSIBLE,
8193                                              param->mem_root, param->keys);
8194   }
8195   DBUG_RETURN(res_tree);
8196 }
8197 
8198 
8199 /*
8200   Build conjunction of all SEL_TREEs for a simple predicate applying equalities
8201 
8202   SYNOPSIS
8203     get_full_func_mm_tree()
8204       param       PARAM from SQL_SELECT::test_quick_select
8205       field_item  field in the predicate
8206       value       constant in the predicate (or a field already read from
8207                   a table in the case of dynamic range access)
8208                   (for BETWEEN it contains the number of the field argument,
8209                    for IN it's always 0)
8210       inv         TRUE <> NOT cond_func is considered
8211                   (makes sense only when cond_func is BETWEEN or IN)
8212 
8213   DESCRIPTION
8214     For a simple SARGable predicate of the form (f op c), where f is a field and
8215     c is a constant, the function builds a conjunction of all SEL_TREES that can
8216     be obtained by the substitution of f for all different fields equal to f.
8217 
8218   NOTES
    If the WHERE condition contains a predicate (fi op c),
    then not only the SEL_TREE for this predicate is built, but
8221     the trees for the results of substitution of fi for
8222     each fj belonging to the same multiple equality as fi
8223     are built as well.
8224     E.g. for WHERE t1.a=t2.a AND t2.a > 10
8225     a SEL_TREE for t2.a > 10 will be built for quick select from t2
8226     and
8227     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
8228 
8229     A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
    in a similar way: we build a conjunction of trees for the results
8231     of all substitutions of fi for equal fj.
8232     Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
    differently. It is considered as a conjunction of two SARGable
8234     predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
8235     is called for each of them separately producing trees for
8236        AND j (f1j <=c ) and AND j (f2j <= c)
8237     After this these two trees are united in one conjunctive tree.
8238     It's easy to see that the same tree is obtained for
8239        AND j,k (f1j <=c AND f2k<=c)
8240     which is equivalent to
8241        AND j,k (c BETWEEN f1j AND f2k).
8242     The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
8243     which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
8244     function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
    producing trees for AND j (f1j > c) and AND j (f2j < c). Then these two
8246     trees are united in one OR-tree. The expression
      (AND j (f1j > c)) OR (AND j (f2j < c))
8248     is equivalent to the expression
8249       AND j,k (f1j > c OR f2k < c)
8250     which is just a translation of
8251       AND j,k (c NOT BETWEEN f1j AND f2k)
8252 
8253     In the cases when one of the items f1, f2 is a constant c1 we do not create
8254     a tree for it at all. It works for BETWEEN predicates but does not
8255     work for NOT BETWEEN predicates as we have to evaluate the expression
8256     with it. If it is TRUE then the other tree can be completely ignored.
8257     We do not do it now and no trees are built in these cases for
8258     NOT BETWEEN predicates.
8259 
    As to IN predicates only ones of the form (f IN (c1,...,cn)),
    where f is a field and c1,...,cn are constants, are considered as
8262     SARGable. We never try to narrow the index scan using predicates of
8263     the form (c IN (c1,...,f,...,cn)).
8264 
8265   RETURN
8266     Pointer to the tree representing the built conjunction of SEL_TREEs
8267 */
8268 
get_full_func_mm_tree(RANGE_OPT_PARAM * param,Item_field * field_item,Item * value)8269 SEL_TREE *Item_bool_func::get_full_func_mm_tree(RANGE_OPT_PARAM *param,
8270                                                 Item_field *field_item,
8271                                                 Item *value)
8272 {
8273   DBUG_ENTER("Item_bool_func::get_full_func_mm_tree");
8274   SEL_TREE *tree= 0;
8275   SEL_TREE *ftree= 0;
8276   table_map ref_tables= 0;
8277   table_map param_comp= ~(param->prev_tables | param->read_tables |
8278 		          param->current_table);
8279 
8280   for (uint i= 0; i < arg_count; i++)
8281   {
8282     Item *arg= arguments()[i]->real_item();
8283     if (arg != field_item)
8284       ref_tables|= arg->used_tables();
8285   }
8286   Field *field= field_item->field;
8287   if (!((ref_tables | field->table->map) & param_comp))
8288     ftree= get_func_mm_tree(param, field, value);
8289   Item_equal *item_equal= field_item->item_equal;
8290   if (item_equal)
8291   {
8292     Item_equal_fields_iterator it(*item_equal);
8293     while (it++)
8294     {
8295       Field *f= it.get_curr_field();
8296       if (field->eq(f))
8297         continue;
8298       if (!((ref_tables | f->table->map) & param_comp))
8299       {
8300         tree= get_func_mm_tree(param, f, value);
8301         ftree= !ftree ? tree : tree_and(param, ftree, tree);
8302       }
8303     }
8304   }
8305 
8306   DBUG_RETURN(ftree);
8307 }
8308 
8309 
8310 /*
8311   make a select tree of all keys in condition
8312 
8313   @param  param  Context
8314   @param  cond  INOUT condition to perform range analysis on.
8315 
8316   @detail
8317     Range analysis may infer that some conditions are never true.
8318     - If the condition is never true, SEL_TREE(type=IMPOSSIBLE) is returned
8319     - if parts of condition are never true, the function may remove these parts
8320       from the condition 'cond'.  Sometimes, this will cause the condition to
8321       be substituted for something else.
8322 
8323 
8324   @return
8325     NULL     - Could not infer anything from condition cond.
8326     SEL_TREE with type=IMPOSSIBLE - condition can never be true.
8327 */
get_mm_tree(RANGE_OPT_PARAM * param,Item ** cond_ptr)8328 SEL_TREE *Item_cond_and::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8329 {
8330   DBUG_ENTER("Item_cond_and::get_mm_tree");
8331   SEL_TREE *tree= NULL;
8332   List_iterator<Item> li(*argument_list());
8333   Item *item;
8334   while ((item= li++))
8335   {
8336     SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param,li.ref());
8337     if (param->statement_should_be_aborted())
8338       DBUG_RETURN(NULL);
8339     tree= tree_and(param, tree, new_tree);
8340     if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
8341     {
8342       /*
8343         Do not remove 'item' from 'cond'. We return a SEL_TREE::IMPOSSIBLE
8344         and that is sufficient for the caller to see that the whole
8345         condition is never true.
8346       */
8347       break;
8348     }
8349   }
8350   DBUG_RETURN(tree);
8351 }
8352 
8353 
SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
{
  DBUG_ENTER("Item_cond::get_mm_tree");
  List_iterator<Item> li(*argument_list());
  /*
    replace_cond is set when removal of always-FALSE disjuncts leaves
    the OR with at most one member; 'cond' is then replaced by
    replacement_item (the sole remaining disjunct).
  */
  bool replace_cond= false;
  Item *replacement_item= li++;
  SEL_TREE *tree= li.ref()[0]->get_mm_tree(param, li.ref());
  if (param->statement_should_be_aborted())
    DBUG_RETURN(NULL);
  if (tree)
  {
    if (tree->type == SEL_TREE::IMPOSSIBLE &&
        param->remove_false_where_parts)
    {
      /* See the other li.remove() call below */
      li.remove();
      if (argument_list()->elements <= 1)
        replace_cond= true;
    }

    /* OR in the trees of the remaining disjuncts */
    Item *item;
    while ((item= li++))
    {
      SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param, li.ref());
      if (new_tree == NULL || param->statement_should_be_aborted())
        DBUG_RETURN(NULL);
      tree= tree_or(param, tree, new_tree);
      if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
      {
        /* Nothing more can be inferred: stop analyzing further disjuncts */
        replacement_item= *li.ref();
        break;
      }

      if (new_tree && new_tree->type == SEL_TREE::IMPOSSIBLE &&
          param->remove_false_where_parts)
      {
        /*
          This is a condition in form

            cond = item1 OR ... OR item_i OR ... itemN

          and item_i produces SEL_TREE(IMPOSSIBLE). We should remove item_i
          from cond.  This may cause 'cond' to become a degenerate,
          one-way OR. In that case, we replace 'cond' with the remaining
          item_i.
        */
        li.remove();
        if (argument_list()->elements <= 1)
          replace_cond= true;
      }
      else
        replacement_item= *li.ref();
    }

    if (replace_cond)
      *cond_ptr= replacement_item;
  }
  DBUG_RETURN(tree);
}
8413 
8414 
get_mm_tree_for_const(RANGE_OPT_PARAM * param)8415 SEL_TREE *Item::get_mm_tree_for_const(RANGE_OPT_PARAM *param)
8416 {
8417   DBUG_ENTER("get_mm_tree_for_const");
8418   if (is_expensive())
8419     DBUG_RETURN(0);
8420   /*
8421     During the cond->val_int() evaluation we can come across a subselect
8422     item which may allocate memory on the thd->mem_root and assumes
8423     all the memory allocated has the same life span as the subselect
8424     item itself. So we have to restore the thread's mem_root here.
8425   */
8426   MEM_ROOT *tmp_root= param->mem_root;
8427   param->thd->mem_root= param->old_root;
8428   SEL_TREE *tree;
8429 
8430   const SEL_TREE::Type type= val_int()? SEL_TREE::ALWAYS: SEL_TREE::IMPOSSIBLE;
8431   param->thd->mem_root= tmp_root;
8432 
8433   tree= new (tmp_root) SEL_TREE(type, tmp_root, param->keys);
8434   DBUG_RETURN(tree);
8435 }
8436 
8437 
get_mm_tree(RANGE_OPT_PARAM * param,Item ** cond_ptr)8438 SEL_TREE *Item::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8439 {
8440   DBUG_ENTER("Item::get_mm_tree");
8441   if (const_item())
8442     DBUG_RETURN(get_mm_tree_for_const(param));
8443 
8444   /*
8445     Here we have a not-constant non-function Item.
8446 
8447     Item_field should not appear, as normalize_cond() replaces
8448     "WHERE field" to "WHERE field<>0".
8449 
8450     Item_exists_subselect is possible, e.g. in this query:
8451     SELECT id, st FROM t1
8452     WHERE st IN ('GA','FL') AND EXISTS (SELECT 1 FROM t2 WHERE t2.id=t1.id)
8453     GROUP BY id;
8454   */
8455   table_map ref_tables= used_tables();
8456   if ((ref_tables & param->current_table) ||
8457       (ref_tables & ~(param->prev_tables | param->read_tables)))
8458     DBUG_RETURN(0);
8459   DBUG_RETURN(new (param->mem_root) SEL_TREE(SEL_TREE::MAYBE, param->mem_root,
8460                                              param->keys));
8461 }
8462 
8463 
8464 SEL_TREE *
get_mm_tree(RANGE_OPT_PARAM * param,Item ** cond_ptr)8465 Item_func_between::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8466 {
8467   DBUG_ENTER("Item_func_between::get_mm_tree");
8468   if (const_item())
8469     DBUG_RETURN(get_mm_tree_for_const(param));
8470 
8471   SEL_TREE *tree= 0;
8472   SEL_TREE *ftree= 0;
8473 
8474   if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
8475   {
8476     Item_field *field_item= (Item_field*) (arguments()[0]->real_item());
8477     ftree= get_full_func_mm_tree(param, field_item, NULL);
8478   }
8479 
8480   /*
8481     Concerning the code below see the NOTES section in
8482     the comments for the function get_full_func_mm_tree()
8483   */
8484   for (uint i= 1 ; i < arg_count ; i++)
8485   {
8486     if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
8487     {
8488       Item_field *field_item= (Item_field*) (arguments()[i]->real_item());
8489       SEL_TREE *tmp= get_full_func_mm_tree(param, field_item,
8490                                            (Item*)(intptr) i);
8491       if (negated)
8492       {
8493         tree= !tree ? tmp : tree_or(param, tree, tmp);
8494         if (tree == NULL)
8495           break;
8496       }
8497       else
8498         tree= tree_and(param, tree, tmp);
8499     }
8500     else if (negated)
8501     {
8502       tree= 0;
8503       break;
8504     }
8505   }
8506 
8507   ftree= tree_and(param, ftree, tree);
8508   DBUG_RETURN(ftree);
8509 }
8510 
8511 
get_mm_tree(RANGE_OPT_PARAM * param,Item ** cond_ptr)8512 SEL_TREE *Item_func_in::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8513 {
8514   DBUG_ENTER("Item_func_in::get_mm_tree");
8515   if (const_item())
8516     DBUG_RETURN(get_mm_tree_for_const(param));
8517 
8518   SEL_TREE *tree= 0;
8519   switch (key_item()->real_item()->type()) {
8520   case Item::FIELD_ITEM:
8521     tree= get_full_func_mm_tree(param,
8522                                 (Item_field*) (key_item()->real_item()),
8523                                 NULL);
8524     break;
8525   case Item::ROW_ITEM:
8526     tree= get_func_row_mm_tree(param,
8527 			       (Item_row *) (key_item()->real_item()));
8528     break;
8529   default:
8530     DBUG_RETURN(0);
8531   }
8532   DBUG_RETURN(tree);
8533 }
8534 
8535 
get_mm_tree(RANGE_OPT_PARAM * param,Item ** cond_ptr)8536 SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
8537 {
8538   DBUG_ENTER("Item_equal::get_mm_tree");
8539   if (const_item())
8540     DBUG_RETURN(get_mm_tree_for_const(param));
8541 
8542   SEL_TREE *tree= 0;
8543   SEL_TREE *ftree= 0;
8544 
8545   Item *value;
8546   if (!(value= get_const()) || value->is_expensive())
8547     DBUG_RETURN(0);
8548 
8549   Item_equal_fields_iterator it(*this);
8550   table_map ref_tables= value->used_tables();
8551   table_map param_comp= ~(param->prev_tables | param->read_tables |
8552 		          param->current_table);
8553   while (it++)
8554   {
8555     Field *field= it.get_curr_field();
8556     if (!((ref_tables | field->table->map) & param_comp))
8557     {
8558       tree= get_mm_parts(param, field, Item_func::EQ_FUNC, value);
8559       ftree= !ftree ? tree : tree_and(param, ftree, tree);
8560     }
8561   }
8562 
8563   DBUG_RETURN(ftree);
8564 }
8565 
8566 
/*
  @brief
    Check if there is an one-segment unique key that matches the field exactly

  @detail
    In the future we could also add "almost unique" indexes where any value is
    present only in a few rows (but not necessarily exactly one row)

  @return  true if such a key exists, false otherwise
*/
static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
{
  DBUG_ENTER("is_field_an_unique_index");

  // The check for using_real_indexes is there because of the heuristics
  // this function is used for.
  if (param->using_real_indexes)
  {
    // Iterate over the keys in which 'field' is the first key part
    key_map::Iterator it(field->key_start);
    uint key_no;
    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
    {
      KEY *key_info= &field->table->key_info[key_no];
      if (key_info->user_defined_key_parts == 1 &&
          (key_info->flags & HA_NOSAME))
      {
        DBUG_RETURN(true);
      }
    }
  }
  DBUG_RETURN(false);
}
8597 
8598 
/*
  Build a SEL_TREE for a simple predicate "field op value".

  For every key part of param->table that is defined over 'field',
  create a SEL_ARG interval (via get_mm_leaf) and add it to the
  returned tree. If 'value' depends on tables that are not yet read,
  a MAYBE_KEY marker is stored instead of a real interval.

  Returns NULL if no usable interval could be built, or a tree with
  type SEL_TREE::IMPOSSIBLE if the predicate can never be true.
*/
SEL_TREE *
Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
	                     Item_func::Functype type, Item *value)
{
  DBUG_ENTER("get_mm_parts");
  if (field->table != param->table)
    DBUG_RETURN(0);

  KEY_PART *key_part = param->key_parts;
  KEY_PART *end = param->key_parts_end;
  SEL_TREE *tree=0;
  table_map value_used_tables= 0;
  /* 'value' must not depend on tables that will never be available */
  if (value &&
      (value_used_tables= value->used_tables()) &
      ~(param->prev_tables | param->read_tables))
    DBUG_RETURN(0);
  for (; key_part != end ; key_part++)
  {
    if (field->eq(key_part->field))
    {
      SEL_ARG *sel_arg=0;
      /* Lazily allocate the tree on the first matching key part */
      if (!tree && !(tree=new (param->thd->mem_root) SEL_TREE(param->mem_root,
                                                              param->keys)))
	DBUG_RETURN(0);				// OOM
      if (!value || !(value_used_tables & ~param->read_tables))
      {
        /*
          We need to restore the runtime mem_root of the thread in this
          function because it evaluates the value of its argument, while
          the argument can be any, e.g. a subselect. The subselect
          items, in turn, assume that all the memory allocated during
          the evaluation has the same life span as the item itself.
          TODO: opt_range.cc should not reset thd->mem_root at all.
        */
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
        param->thd->mem_root= tmp_root;

	if (!sel_arg)
	  continue;
	if (sel_arg->type == SEL_ARG::IMPOSSIBLE)
	{
	  tree->type=SEL_TREE::IMPOSSIBLE;
	  DBUG_RETURN(tree);
	}
      }
      else
      {
	// This key may be used later
	if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY)))
	  DBUG_RETURN(0);			// OOM
      }
      sel_arg->part=(uchar) key_part->part;
      sel_arg->max_part_no= sel_arg->part+1;
      tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
      tree->keys_map.set_bit(key_part->key);
    }
  }

  /* A tree without any interval or merge is useless: drop it */
  if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all())
    tree= NULL;
  DBUG_RETURN(tree);
}
8663 
8664 
8665 SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM * param,Field * field,KEY_PART * key_part,Item_func::Functype type,Item * value)8666 Item_func_null_predicate::get_mm_leaf(RANGE_OPT_PARAM *param,
8667                                       Field *field, KEY_PART *key_part,
8668                                       Item_func::Functype type,
8669                                       Item *value)
8670 {
8671   MEM_ROOT *alloc= param->mem_root;
8672   DBUG_ENTER("Item_func_null_predicate::get_mm_leaf");
8673   DBUG_ASSERT(!value);
8674   /*
8675     No check for field->table->maybe_null. It's perfecly fine to use range
8676     access for cases like
8677 
8678       SELECT * FROM t1 LEFT JOIN t2 ON t2.key IS [NOT] NULL
8679 
8680     ON expression is evaluated before considering NULL-complemented rows, so
8681     IS [NOT] NULL has regular semantics.
8682   */
8683   if (!field->real_maybe_null())
8684     DBUG_RETURN(type == ISNULL_FUNC ? &null_element : NULL);
8685   SEL_ARG *tree;
8686   if (!(tree= new (alloc) SEL_ARG(field, is_null_string, is_null_string)))
8687     DBUG_RETURN(0);
8688   if (type == Item_func::ISNOTNULL_FUNC)
8689   {
8690     tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
8691     tree->max_flag=NO_MAX_RANGE;
8692   }
8693   DBUG_RETURN(tree);
8694 }
8695 
8696 
/*
  Build a SEL_ARG interval for "field LIKE pattern".

  The constant prefix of the pattern (up to the first wildcard) is
  converted to a [min_str, max_str] key range via charset->like_range().
  Returns NULL when LIKE cannot be used for range access on this field,
  or &null_element when the pattern evaluates to SQL NULL.
*/
SEL_ARG *
Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
                            Field *field, KEY_PART *key_part,
                            Item_func::Functype type, Item *value)
{
  DBUG_ENTER("Item_func_like::get_mm_leaf");
  DBUG_ASSERT(value);

  /* Only plain key images can be compared with like_range() results */
  if (key_part->image_type != Field::itRAW)
    DBUG_RETURN(0);

  if (param->using_real_indexes &&
      !field->optimize_range(param->real_keynr[key_part->key],
                             key_part->part))
    DBUG_RETURN(0);

  /* Collation mismatch: key order does not match comparison order */
  if (field->result_type() == STRING_RESULT &&
      field->charset() != compare_collation())
    DBUG_RETURN(0);

  StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
  String *res;

  if (!(res= value->val_str(&tmp)))
    DBUG_RETURN(&null_element);

  if (field->cmp_type() != STRING_RESULT ||
      field->type_handler() == &type_handler_enum ||
      field->type_handler() == &type_handler_set)
    DBUG_RETURN(0);

  /*
    TODO:
    Check if this was a function. This should have be optimized away
    in the sql_select.cc
  */
  if (res != &tmp)
  {
    tmp.copy(*res);				// Get own copy
    res= &tmp;
  }

  /*
    Compute the layout of the key image:
    [null byte (if nullable)] [length prefix (if var-length)] [data]
  */
  uint maybe_null= (uint) field->real_maybe_null();
  size_t field_length= field->pack_length() + maybe_null;
  size_t offset= maybe_null;
  size_t length= key_part->store_length;

  if (length != key_part->length + maybe_null)
  {
    /* key packed with length prefix */
    offset+= HA_KEY_BLOB_LENGTH;
    field_length= length - HA_KEY_BLOB_LENGTH;
  }
  else
  {
    if (unlikely(length < field_length))
    {
      /*
        This can only happen in a table created with UNIREG where one key
        overlaps many fields
      */
      length= field_length;
    }
    else
      field_length= length;
  }
  length+= offset;
  /* Allocate min and max key images as one contiguous buffer */
  uchar *min_str,*max_str;
  if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
    DBUG_RETURN(0);
  max_str= min_str + length;
  if (maybe_null)
    max_str[0]= min_str[0]=0;             /* both bounds are NOT NULL */

  size_t min_length, max_length;
  field_length-= maybe_null;
  if (field->charset()->like_range(res->ptr(), res->length(),
                                   escape, wild_one, wild_many,
                                   field_length,
                                   (char*) min_str + offset,
                                   (char*) max_str + offset,
                                   &min_length, &max_length))
    DBUG_RETURN(0);              // Can't optimize with LIKE

  if (offset != maybe_null)			// BLOB or VARCHAR
  {
    /* Store the actual data lengths into the length-prefix bytes */
    int2store(min_str + maybe_null, min_length);
    int2store(max_str + maybe_null, max_length);
  }
  SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
  DBUG_RETURN(tree);
}
8789 
8790 
8791 SEL_ARG *
get_mm_leaf(RANGE_OPT_PARAM * param,Field * field,KEY_PART * key_part,Item_func::Functype functype,Item * value)8792 Item_bool_func::get_mm_leaf(RANGE_OPT_PARAM *param,
8793                             Field *field, KEY_PART *key_part,
8794                             Item_func::Functype functype, Item *value)
8795 {
8796   DBUG_ENTER("Item_bool_func::get_mm_leaf");
8797   DBUG_ASSERT(value); // IS NULL and IS NOT NULL are handled separately
8798   if (key_part->image_type != Field::itRAW)
8799     DBUG_RETURN(0);   // e.g. SPATIAL index
8800   DBUG_RETURN(field->get_mm_leaf(param, key_part, this,
8801                                  functype_to_scalar_comparison_op(functype),
8802                                  value));
8803 }
8804 
8805 
can_optimize_scalar_range(const RANGE_OPT_PARAM * param,const KEY_PART * key_part,const Item_bool_func * cond,scalar_comparison_op op,const Item * value) const8806 bool Field::can_optimize_scalar_range(const RANGE_OPT_PARAM *param,
8807                                       const KEY_PART *key_part,
8808                                       const Item_bool_func *cond,
8809                                       scalar_comparison_op op,
8810                                       const Item *value) const
8811 {
8812   bool is_eq_func= op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL;
8813   if ((param->using_real_indexes &&
8814        !optimize_range(param->real_keynr[key_part->key],
8815                        key_part->part) && !is_eq_func) ||
8816       !can_optimize_range(cond, value, is_eq_func))
8817     return false;
8818   return true;
8819 }
8820 
8821 
make_key_image(MEM_ROOT * mem_root,const KEY_PART * key_part)8822 uchar *Field::make_key_image(MEM_ROOT *mem_root, const KEY_PART *key_part)
8823 {
8824   DBUG_ENTER("Field::make_key_image");
8825   uint maybe_null= (uint) real_maybe_null();
8826   uchar *str;
8827   if (!(str= (uchar*) alloc_root(mem_root, key_part->store_length + 1)))
8828     DBUG_RETURN(0);
8829   if (maybe_null)
8830     *str= (uchar) is_real_null();        // Set to 1 if null
8831   get_key_image(str + maybe_null, key_part->length, key_part->image_type);
8832   DBUG_RETURN(str);
8833 }
8834 
8835 
stored_field_make_mm_leaf_truncated(RANGE_OPT_PARAM * param,scalar_comparison_op op,Item * value)8836 SEL_ARG *Field::stored_field_make_mm_leaf_truncated(RANGE_OPT_PARAM *param,
8837                                                     scalar_comparison_op op,
8838                                                     Item *value)
8839 {
8840   DBUG_ENTER("Field::stored_field_make_mm_leaf_truncated");
8841   if ((op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) &&
8842       value->result_type() == item_cmp_type(result_type(),
8843                                             value->result_type()))
8844     DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
8845   /*
8846     TODO: We should return trees of the type SEL_ARG::IMPOSSIBLE
8847     for the cases like int_field > 999999999999999999999999 as well.
8848   */
8849   DBUG_RETURN(0);
8850 }
8851 
8852 
/*
  Build a range leaf (SEL_ARG) for "numeric_field <op> value".

  The constant is first materialized into the field itself via
  save_in_field_no_warnings(); the leaf is then built from the field's
  key image.  Conversion outcomes:
  - The stored value is SQL NULL and the operator is not <=>, or the
    conversion failed fatally (err < 0): return &null_element.
  - The value was changed by the conversion (err > 0) and the comparison
    type differs from the value's result type: delegate to the "truncated"
    handler, which may prove an equality impossible.
  - Otherwise: build an ordinary leaf from the stored value.
  Returns NULL when this predicate cannot be used for range access.
*/
SEL_ARG *Field_num::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
                                const Item_bool_func *cond,
                                scalar_comparison_op op, Item *value)
{
  DBUG_ENTER("Field_num::get_mm_leaf");
  if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
    DBUG_RETURN(0);
  int err= value->save_in_field_no_warnings(this, 1);
  if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
    DBUG_RETURN(&null_element);
  if (err > 0 && cmp_type() != value->result_type())
    DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
  DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
}
8867 
8868 
/*
  Build a range leaf (SEL_ARG) for "temporal_field <op> value".

  Same scheme as Field_num::get_mm_leaf(), except that any non-exact
  conversion (err > 0) goes straight to the "truncated" handler,
  regardless of the value's result type.
*/
SEL_ARG *Field_temporal::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
                                     const Item_bool_func *cond,
                                     scalar_comparison_op op, Item *value)
{
  DBUG_ENTER("Field_temporal::get_mm_leaf");
  if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
    DBUG_RETURN(0);
  int err= value->save_in_field_no_warnings(this, 1);
  /* NULL compared with anything but <=> can never match */
  if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
    DBUG_RETURN(&null_element);
  if (err > 0)
    DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
  DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
}
8883 
8884 
/*
  Build a range leaf (SEL_ARG) for "date_field <op> value".

  Like Field_temporal::get_mm_leaf(), with one extra case: err == 3 from
  save_in_field_no_warnings() means a DATETIME constant was stored into a
  DATE column with a non-zero time part cut off; see the comment in the
  body for how the interval bounds are then derived.
*/
SEL_ARG *Field_date_common::get_mm_leaf(RANGE_OPT_PARAM *prm,
                                        KEY_PART *key_part,
                                        const Item_bool_func *cond,
                                        scalar_comparison_op op,
                                        Item *value)
{
  DBUG_ENTER("Field_date_common::get_mm_leaf");
  if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
    DBUG_RETURN(0);
  int err= value->save_in_field_no_warnings(this, 1);
  if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
    DBUG_RETURN(&null_element);
  if (err > 0)
  {
    if (err == 3)
    {
      /*
        We were saving DATETIME into a DATE column, the conversion went ok
        but a non-zero time part was cut off.

        In MySQL's SQL dialect, DATE and DATETIME are compared as datetime
        values. Index over a DATE column uses DATE comparison. Changing
        from one comparison to the other is possible:

        datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10'
        datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10'

        datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10'
        datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10'

        but we'll need to convert '>' to '>=' and '<' to '<='. This will
        be done together with other types at the end of this function
        (grep for stored_field_cmp_to_item)
      */
      /* Equality with a lost time part can never hold for a DATE column */
      if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL)
        DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this));
      DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
    }
    DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
  }
  DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
}
8927 
8928 
/*
  Build a range leaf (SEL_ARG) for "string_field <op> value".

  On a lossy conversion (err > 0): an equality can never match a value
  that was changed when stored, so it is impossible; for other operators
  nothing can be inferred from the altered constant, so NULL is returned.
*/
SEL_ARG *Field_str::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
                                const Item_bool_func *cond,
                                scalar_comparison_op op, Item *value)
{
  DBUG_ENTER("Field_str::get_mm_leaf");
  if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
    DBUG_RETURN(0);
  int err= value->save_in_field_no_warnings(this, 1);
  if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
    DBUG_RETURN(&null_element);
  if (err > 0)
  {
    if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL)
      DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this));
    DBUG_RETURN(NULL); /*  Cannot infer anything */
  }
  DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
}
8947 
8948 
/*
  Build a range leaf (SEL_ARG) for an integer field compared to "value";
  unsigned_field tells whether the field is UNSIGNED.

  - NULL comparison (other than <=>) or fatal conversion error: return
    &null_element.
  - err > 0, i.e. the constant did not fit the field:
      * non-integer constant: generic "truncated" handler;
      * integer constant: "bounded int" handler, which rewrites the
        operation against the clamped value (e.g. tinyint < 200 -> <= 127).
  - Clean store: integer constants build an "exact" leaf (no bound
    adjustment needed); other constant types go through the generic
    leaf builder.
*/
SEL_ARG *Field::get_mm_leaf_int(RANGE_OPT_PARAM *prm, KEY_PART *key_part,
                                const Item_bool_func *cond,
                                scalar_comparison_op op, Item *value,
                                bool unsigned_field)
{
  DBUG_ENTER("Field::get_mm_leaf_int");
  if (!can_optimize_scalar_range(prm, key_part, cond, op, value))
    DBUG_RETURN(0);
  int err= value->save_in_field_no_warnings(this, 1);
  if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0)
    DBUG_RETURN(&null_element);
  if (err > 0)
  {
    if (value->result_type() != INT_RESULT)
      DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value));
    else
      DBUG_RETURN(stored_field_make_mm_leaf_bounded_int(prm, key_part,
                                                        op, value,
                                                        unsigned_field));
  }
  if (value->result_type() != INT_RESULT)
    DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value));
  DBUG_RETURN(stored_field_make_mm_leaf_exact(prm, key_part, op, value));
}
8973 
8974 
8975 /*
8976   This method is called when:
8977   - value->save_in_field_no_warnings() returned err > 0
8978   - and both field and "value" are of integer data types
8979   If an integer got bounded (e.g. to within 0..255 / -128..127)
8980   for < or >, set flags as for <= or >= (no NEAR_MAX / NEAR_MIN)
8981 */
8982 
stored_field_make_mm_leaf_bounded_int(RANGE_OPT_PARAM * param,KEY_PART * key_part,scalar_comparison_op op,Item * value,bool unsigned_field)8983 SEL_ARG *Field::stored_field_make_mm_leaf_bounded_int(RANGE_OPT_PARAM *param,
8984                                                       KEY_PART *key_part,
8985                                                       scalar_comparison_op op,
8986                                                       Item *value,
8987                                                       bool unsigned_field)
8988 {
8989   DBUG_ENTER("Field::stored_field_make_mm_leaf_bounded_int");
8990   if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) // e.g. tinyint = 200
8991     DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
8992   longlong item_val= value->val_int();
8993 
8994   if (op == SCALAR_CMP_LT && item_val > 0)
8995     op= SCALAR_CMP_LE; // e.g. rewrite (tinyint < 200) to (tinyint <= 127)
8996   else if (op == SCALAR_CMP_GT && !unsigned_field &&
8997            !value->unsigned_flag && item_val < 0)
8998     op= SCALAR_CMP_GE; // e.g. rewrite (tinyint > -200) to (tinyint >= -128)
8999 
9000   /*
9001     Check if we are comparing an UNSIGNED integer with a negative constant.
9002     In this case we know that:
9003     (a) (unsigned_int [< | <=] negative_constant) == FALSE
9004     (b) (unsigned_int [> | >=] negative_constant) == TRUE
9005     In case (a) the condition is false for all values, and in case (b) it
9006     is true for all values, so we can avoid unnecessary retrieval and condition
9007     testing, and we also get correct comparison of unsinged integers with
9008     negative integers (which otherwise fails because at query execution time
9009     negative integers are cast to unsigned if compared with unsigned).
9010    */
9011   if (unsigned_field && !value->unsigned_flag && item_val < 0)
9012   {
9013     if (op == SCALAR_CMP_LT || op == SCALAR_CMP_LE) // e.g. uint < -1
9014       DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
9015     if (op == SCALAR_CMP_GT || op == SCALAR_CMP_GE) // e.g. uint > -1
9016       DBUG_RETURN(0);
9017   }
9018   DBUG_RETURN(stored_field_make_mm_leaf_exact(param, key_part, op, value));
9019 }
9020 
9021 
/*
  Create a SEL_ARG leaf for "this_field <op> value" from the field's
  current key image.  The SEL_ARG_LT/GT/GE constructors that receive
  (thd, ..., value) compare the stored field value against the original
  item, presumably to decide whether the interval bound must be open or
  closed after a rounding store (see the stored_field_cmp_to_item note
  in Field_date_common::get_mm_leaf).  EQ/<=> produce a point interval
  [str, str].  Returns NULL on out-of-memory.
*/
SEL_ARG *Field::stored_field_make_mm_leaf(RANGE_OPT_PARAM *param,
                                          KEY_PART *key_part,
                                          scalar_comparison_op op,
                                          Item *value)
{
  DBUG_ENTER("Field::stored_field_make_mm_leaf");
  THD *thd= param->thd;
  MEM_ROOT *mem_root= param->mem_root;
  uchar *str;
  if (!(str= make_key_image(param->mem_root, key_part)))
    DBUG_RETURN(0);

  switch (op) {
  case SCALAR_CMP_LE:
    DBUG_RETURN(new (mem_root) SEL_ARG_LE(str, this));
  case SCALAR_CMP_LT:
    DBUG_RETURN(new (mem_root) SEL_ARG_LT(thd, str, this, value));
  case SCALAR_CMP_GT:
    DBUG_RETURN(new (mem_root) SEL_ARG_GT(thd, str, key_part, this, value));
  case SCALAR_CMP_GE:
    DBUG_RETURN(new (mem_root) SEL_ARG_GE(thd, str, key_part, this, value));
  case SCALAR_CMP_EQ:
  case SCALAR_CMP_EQUAL:
    DBUG_RETURN(new (mem_root) SEL_ARG(this, str, str));
    break;
  }
  DBUG_ASSERT(0);  // all scalar_comparison_op values are handled above
  DBUG_RETURN(NULL);
}
9051 
9052 
/*
  Like stored_field_make_mm_leaf(), but for the case when the constant
  was stored into the field exactly: the bound is taken as-is, with no
  comparison against the original item to open/close the interval
  (the SEL_ARG_LT/GT/GE constructors here take no thd/value arguments).
  Returns NULL on out-of-memory.
*/
SEL_ARG *Field::stored_field_make_mm_leaf_exact(RANGE_OPT_PARAM *param,
                                                KEY_PART *key_part,
                                                scalar_comparison_op op,
                                                Item *value)
{
  DBUG_ENTER("Field::stored_field_make_mm_leaf_exact");
  uchar *str;
  if (!(str= make_key_image(param->mem_root, key_part)))
    DBUG_RETURN(0);

  switch (op) {
  case SCALAR_CMP_LE:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_LE(str, this));
  case SCALAR_CMP_LT:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_LT(str, this));
  case SCALAR_CMP_GT:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_GT(str, key_part, this));
  case SCALAR_CMP_GE:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_GE(str, this));
  case SCALAR_CMP_EQ:
  case SCALAR_CMP_EQUAL:
    DBUG_RETURN(new (param->mem_root) SEL_ARG(this, str, str));
    break;
  }
  DBUG_ASSERT(0);  // all scalar_comparison_op values are handled above
  DBUG_RETURN(NULL);
}
9080 
9081 
9082 /******************************************************************************
9083 ** Tree manipulation functions
9084 ** If tree is 0 it means that the condition can't be tested. It refers
9085 ** to a non existent table or to a field in current table with isn't a key.
9086 ** The different tree flags:
9087 ** IMPOSSIBLE:	 Condition is never TRUE
9088 ** ALWAYS:	 Condition is always TRUE
9089 ** MAYBE:	 Condition may exists when tables are read
9090 ** MAYBE_KEY:	 Condition refers to a key that may be used in join loop
9091 ** KEY_RANGE:	 Condition uses a key
9092 ******************************************************************************/
9093 
9094 /*
9095   Update weights for SEL_ARG graph that is connected only via next_key_part
9096   (and not left/right) links
9097 */
update_weight_for_single_arg(SEL_ARG * arg)9098 static uint update_weight_for_single_arg(SEL_ARG *arg)
9099 {
9100   if (arg->next_key_part)
9101     return (arg->weight= 1 + update_weight_for_single_arg(arg->next_key_part));
9102   else
9103     return (arg->weight= 1);
9104 }
9105 
9106 
9107 /*
9108   Add a new key test to a key when scanning through all keys
9109   This will never be called for same key parts.
9110 */
9111 
static SEL_ARG *
sel_add(SEL_ARG *key1,SEL_ARG *key2)
{
  SEL_ARG *root,**key_link;

  /* If one operand is absent the other is the whole result */
  if (!key1)
    return key2;
  if (!key2)
    return key1;

  /*
    Merge the two chains linked via next_key_part into a single chain
    ordered by keypart number (classic merge step; both input chains are
    assumed already ordered by ->part).  key_link always points to the
    link field where the next smaller node must be attached.
  */
  key_link= &root;
  while (key1 && key2)
  {
    if (key1->part < key2->part)
    {
      *key_link= key1;
      key_link= &key1->next_key_part;
      key1=key1->next_key_part;
    }
    else
    {
      *key_link= key2;
      key_link= &key2->next_key_part;
      key2=key2->next_key_part;
    }
  }
  /* Append whichever chain still has nodes left */
  *key_link=key1 ? key1 : key2;

  /* The chain changed, so the per-node weights must be recomputed */
  update_weight_for_single_arg(root);
  return root;
}
9143 
9144 
9145 /*
9146   Build a range tree for the conjunction of the range parts of two trees
9147 
9148   SYNOPSIS
9149     and_range_trees()
9150       param           Context info for the operation
9151       tree1           SEL_TREE for the first conjunct
9152       tree2           SEL_TREE for the second conjunct
9153       result          SEL_TREE for the result
9154 
9155   DESCRIPTION
9156     This function takes range parts of two trees tree1 and tree2 and builds
9157     a range tree for the conjunction of the formulas that these two range parts
9158     represent.
9159     More exactly:
9160     if the range part of tree1 represents the normalized formula
9161       R1_1 AND ... AND R1_k,
9162     and the range part of tree2 represents the normalized formula
9163       R2_1 AND ... AND R2_k,
9164     then the range part of the result represents the formula:
9165      RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i from [1..k]
9166 
9167     The function assumes that tree1 is never equal to tree2. At the same
9168     time the tree result can be the same as tree1 (but never as tree2).
9169     If result==tree1 then rt replaces the range part of tree1 leaving
9170     imerges as they are.
9171     if result!=tree1 than it is assumed that the SEL_ARG trees in tree1 and
9172     tree2 should be preserved. Otherwise they can be destroyed.
9173 
9174   RETURN
    1    if the type of the result tree is SEL_TREE::IMPOSSIBLE
9176     0    otherwise
9177 */
9178 
9179 static
and_range_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree1,SEL_TREE * tree2,SEL_TREE * result)9180 int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
9181                     SEL_TREE *result)
9182 {
9183   DBUG_ENTER("and_ranges");
9184   key_map  result_keys;
9185   result_keys.clear_all();
9186   key_map anded_keys= tree1->keys_map;
9187   anded_keys.merge(tree2->keys_map);
9188   int key_no;
9189   key_map::Iterator it(anded_keys);
9190   while ((key_no= it++) != key_map::Iterator::BITMAP_END)
9191   {
9192     uint flag=0;
9193     SEL_ARG *key1= tree1->keys[key_no];
9194     SEL_ARG *key2= tree2->keys[key_no];
9195     if (key1 && !key1->simple_key())
9196       flag|= CLONE_KEY1_MAYBE;
9197     if (key2 && !key2->simple_key())
9198       flag|=CLONE_KEY2_MAYBE;
9199     if (result != tree1)
9200     {
9201       if (key1)
9202         key1->incr_refs();
9203       if (key2)
9204         key2->incr_refs();
9205     }
9206     SEL_ARG *key;
9207     if ((result->keys[key_no]= key= key_and_with_limit(param, key_no,
9208                                                        key1, key2, flag)))
9209     {
9210       if (key && key->type == SEL_ARG::IMPOSSIBLE)
9211       {
9212 	result->type= SEL_TREE::IMPOSSIBLE;
9213         if (param->using_real_indexes)
9214         {
9215           param->table->with_impossible_ranges.set_bit(param->
9216                                                        real_keynr[key_no]);
9217         }
9218         DBUG_RETURN(1);
9219       }
9220       result_keys.set_bit(key_no);
9221 #ifdef EXTRA_DEBUG
9222       if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
9223         key->test_use_count(key);
9224 #endif
9225     }
9226   }
9227   result->keys_map= result_keys;
9228   DBUG_RETURN(0);
9229 }
9230 
9231 
9232 /*
9233   Build a SEL_TREE for a conjunction out of such trees for the conjuncts
9234 
9235   SYNOPSIS
9236     tree_and()
9237       param           Context info for the operation
9238       tree1           SEL_TREE for the first conjunct
9239       tree2           SEL_TREE for the second conjunct
9240 
9241   DESCRIPTION
9242     This function builds a tree for the formula (A AND B) out of the trees
9243     tree1 and tree2 that has been built for the formulas A and B respectively.
9244 
9245     In a general case
9246       tree1 represents the formula RT1 AND MT1,
9247         where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
9248       tree2 represents the formula RT2 AND MT2
9249         where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 AND ... AND M2_l2.
9250 
    The result tree will represent the formula of the following structure:
9252       RT AND RT1MT2 AND RT2MT1, such that
9253         rt is a tree obtained by range intersection of trees tree1 and tree2,
9254         RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2,
9255         RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1,
9256         where rt1m2_i (i=1,...,l2) is the result of the pushdown operation
9257         of range tree rt1 into imerge m2_i, while rt2m1_j (j=1,...,l1) is the
9258         result of the pushdown operation of range tree rt2 into imerge m1_j.
9259 
9260     RT1MT2/RT2MT is empty if MT2/MT1 is empty.
9261 
9262     The range intersection of two range trees is produced by the function
9263     and_range_trees. The pushdown of a range tree to a imerge is performed
9264     by the function imerge_list_and_tree. This function may produce imerges
9265     containing only one range tree. Such trees are intersected with rt and
9266     the result of intersection is returned as the range part of the result
9267     tree, while the corresponding imerges are removed altogether from its
9268     imerge part.
9269 
9270   NOTE
9271     The pushdown operation of range trees into imerges is needed to be able
9272     to construct valid imerges for the condition like this:
9273       key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2)
9274 
9275   NOTE
9276     Currently we do not support intersection between indexes and index merges.
9277     When this will be supported the list of imerges for the result tree
9278     should include also imerges from M1 and M2. That's why an extra parameter
9279     is added to the function imerge_list_and_tree. If we call the function
9280     with the last parameter equal to FALSE then MT1 and MT2 will be preserved
9281     in the imerge list of the result tree. This can lead to the exponential
9282     growth of the imerge list though.
9283     Currently the last parameter of imerge_list_and_tree calls is always
9284     TRUE.
9285 
9286   RETURN
9287     The result tree, if a success
9288     0 - otherwise.
9289 */
9290 
static
SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  /* A missing conjunct restricts nothing: the other tree is the result */
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  /* FALSE AND x == FALSE;  x AND TRUE == x */
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  /* A MAYBE tree carries no ranges: keep the other tree, downgraded */
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  /* Push each tree's range part down into the other tree's imerges */
  if (!tree1->merges.is_empty())
    imerge_list_and_tree(param, &tree1->merges, tree2, TRUE);
  if (!tree2->merges.is_empty())
    imerge_list_and_tree(param, &tree2->merges, tree1, TRUE);
  /* Intersect range parts in place; tree1 becomes the result tree */
  if (and_range_trees(param, tree1, tree2, tree1))
    DBUG_RETURN(tree1);                      // result proved IMPOSSIBLE
  imerge_list_and_list(&tree1->merges, &tree2->merges);
  eliminate_single_tree_imerges(param, tree1);
  DBUG_RETURN(tree1);
}
9325 
9326 
9327 /*
9328   Eliminate single tree imerges in a SEL_TREE objects
9329 
9330   SYNOPSIS
9331     eliminate_single_tree_imerges()
9332       param      Context info for the function
9333       tree       SEL_TREE where single tree imerges are to be eliminated
9334 
9335   DESCRIPTION
    For each imerge in 'tree' that contains only one disjunct tree, i.e.
    for any imerge of the form m=rt, the function ANDs rt with the range
    part of 'tree', replaces the range part with the result of this
    conjunction, and removes the imerge m from the merge part of 'tree'.
9340 
9341   RETURN VALUE
9342     none
9343 */
9344 
static
void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  /* Detach the merge list so the tree_and() call below operates on a
     tree whose imerge part is temporarily empty */
  List<SEL_IMERGE> merges= tree->merges;
  List_iterator<SEL_IMERGE> it(merges);
  tree->merges.empty();
  while ((imerge= it++))
  {
    /* An imerge with exactly one disjunct is just a range tree:
       AND it into the range part and drop the imerge */
    if (imerge->trees+1 == imerge->trees_next)
    {
      tree= tree_and(param, tree, *imerge->trees);
      it.remove();
    }
  }
  /* Reattach the remaining (multi-disjunct) imerges */
  tree->merges= merges;
}
9362 
9363 
9364 /*
9365   For two trees check that there are indexes with ranges in both of them
9366 
9367   SYNOPSIS
9368     sel_trees_have_common_keys()
9369       tree1           SEL_TREE for the first tree
9370       tree2           SEL_TREE for the second tree
9371       common_keys OUT bitmap of all indexes with ranges in both trees
9372 
9373   DESCRIPTION
    For two trees tree1 and tree2 the function checks if there are indexes
    in their range parts such that SEL_ARG trees are defined for them in the
    range parts of both trees. The function returns the bitmap of such
    indexes in the parameter common_keys.

  RETURN
    TRUE    if there are such indexes (common_keys is not empty)
9381     FALSE   otherwise
9382 */
9383 
static
bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
                                key_map *common_keys)
{
  /* common_keys = set of indexes with SEL_ARG trees in BOTH inputs */
  *common_keys= tree1->keys_map;
  common_keys->intersect(tree2->keys_map);
  return !common_keys->is_clear_all();
}
9392 
9393 
9394 /*
9395   Check whether range parts of two trees can be ored for some indexes
9396 
9397   SYNOPSIS
9398     sel_trees_can_be_ored()
9399       param              Context info for the function
9400       tree1              SEL_TREE for the first tree
9401       tree2              SEL_TREE for the second tree
9402       common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees
9403                         OUT: bitmap of all indexes that can be ored
9404 
9405   DESCRIPTION
9406     For two trees tree1 and tree2 and the bitmap common_keys containing
9407     bits for indexes that have SEL_ARG trees in range parts of both trees
9408     the function checks if there are indexes for which SEL_ARG trees can
9409     be ored. Two SEL_ARG trees for the same index can be ored if the most
9410     major components of the index used in these trees coincide. If the
9411     SEL_ARG trees for an index cannot be ored the function clears the bit
9412     for this index in the bitmap common_keys.
9413 
9414     The function does not verify that indexes marked in common_keys really
9415     have SEL_ARG trees in both tree1 and tree2. It assumes that this is true.
9416 
9417   NOTE
9418     The function sel_trees_can_be_ored is usually used in pair with the
9419     function sel_trees_have_common_keys.
9420 
9421   RETURN
9422     TRUE    if there are indexes for which SEL_ARG trees can be ored
9423     FALSE   otherwise
9424 */
9425 
static
bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
                           SEL_TREE *tree1, SEL_TREE *tree2,
                           key_map *common_keys)
{
  DBUG_ENTER("sel_trees_can_be_ored");
  if (!sel_trees_have_common_keys(tree1, tree2, common_keys))
    DBUG_RETURN(FALSE);
  int key_no;
  key_map::Iterator it(*common_keys);
  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
  {
    DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]);
    /* Trees have a common key, check if they refer to the same key part */
    if (tree1->keys[key_no]->part != tree2->keys[key_no]->part)
      common_keys->clear_bit(key_no);   // different first keyparts: not orable
  }
  DBUG_RETURN(!common_keys->is_clear_all());
}
9445 
9446 /*
9447   Check whether the key parts inf_init..inf_end-1 of one index can compose
9448   an infix for the key parts key_init..key_end-1 of another index
9449 */
9450 
9451 static
is_key_infix(KEY_PART * key_init,KEY_PART * key_end,KEY_PART * inf_init,KEY_PART * inf_end)9452 bool is_key_infix(KEY_PART *key_init, KEY_PART *key_end,
9453                   KEY_PART *inf_init, KEY_PART *inf_end)
9454 {
9455   KEY_PART *key_part, *inf_part;
9456   for (key_part= key_init; key_part < key_end; key_part++)
9457   {
9458     if (key_part->field->eq(inf_init->field))
9459       break;
9460   }
9461   if (key_part == key_end)
9462     return false;
9463   for (key_part++, inf_part= inf_init + 1;
9464        key_part < key_end && inf_part < inf_end;
9465        key_part++, inf_part++)
9466   {
9467     if (!key_part->field->eq(inf_part->field))
9468       return false;
9469   }
9470   return inf_part == inf_end;
9471 }
9472 
9473 
9474 /*
9475   Check whether range parts of two trees must be ored for some indexes
9476 
9477   SYNOPSIS
9478     sel_trees_must_be_ored()
9479       param              Context info for the function
9480       tree1              SEL_TREE for the first tree
9481       tree2              SEL_TREE for the second tree
9482       ordable_keys       bitmap of SEL_ARG trees that can be ored
9483 
9484   DESCRIPTION
9485     For two trees tree1 and tree2 the function checks whether they must be
9486     ored. The function assumes that the bitmap ordable_keys contains bits for
9487     those corresponding pairs of SEL_ARG trees from tree1 and tree2 that can
9488     be ored.
9489     We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees
9490     r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both
9491     of them are marked in ordable_keys, can be merged.
9492 
9493   NOTE
9494     The function sel_trees_must_be_ored as a rule is used in pair with the
9495     function sel_trees_can_be_ored.
9496 
9497   RETURN
9498     TRUE    if there are indexes for which SEL_ARG trees must be ored
9499     FALSE   otherwise
9500 */
9501 
static
bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
                            SEL_TREE *tree1, SEL_TREE *tree2,
                            key_map oredable_keys)
{
  key_map tmp;
  DBUG_ENTER("sel_trees_must_be_ored");

  /* Every index used by either tree must itself be oredable, else give up */
  tmp= tree1->keys_map;
  tmp.merge(tree2->keys_map);
  tmp.subtract(oredable_keys);
  if (!tmp.is_clear_all())
    DBUG_RETURN(FALSE);

  /*
    For every pair of distinct oredable indexes, the used keypart range of
    one must be an infix of the other's (in either direction); otherwise
    the pair cannot be merged and the trees need not be ored.
  */
  int idx1, idx2;
  key_map::Iterator it1(oredable_keys);
  while ((idx1= it1++) != key_map::Iterator::BITMAP_END)
  {
    KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part;
    KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no;
    key_map::Iterator it2(oredable_keys);
    while ((idx2= it2++) != key_map::Iterator::BITMAP_END)
    {
      if (idx2 <= idx1)
        continue;                      // consider each unordered pair once

      KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part;
      KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no;
      if (!is_key_infix(key1_init, key1_end, key2_init, key2_end) &&
          !is_key_infix(key2_init, key2_end, key1_init, key1_end))
        DBUG_RETURN(FALSE);
    }
  }

  DBUG_RETURN(TRUE);
}
9538 
9539 
9540 /*
9541   Remove the trees that are not suitable for record retrieval
9542 
9543   SYNOPSIS
9544     remove_nonrange_trees()
9545       param  Context info for the function
9546       tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
9547 
9548   DESCRIPTION
9549     This function walks through tree->keys[] and removes the SEL_ARG* trees
9550     that are not "maybe" trees (*) and cannot be used to construct quick range
9551     selects.
9552     (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
9553           these types here as well.
9554 
9555     A SEL_ARG* tree cannot be used to construct quick select if it has
9556     tree->part != 0. (e.g. it could represent "keypart2 < const").
9557 
9558     Normally we allow construction of SEL_TREE objects that have SEL_ARG
9559     trees that do not allow quick range select construction.
9560     For example:
9561     for " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
9562     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
9563     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
9564                                                from this
9565     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
9566                                    tree.
9567 
9568     Another example:
9569     tree3= SEL_TREE { SEL_ARG{key1part1 = 1} }
9570     tree4= SEL_TREE { SEL_ARG{key2part2 = 2} }  -- can't make quick range select
9571                                                from this
    call tree_or(tree3, tree4) -- creates a SEL_MERGE out of which no index
9573     merge can be constructed, but it is potentially useful, as anding it with
9574     tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that
9575     represents the formula
9576       key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2
9577     for which an index merge can be built.
9578 
9579     Any final SEL_TREE may contain SEL_ARG trees for which no quick select
9580     can be built. Such SEL_ARG trees should be removed from the range part
9581     before different range scans are evaluated. Such SEL_ARG trees also should
9582     be removed from all range trees of each index merge before different
9583     possible index merge plans are evaluated. If after this removal one
9584     of the range trees in the index merge becomes empty the whole index merge
9585     must be discarded.
9586 
9587   RETURN
9588     0  Ok, some suitable trees left
9589     1  No tree->keys[] left.
9590 */
9591 
remove_nonrange_trees(PARAM * param,SEL_TREE * tree)9592 static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree)
9593 {
9594   bool res= FALSE;
9595   for (uint i=0; i < param->keys; i++)
9596   {
9597     if (tree->keys[i])
9598     {
9599       if (tree->keys[i]->part)
9600       {
9601         tree->keys[i]= NULL;
9602         /* Mark that records_in_range has not been called */
9603         param->quick_rows[param->real_keynr[i]]= HA_POS_ERROR;
9604         tree->keys_map.clear_bit(i);
9605       }
9606       else
9607         res= TRUE;
9608     }
9609   }
9610   return !res;
9611 }
9612 
9613 
9614 /*
9615   Restore nonrange trees to their previous state
9616 */
9617 
restore_nonrange_trees(RANGE_OPT_PARAM * param,SEL_TREE * tree,SEL_ARG ** backup_keys)9618 static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree,
9619                                    SEL_ARG **backup_keys)
9620 {
9621   for (uint i=0; i < param->keys; i++)
9622   {
9623     if (backup_keys[i])
9624     {
9625       tree->keys[i]= backup_keys[i];
9626       tree->keys_map.set_bit(i);
9627     }
9628   }
9629 }
9630 
9631 /*
9632   Build a SEL_TREE for a disjunction out of such trees for the disjuncts
9633 
9634   SYNOPSIS
9635     tree_or()
9636       param           Context info for the operation
9637       tree1           SEL_TREE for the first disjunct
9638       tree2           SEL_TREE for the second disjunct
9639 
9640   DESCRIPTION
9641     This function builds a tree for the formula (A OR B) out of the trees
9642     tree1 and tree2 that has been built for the formulas A and B respectively.
9643 
9644     In a general case
9645       tree1 represents the formula RT1 AND MT1,
9646         where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
9647       tree2 represents the formula RT2 AND MT2
9648         where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
9649 
9650     The function constructs the result tree according to the formula
9651       (RT1 OR RT2) AND (MT1 OR RT1) AND (MT2 OR RT2) AND (MT1 OR MT2)
9652     that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2).
9653 
9654     To limit the number of produced imerges the function considers
9655     a weaker formula than the original one:
9656       (RT1 AND M1_1) OR (RT2 AND M2_1)
9657     that is equivalent to:
9658       (RT1 OR RT2)                  (1)
9659         AND
9660       (M1_1 OR M2_1)                (2)
9661         AND
9662       (M1_1 OR RT2)                 (3)
9663         AND
9664       (M2_1 OR RT1)                 (4)
9665 
9666     For the first conjunct (1) the function builds a tree with a range part
9667     and, possibly, one imerge. For the other conjuncts (2-4) the function
9668     produces sets of imerges. All constructed imerges are included into the
9669     result tree.
9670 
9671     For the formula (1) the function produces the tree representing a formula
9672     of the structure RT [AND M], such that:
9673      - the range tree rt contains the result of oring SEL_ARG trees from rt1
9674        and rt2
9675      - the imerge m consists of two range trees rt1 and rt2.
9676     The imerge m is added if it's not true that rt1 and rt2 must be ored
9677     If rt1 and rt2 can't be ored rt is empty and only m is produced for (1).
9678 
9679     To produce imerges for the formula (2) the function calls the function
9680     imerge_list_or_list passing it the merge parts of tree1 and tree2 as
9681     parameters.
9682 
9683     To produce imerges for the formula (3) the function calls the function
9684     imerge_list_or_tree passing it the imerge m1_1 and the range tree rt2 as
9685     parameters. Similarly, to produce imerges for the formula (4) the function
9686     calls the function imerge_list_or_tree passing it the imerge m2_1 and the
9687     range tree rt1.
9688 
9689     If rt1 is empty then the trees for (1) and (4) are empty.
9690     If rt2 is empty then the trees for (1) and (3) are empty.
9691     If mt1 is empty then the trees for (2) and (3) are empty.
9692     If mt2 is empty then the trees for (2) and (4) are empty.
9693 
9694   RETURN
9695     The result tree for the operation if a success
9696     0 - otherwise
9697 */
9698 
static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_or");
  /* A missing operand means range analysis failed for that disjunct */
  if (!tree1 || !tree2)
    DBUG_RETURN(0);
  /* FALSE OR B == B; A OR TRUE == TRUE: keep the tree that decides */
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree1);				// Can't use this
  if (tree2->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree2);

  SEL_TREE *result= NULL;
  key_map result_keys;
  key_map ored_keys;                      // Keys whose SEL_ARG trees can be ored
  SEL_TREE *rtree[2]= {NULL,NULL};        // Range-only copies of tree1/tree2
  SEL_IMERGE *imerge[2]= {NULL, NULL};    // Copies of first imerges (M1_1/M2_1)
  bool no_ranges1= tree1->without_ranges();
  bool no_ranges2= tree2->without_ranges();
  bool no_merges1= tree1->without_imerges();
  bool no_merges2= tree2->without_imerges();
  /* Prepare operands for formula (4): M2_1 OR RT1 (see function comment) */
  if (!no_ranges1 && !no_merges2)
  {
    rtree[0]= new SEL_TREE(tree1, TRUE, param);
    imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param);
  }
  /* Prepare operands for formula (3): M1_1 OR RT2 */
  if (!no_ranges2 && !no_merges1)
  {
    rtree[1]= new SEL_TREE(tree2, TRUE, param);
    imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param);
  }
  bool no_imerge_from_ranges= FALSE;

  /* Build the range part of the tree for the formula (1) */
  if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys))
  {
    bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
    no_imerge_from_ranges= must_be_ored;

    if (no_imerge_from_ranges && no_merges1 && no_merges2)
    {
      /*
        Reuse tree1 as the result in simple cases. This reduces memory usage
        for e.g. "key IN (c1, ..., cN)" which produces a lot of ranges.
      */
      result= tree1;
      result->keys_map.clear_all();
    }
    else
    {
      if (!(result= new (param->mem_root) SEL_TREE(param->mem_root,
                                                   param->keys)))
      {
        DBUG_RETURN(result);                    // OOM: result is NULL here
      }
    }

    key_map::Iterator it(ored_keys);
    int key_no;
    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
    {
      SEL_ARG *key1= tree1->keys[key_no];
      SEL_ARG *key2= tree2->keys[key_no];
      if (!must_be_ored)
      {
        /*
          The original trees are kept as well (for the imerge built below),
          so account for the extra references to their SEL_ARG graphs
        */
        key1->incr_refs();
        key2->incr_refs();
      }
      if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, key2)))
        result->keys_map.set_bit(key_no);
    }
    result->type= tree1->type;
  }
  else
  {
    /* No key can be ored: start from an empty tree; imerges are added below */
    if (!result && !(result= new (param->mem_root) SEL_TREE(param->mem_root,
                                                            param->keys)))
      DBUG_RETURN(result);
  }

  /* If the result cannot contain any imerge we are done */
  if (no_imerge_from_ranges && no_merges1 && no_merges2)
  {
    if (result->keys_map.is_clear_all())
      result->type= SEL_TREE::ALWAYS;
    DBUG_RETURN(result);
  }

  SEL_IMERGE *imerge_from_ranges;
  if (!(imerge_from_ranges= new SEL_IMERGE()))
    result= NULL;                               // OOM
  else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges)
  {
    /* Build the imerge part of the tree for the formula (1) */
    SEL_TREE *rt1= tree1;
    SEL_TREE *rt2= tree2;
    if (no_merges1)
      rt1= new SEL_TREE(tree1, TRUE, param);
    if (no_merges2)
      rt2= new SEL_TREE(tree2, TRUE, param);
    if (!rt1 || !rt2 ||
        result->merges.push_back(imerge_from_ranges) ||
        imerge_from_ranges->or_sel_tree(param, rt1) ||
        imerge_from_ranges->or_sel_tree(param, rt2))
      result= NULL;
  }
  if (!result)
    DBUG_RETURN(result);

  result->type= tree1->type;

  if (!no_merges1 && !no_merges2 &&
      !imerge_list_or_list(param, &tree1->merges, &tree2->merges))
  {
    /* Build the imerges for the formula (2) */
    imerge_list_and_list(&result->merges, &tree1->merges);
  }

  /* Build the imerges for the formulas (3) and (4) */
  for (uint i=0; i < 2; i++)
  {
    List<SEL_IMERGE> merges;
    SEL_TREE *rt= rtree[i];
    SEL_IMERGE *im= imerge[1-i];

    if (rt && im && !merges.push_back(im) &&
        !imerge_list_or_tree(param, &merges, rt))
      imerge_list_and_list(&result->merges, &merges);
  }

  DBUG_RETURN(result);
}
9833 
9834 
9835 /* And key trees where key1->part < key2 -> part */
9836 
static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
             uint clone_flag)
{
  SEL_ARG *next;
  ulong use_count=key1->use_count;

  /* Bail out early (keeping just key1) if the result would become too heavy */
  if (sel_arg_and_weight_heuristic(param, key1, key2))
    return key1;

  if (key1->elements != 1)
  {
    /* key2 gets attached under every element of key1: adjust its use counts */
    key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    if (key2->type == SEL_ARG::KEY_RANGE)
      return key2;
    /* Turn the MAYBE_KEY node into a stand-alone single-element tree */
    key1->right= key1->left= &null_element;
    key1->next= key1->prev= 0;
  }

  /* Attach key2 below every interval of key1, ANDing the next key parts */
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
	key1=key1->tree_delete(next);           // Interval became empty: drop it
	continue;
      }
      next->next_key_part=tmp;
      if (use_count)
	next->increment_use_count(use_count);
      if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
        break;                                  // Too many SEL_ARGs: stop here
    }
    else
      next->next_key_part=key2;
  }
  if (!key1)
    return &null_element;			// Impossible ranges
  key1->use_count++;

  /* Re-compute the result tree's weight. */
  key1->update_weight_locally();

  key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
  return key1;
}
9889 
9890 
9891 /*
9892   Produce a SEL_ARG graph that represents "key1 AND key2"
9893 
9894   SYNOPSIS
9895     key_and()
9896       param   Range analysis context (needed to track if we have allocated
9897               too many SEL_ARGs)
9898       key1    First argument, root of its RB-tree
9899       key2    Second argument, root of its RB-tree
9900 
9901   RETURN
9902     RB-tree root of the resulting SEL_ARG graph.
9903     NULL if the result of AND operation is an empty interval {0}.
9904 */
9905 
static SEL_ARG *
key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
  /* AND with an absent tree is a no-op: the other tree is the result */
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    /* Order the arguments so that key1 has the lower key part number */
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part

    if (sel_arg_and_weight_heuristic(param, key1, key2))
      return key1;

    key1->use_count--;
    /* key1 is shared with someone else: clone before modifying in place */
    if (key1->use_count > 0)
      if (!(key1= key1->clone_tree(param)))
	return 0;				// OOM
    return and_all_keys(param, key1, key2, clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {						// Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      /* key1 is shared: work on a private copy */
      key1->use_count--;
      if (!(key1=key1->clone_tree(param)))
	return 0;				// OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {						// Both are maybe key
      key1->next_key_part=key_and(param, key1->next_key_part,
                                  key2->next_key_part, clone_flag);

      /* Weight of this node is 1 plus the weight of the attached subtree */
      key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight : 0);

      if (key1->next_key_part &&
	  key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
	return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
	key1->use_count--;			// Incremented in and_all_keys
        return and_all_keys(param, key1, key2->next_key_part, clone_flag);
      }
      key2->use_count--;			// Key2 doesn't have a tree
    }
    return key1;
  }

  /* Spatial (GEOM) intervals cannot be intersected here */
  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    /* TODO: why not leave one of the trees? */
    key1->free_tree();
    key2->free_tree();
    return 0;					// Can't optimize this
  }

  key1->use_count--;
  key2->use_count--;
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
  uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);

  /*
    Both trees are for the same key part: intersect the two interval lists.
    e1 and e2 walk the lists in order of increasing minimum endpoint.
  */
  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      if (get_range(&e1,&e2,key1))              // Advance e1 to overlap e2
	continue;
    }
    else if (get_range(&e2,&e1,key2))           // Advance e2 to overlap e1
      continue;
    /* e1 and e2 overlap: AND the next key parts recursively */
    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
                          clone_flag);
    e1->incr_refs();
    e2->incr_refs();
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      SEL_ARG *new_arg= e1->clone_and(param->thd, e2);
      if (!new_arg)
	return &null_element;			// End of memory
      new_arg->next_key_part=next;
      if (new_arg->next_key_part)
        new_arg->weight += new_arg->next_key_part->weight;

      if (!new_tree)
      {
	new_tree=new_arg;
      }
      else
	new_tree=new_tree->insert(new_arg);
    }
    /* Advance the interval that ends first; it cannot overlap anything more */
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;				// e1 can't overlapp next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;			// Impossible range
  new_tree->max_part_no= max_part_no;
  return new_tree;
}
10030 
10031 
10032 static bool
get_range(SEL_ARG ** e1,SEL_ARG ** e2,SEL_ARG * root1)10033 get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
10034 {
10035   (*e1)=root1->find_range(*e2);			// first e1->min < e2->min
10036   if ((*e1)->cmp_max_to_min(*e2) < 0)
10037   {
10038     if (!((*e1)=(*e1)->next))
10039       return 1;
10040     if ((*e1)->cmp_min_to_max(*e2) > 0)
10041     {
10042       (*e2)=(*e2)->next;
10043       return 1;
10044     }
10045   }
10046   return 0;
10047 }
10048 
10049 /*
10050   @brief
10051     Update the tree weight.
10052 
10053   @detail
10054     Utility function to be called on a SEL_ARG tree root after doing local
10055     modifications concerning changes at this key part.
10056     Assumes that the weight of the graphs connected via next_key_part is
10057     up to date.
10058 */
update_weight_locally()10059 void SEL_ARG::update_weight_locally()
10060 {
10061   uint new_weight= 0;
10062   const SEL_ARG *sl;
10063   for (sl= first(); sl ; sl= sl->next)
10064   {
10065     new_weight++;
10066     if (sl->next_key_part)
10067       new_weight += sl->next_key_part->weight;
10068   }
10069   weight= new_weight;
10070 }
10071 
10072 
10073 #ifndef DBUG_OFF
10074 /*
10075   Verify SEL_TREE's weight.
10076 
10077   Recompute the weight and compare
10078 */
verify_weight()10079 uint SEL_ARG::verify_weight()
10080 {
10081   uint computed_weight= 0;
10082   SEL_ARG *first_arg= first();
10083 
10084   if (first_arg)
10085   {
10086     for (SEL_ARG *arg= first_arg; arg; arg= arg->next)
10087     {
10088       computed_weight++;
10089       if (arg->next_key_part)
10090         computed_weight+= arg->next_key_part->verify_weight();
10091     }
10092   }
10093   else
10094   {
10095     // first()=NULL means this is a special kind of SEL_ARG, e.g.
10096     // SEL_ARG with type=MAYBE_KEY
10097     computed_weight= 1;
10098     if (next_key_part)
10099       computed_weight += next_key_part->verify_weight();
10100   }
10101 
10102   if (computed_weight != weight)
10103   {
10104     sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n",
10105                     computed_weight, weight);
10106     DBUG_ASSERT(computed_weight == weight);  // Fail an assertion
10107   }
10108   return computed_weight;
10109 }
10110 #endif
10111 
10112 static
key_or_with_limit(RANGE_OPT_PARAM * param,uint keyno,SEL_ARG * key1,SEL_ARG * key2)10113 SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
10114                            SEL_ARG *key1, SEL_ARG *key2)
10115 {
10116 #ifndef DBUG_OFF
10117   if (key1)
10118     key1->verify_weight();
10119   if (key2)
10120     key2->verify_weight();
10121 #endif
10122 
10123   SEL_ARG *res= key_or(param, key1, key2);
10124   res= enforce_sel_arg_weight_limit(param, keyno, res);
10125 #ifndef DBUG_OFF
10126   if (res)
10127     res->verify_weight();
10128 #endif
10129   return res;
10130 }
10131 
10132 
10133 static
key_and_with_limit(RANGE_OPT_PARAM * param,uint keyno,SEL_ARG * key1,SEL_ARG * key2,uint clone_flag)10134 SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
10135                             SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
10136 {
10137 #ifndef DBUG_OFF
10138   if (key1)
10139     key1->verify_weight();
10140   if (key2)
10141     key2->verify_weight();
10142 #endif
10143   SEL_ARG *res= key_and(param, key1, key2, clone_flag);
10144   res= enforce_sel_arg_weight_limit(param, keyno, res);
10145 #ifndef DBUG_OFF
10146   if (res)
10147     res->verify_weight();
10148 #endif
10149   return res;
10150 }
10151 
10152 
10153 /**
10154    Combine two range expression under a common OR. On a logical level, the
10155    transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
10156 
10157    Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
10158    the format is reminiscent of DNF, since an expression such as the following
10159 
10160    ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
10161 
10162    where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
10163    and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
10164    SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
10165    the first range and ranges must not overlap. It follows that they are also
10166    ordered by maximum endpoints. Thus
10167 
10168    ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
10169 
10170    Is a valid SEL_ARG expression for a key of at least 2 keyparts.
10171 
10172    For simplicity, we will assume that expr2 is a single range predicate,
10173    i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
10174    disjunction of several predicates by subsequently call key_or for each
10175    disjunct.
10176 
10177    The algorithm iterates over each disjunct of expr1, and for each disjunct
10178    where the first keypart's range overlaps with the first keypart's range in
10179    expr2:
10180 
10181    If the predicates are equal for the rest of the keyparts, or if there are
10182    no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
10183    node in expr2 is deallocated. If more ranges became connected in expr1, the
10184    surplus is also deallocated. If they differ, two ranges are created.
10185 
10186    - The range leading up to the overlap. Empty if endpoints are equal.
10187 
10188    - The overlapping sub-range. May be the entire range if they are equal.
10189 
10190    Finally, there may be one more range if expr2's first keypart's range has a
10191    greater maximum endpoint than the last range in expr1.
10192 
10193    For the overlapping sub-range, we recursively call key_or. Thus in order to
10194    compute key_or of
10195 
10196      (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
10197 
10198      (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
10199 
10200    We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
10201    first one, we simply hook on the condition for the second keypart from (1)
10202    : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
10203    < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
10204    the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
10205 
10206    ( 1  <  kp1 <= 2 AND 1 < kp2 < 10 ) OR
10207    ( 2  <  kp1 < 10 AND 1 < kp2 < 20 ) OR
10208    ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
10209 */
10210 static SEL_ARG *
key_or(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2)10211 key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
10212 {
10213   if (!key1)
10214   {
10215     if (key2)
10216     {
10217       key2->use_count--;
10218       key2->free_tree();
10219     }
10220     return 0;
10221   }
10222   if (!key2)
10223   {
10224     key1->use_count--;
10225     key1->free_tree();
10226     return 0;
10227   }
10228   key1->use_count--;
10229   key2->use_count--;
10230 
10231   if (key1->part != key2->part ||
10232       (key1->min_flag | key2->min_flag) & GEOM_FLAG)
10233   {
10234     key1->free_tree();
10235     key2->free_tree();
10236     return 0;                                   // Can't optimize this
10237   }
10238 
10239   // If one of the key is MAYBE_KEY then the found region may be bigger
10240   if (key1->type == SEL_ARG::MAYBE_KEY)
10241   {
10242     key2->free_tree();
10243     key1->use_count++;
10244     return key1;
10245   }
10246   if (key2->type == SEL_ARG::MAYBE_KEY)
10247   {
10248     key1->free_tree();
10249     key2->use_count++;
10250     return key2;
10251   }
10252 
10253   if (key1->use_count > 0)
10254   {
10255     if (key2->use_count == 0 || key1->elements > key2->elements)
10256     {
10257       swap_variables(SEL_ARG *,key1,key2);
10258     }
10259     if (key1->use_count > 0 && !(key1=key1->clone_tree(param)))
10260       return 0;                                 // OOM
10261   }
10262 
10263   // Add tree at key2 to tree at key1
10264   bool key2_shared=key2->use_count != 0;
10265   key1->maybe_flag|=key2->maybe_flag;
10266 
10267   /*
10268     Notation for illustrations used in the rest of this function:
10269 
10270       Range: [--------]
10271              ^        ^
10272              start    stop
10273 
10274       Two overlapping ranges:
10275         [-----]               [----]            [--]
10276             [---]     or    [---]       or   [-------]
10277 
10278       Ambiguity: ***
10279         The range starts or stops somewhere in the "***" range.
10280         Example: a starts before b and may end before/the same plase/after b
10281         a: [----***]
10282         b:   [---]
10283 
10284       Adjacent ranges:
10285         Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
10286         a: ----]
10287         b:      [----
10288    */
10289 
10290   uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
10291 
10292   for (key2=key2->first(); ; )
10293   {
10294     /*
10295       key1 consists of one or more ranges. tmp is the range currently
10296       being handled.
10297 
10298       initialize tmp to the latest range in key1 that starts the same
10299       place or before the range in key2 starts
10300 
10301       key2:           [------]
10302       key1: [---] [-----] [----]
10303                   ^
10304                   tmp
10305     */
10306     if (key1->min_flag & NO_MIN_RANGE &&
10307         key1->max_flag & NO_MAX_RANGE)
10308     {
10309       if (key1->maybe_flag)
10310         return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10311       return 0;   // Always true OR
10312     }
10313     if (!key2)
10314       break;
10315 
10316     SEL_ARG *tmp=key1->find_range(key2);
10317 
10318     /*
10319       Used to describe how two key values are positioned compared to
10320       each other. Consider key_value_a.<cmp_func>(key_value_b):
10321 
10322         -2: key_value_a is smaller than key_value_b, and they are adjacent
10323         -1: key_value_a is smaller than key_value_b (not adjacent)
10324          0: the key values are equal
10325          1: key_value_a is bigger than key_value_b (not adjacent)
10326         -2: key_value_a is bigger than key_value_b, and they are adjacent
10327 
10328       Example: "cmp= tmp->cmp_max_to_min(key2)"
10329 
10330       key2:         [--------            (10 <= x ...)
10331       tmp:    -----]                      (... x <  10) => cmp==-2
10332       tmp:    ----]                       (... x <=  9) => cmp==-1
10333       tmp:    ------]                     (... x  = 10) => cmp== 0
10334       tmp:    --------]                   (... x <= 12) => cmp== 1
10335       (cmp == 2 does not make sense for cmp_max_to_min())
10336      */
10337     int cmp= 0;
10338 
10339     if (!tmp)
10340     {
10341       /*
10342         The range in key2 starts before the first range in key1. Use
10343         the first range in key1 as tmp.
10344 
10345         key2:     [--------]
10346         key1:            [****--] [----]   [-------]
10347                          ^
10348                          tmp
10349       */
10350       tmp=key1->first();
10351       cmp= -1;
10352     }
10353     else if ((cmp= tmp->cmp_max_to_min(key2)) < 0)
10354     {
10355       /*
10356         This is the case:
10357         key2:          [-------]
10358         tmp:   [----**]
10359        */
10360       SEL_ARG *next=tmp->next;
10361       if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
10362       {
10363         /*
10364           Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
10365 
10366           This is the case:
10367           key2:          [-------]
10368           tmp:     [----]
10369 
10370           Result:
10371           key2:    [-------------]     => inserted into key1 below
10372           tmp:                         => deleted
10373         */
10374         SEL_ARG *key2_next=key2->next;
10375         if (key2_shared)
10376         {
10377           if (!(key2=new SEL_ARG(*key2)))
10378             return 0;           // out of memory
10379           key2->increment_use_count(key1->use_count+1);
10380           key2->next=key2_next;                 // New copy of key2
10381         }
10382 
10383         key2->copy_min(tmp);
10384         if (!(key1=key1->tree_delete(tmp)))
10385         {                                       // Only one key in tree
10386           if (key2->min_flag & NO_MIN_RANGE &&
10387               key2->max_flag & NO_MAX_RANGE)
10388           {
10389             if (key2->maybe_flag)
10390               return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10391             return 0;   // Always true OR
10392           }
10393           key1=key2;
10394           key1->make_root();
10395           key2=key2_next;
10396           break;
10397         }
10398       }
10399       if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min
10400         break;         // No more ranges in key1. Copy rest of key2
10401     }
10402 
10403     if (cmp < 0)
10404     {
10405       /*
10406         This is the case:
10407         key2:  [--***]
10408         tmp:       [----]
10409       */
10410       int tmp_cmp;
10411       if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0)
10412       {
10413         /*
10414           This is the case:
10415           key2:  [------**]
10416           tmp:             [----]
10417         */
10418         if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
10419         {
10420           /*
10421             Adjacent ranges with equal next_key_part. Merge like this:
10422 
10423             This is the case:
10424             key2:    [------]
10425             tmp:             [-----]
10426 
10427             Result:
10428             key2:    [------]
10429             tmp:     [-------------]
10430 
10431             Then move on to next key2 range.
10432           */
10433           tmp->copy_min_to_min(key2);
10434           key1->merge_flags(key2);
10435           if (tmp->min_flag & NO_MIN_RANGE &&
10436               tmp->max_flag & NO_MAX_RANGE)
10437           {
10438             if (key1->maybe_flag)
10439               return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10440             return 0;
10441           }
10442           key2->increment_use_count(-1);        // Free not used tree
10443           key2=key2->next;
10444           continue;
10445         }
10446         else
10447         {
10448           /*
10449             key2 not adjacent to tmp or has different next_key_part.
10450             Insert into key1 and move to next range in key2
10451 
10452             This is the case:
10453             key2:  [------**]
10454             tmp:             [----]
10455 
10456             Result:
10457             key1_  [------**][----]
10458                    ^         ^
10459                    insert    tmp
10460           */
10461           SEL_ARG *next=key2->next;
10462           if (key2_shared)
10463           {
10464             SEL_ARG *cpy= new SEL_ARG(*key2);   // Must make copy
10465             if (!cpy)
10466               return 0;                         // OOM
10467             key1=key1->insert(cpy);
10468             key2->increment_use_count(key1->use_count+1);
10469           }
10470           else
10471             key1=key1->insert(key2);            // Will destroy key2_root
10472           key2=next;
10473           continue;
10474         }
10475       }
10476     }
10477 
10478     /*
10479       The ranges in tmp and key2 are overlapping:
10480 
10481       key2:          [----------]
10482       tmp:        [*****-----*****]
10483 
10484       Corollary: tmp.min <= key2.max
10485     */
10486     if (eq_tree(tmp->next_key_part,key2->next_key_part))
10487     {
10488       // Merge overlapping ranges with equal next_key_part
10489       if (tmp->is_same(key2))
10490       {
10491         /*
10492           Found exact match of key2 inside key1.
10493           Use the relevant range in key1.
10494         */
10495         tmp->merge_flags(key2);                 // Copy maybe flags
10496         key2->increment_use_count(-1);          // Free not used tree
10497       }
10498       else
10499       {
10500         SEL_ARG *last= tmp;
10501         SEL_ARG *first= tmp;
10502 
10503         /*
10504           Find the last range in key1 that overlaps key2 and
10505           where all ranges first...last have the same next_key_part as
10506           key2.
10507 
10508           key2:  [****----------------------*******]
10509           key1:     [--]  [----] [---]  [-----] [xxxx]
10510                     ^                   ^       ^
10511                     first               last    different next_key_part
10512 
10513           Since key2 covers them, the ranges between first and last
10514           are merged into one range by deleting first...last-1 from
10515           the key1 tree. In the figure, this applies to first and the
10516           two consecutive ranges. The range of last is then extended:
10517             * last.min: Set to MY_MIN(key2.min, first.min)
10518             * last.max: If there is a last->next that overlaps key2 (i.e.,
10519                         last->next has a different next_key_part):
10520                                         Set adjacent to last->next.min
10521                         Otherwise:      Set to MY_MAX(key2.max, last.max)
10522 
10523           Result:
10524           key2:  [****----------------------*******]
10525                     [--]  [----] [---]                   => deleted from key1
10526           key1:  [**------------------------***][xxxx]
10527                  ^                              ^
10528                  tmp=last                       different next_key_part
10529         */
10530         while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
10531                eq_tree(last->next->next_key_part,key2->next_key_part))
10532         {
10533           /*
10534             last->next is covered by key2 and has same next_key_part.
10535             last can be deleted
10536           */
10537           SEL_ARG *save=last;
10538           last=last->next;
10539           key1=key1->tree_delete(save);
10540         }
10541         // Redirect tmp to last which will cover the entire range
10542         tmp= last;
10543 
10544         /*
10545           We need the minimum endpoint of first so we can compare it
10546           with the minimum endpoint of the enclosing key2 range.
10547         */
10548         last->copy_min(first);
10549         bool full_range= last->copy_min(key2);
10550         if (!full_range)
10551         {
10552           if (last->next && key2->cmp_max_to_min(last->next) >= 0)
10553           {
10554             /*
10555               This is the case:
10556               key2:    [-------------]
10557               key1:  [***------]  [xxxx]
10558                      ^            ^
10559                      last         different next_key_part
10560 
10561               Extend range of last up to last->next:
10562               key2:    [-------------]
10563               key1:  [***--------][xxxx]
10564             */
10565             last->copy_min_to_max(last->next);
10566           }
10567           else
10568             /*
10569               This is the case:
10570               key2:    [--------*****]
10571               key1:  [***---------]    [xxxx]
10572                      ^                 ^
10573                      last              different next_key_part
10574 
10575               Extend range of last up to MY_MAX(last.max, key2.max):
10576               key2:    [--------*****]
10577               key1:  [***----------**] [xxxx]
10578              */
10579             full_range= last->copy_max(key2);
10580         }
10581         if (full_range)
10582         {                                       // Full range
10583           key1->free_tree();
10584           for (; key2 ; key2=key2->next)
10585             key2->increment_use_count(-1);      // Free not used tree
10586           if (key1->maybe_flag)
10587             return new SEL_ARG(SEL_ARG::MAYBE_KEY);
10588           return 0;
10589         }
10590       }
10591     }
10592 
10593     if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
10594     {
10595       /*
10596         This is the case ("cmp>=0" means that tmp.max >= key2.min):
10597         key2:              [----]
10598         tmp:     [------------*****]
10599       */
10600 
10601       if (!tmp->next_key_part)
10602       {
10603 	SEL_ARG *key2_next= key2->next;
10604 	if (key2_shared)
10605 	{
10606 	  SEL_ARG *key2_cpy= new SEL_ARG(*key2);
10607           if (!key2_cpy)
10608             return 0;
10609           key2= key2_cpy;
10610 	}
10611         /*
10612           tmp->next_key_part is empty: cut the range that is covered
10613           by tmp from key2.
10614           Reason: (key2->next_key_part OR tmp->next_key_part) will be
10615           empty and therefore equal to tmp->next_key_part. Thus, this
10616           part of the key2 range is completely covered by tmp.
10617         */
10618         if (tmp->cmp_max_to_max(key2) >= 0)
10619         {
10620           /*
10621             tmp covers the entire range in key2.
10622             key2:              [----]
10623             tmp:     [-----------------]
10624 
10625             Move on to next range in key2
10626           */
10627           key2->increment_use_count(-1); // Free not used tree
10628           key2=key2_next;
10629           continue;
10630         }
10631         else
10632         {
10633           /*
10634             This is the case:
10635             key2:           [-------]
10636             tmp:     [---------]
10637 
10638             Result:
10639             key2:               [---]
10640             tmp:     [---------]
10641           */
10642           key2->copy_max_to_min(tmp);
10643           continue;
10644         }
10645       }
10646 
10647       /*
10648         The ranges are overlapping but have not been merged because
10649         next_key_part of tmp and key2 differ.
10650         key2:              [----]
10651         tmp:     [------------*****]
10652 
10653         Split tmp in two where key2 starts:
10654         key2:              [----]
10655         key1:    [--------][--*****]
10656                  ^         ^
10657                  insert    tmp
10658       */
10659       SEL_ARG *new_arg=tmp->clone_first(key2);
10660       if (!new_arg)
10661         return 0;                               // OOM
10662       if ((new_arg->next_key_part= tmp->next_key_part))
10663         new_arg->increment_use_count(key1->use_count+1);
10664       tmp->copy_min_to_min(key2);
10665       key1=key1->insert(new_arg);
10666     } // tmp.min >= key2.min due to this if()
10667 
10668     /*
10669       Now key2.min <= tmp.min <= key2.max:
10670       key2:   [---------]
10671       tmp:    [****---*****]
10672      */
10673     SEL_ARG key2_cpy(*key2); // Get copy we can modify
10674     for (;;)
10675     {
10676       if (tmp->cmp_min_to_min(&key2_cpy) > 0)
10677       {
10678         /*
10679           This is the case:
10680           key2_cpy:    [------------]
10681           key1:                 [-*****]
10682                                 ^
10683                                 tmp
10684 
10685           Result:
10686           key2_cpy:             [---]
10687           key1:        [-------][-*****]
10688                        ^        ^
10689                        insert   tmp
10690          */
10691         SEL_ARG *new_arg=key2_cpy.clone_first(tmp);
10692         if (!new_arg)
10693           return 0; // OOM
10694         if ((new_arg->next_key_part=key2_cpy.next_key_part))
10695           new_arg->increment_use_count(key1->use_count+1);
10696         key1=key1->insert(new_arg);
10697         key2_cpy.copy_min_to_min(tmp);
10698       }
10699       // Now key2_cpy.min == tmp.min
10700 
10701       if ((cmp= tmp->cmp_max_to_max(&key2_cpy)) <= 0)
10702       {
10703         /*
10704           tmp.max <= key2_cpy.max:
10705           key2_cpy:   a)  [-------]    or b)     [----]
10706           tmp:            [----]                 [----]
10707 
10708           Steps:
10709            1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part.
10710            2) If case a: Insert range [tmp.max, key2_cpy.max] into key1 using
10711                          next_key_part of key2_cpy
10712 
10713            Result:
10714            key1:      a)  [----][-]    or b)     [----]
10715          */
10716         tmp->maybe_flag|= key2_cpy.maybe_flag;
10717         key2_cpy.increment_use_count(key1->use_count+1);
10718 
10719         uint old_weight= tmp->next_key_part? tmp->next_key_part->weight: 0;
10720 
10721         tmp->next_key_part= key_or(param, tmp->next_key_part,
10722                                    key2_cpy.next_key_part);
10723 
10724         uint new_weight= tmp->next_key_part? tmp->next_key_part->weight: 0;
10725         key1->weight += (new_weight - old_weight);
10726 
10727         if (!cmp)
10728           break;                     // case b: done with this key2 range
10729 
10730         // Make key2_cpy the range [tmp.max, key2_cpy.max]
10731         key2_cpy.copy_max_to_min(tmp);
10732         if (!(tmp=tmp->next))
10733         {
10734           /*
10735             No more ranges in key1. Insert key2_cpy and go to "end"
10736             label to insert remaining ranges in key2 if any.
10737           */
10738           SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
10739           if (!tmp2)
10740             return 0; // OOM
10741           key1=key1->insert(tmp2);
10742           key2=key2->next;
10743           goto end;
10744         }
10745         if (tmp->cmp_min_to_max(&key2_cpy) > 0)
10746         {
10747           /*
10748             The next range in key1 does not overlap with key2_cpy.
10749             Insert this range into key1 and move on to the next range
10750             in key2.
10751           */
10752           SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
10753           if (!tmp2)
10754             return 0;                           // OOM
10755           key1=key1->insert(tmp2);
10756           break;
10757         }
10758         /*
10759           key2_cpy overlaps with the next range in key1 and the case
10760           is now "key2.min <= tmp.min <= key2.max". Go back to for(;;)
10761           to handle this situation.
10762         */
10763         continue;
10764       }
10765       else
10766       {
10767         /*
10768           This is the case:
10769           key2_cpy:   [-------]
10770           tmp:        [------------]
10771 
10772           Result:
10773           key1:       [-------][---]
10774                       ^        ^
10775                       new_arg  tmp
10776           Steps:
10777            0) If tmp->next_key_part is empty: do nothing. Reason:
10778               (key2_cpy->next_key_part OR tmp->next_key_part) will be
10779               empty and therefore equal to tmp->next_key_part. Thus,
10780               the range in key2_cpy is completely covered by tmp
10781            1) Make new_arg with range [tmp.min, key2_cpy.max].
10782               new_arg->next_key_part is OR between next_key_part
10783               of tmp and key2_cpy
10784            2) Make tmp the range [key2.max, tmp.max]
10785            3) Insert new_arg into key1
10786         */
10787         if (!tmp->next_key_part) // Step 0
10788         {
10789           key2_cpy.increment_use_count(-1);     // Free not used tree
10790           break;
10791         }
10792         SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
10793         if (!new_arg)
10794           return 0; // OOM
10795         tmp->copy_max_to_min(&key2_cpy);
10796         tmp->increment_use_count(key1->use_count+1);
10797         /* Increment key count as it may be used for next loop */
10798         key2_cpy.increment_use_count(1);
10799         new_arg->next_key_part= key_or(param, tmp->next_key_part,
10800                                        key2_cpy.next_key_part);
10801         key1=key1->insert(new_arg);
10802         break;
10803       }
10804     }
10805     // Move on to next range in key2
10806     key2=key2->next;
10807   }
10808 
10809 end:
10810   /*
10811     Add key2 ranges that are non-overlapping with and higher than the
10812     highest range in key1.
10813   */
10814   while (key2)
10815   {
10816     SEL_ARG *next=key2->next;
10817     if (key2_shared)
10818     {
10819       SEL_ARG *tmp=new SEL_ARG(*key2);          // Must make copy
10820       if (!tmp)
10821         return 0;
10822       key2->increment_use_count(key1->use_count+1);
10823       key1=key1->insert(tmp);
10824     }
10825     else
10826       key1=key1->insert(key2);                  // Will destroy key2_root
10827     key2=next;
10828   }
10829   key1->use_count++;
10830 
10831   /* Re-compute the result tree's weight. */
10832   key1->update_weight_locally();
10833 
10834   key1->max_part_no= max_part_no;
10835   return key1;
10836 }
10837 
10838 
10839 /* Compare if two trees are equal */
10840 
eq_tree(SEL_ARG * a,SEL_ARG * b)10841 static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
10842 {
10843   if (a == b)
10844     return 1;
10845   if (!a || !b || !a->is_same(b))
10846     return 0;
10847   if (a->left != &null_element && b->left != &null_element)
10848   {
10849     if (!eq_tree(a->left,b->left))
10850       return 0;
10851   }
10852   else if (a->left != &null_element || b->left != &null_element)
10853     return 0;
10854   if (a->right != &null_element && b->right != &null_element)
10855   {
10856     if (!eq_tree(a->right,b->right))
10857       return 0;
10858   }
10859   else if (a->right != &null_element || b->right != &null_element)
10860     return 0;
10861   if (a->next_key_part != b->next_key_part)
10862   {						// Sub range
10863     if (!a->next_key_part != !b->next_key_part ||
10864 	!eq_tree(a->next_key_part, b->next_key_part))
10865       return 0;
10866   }
10867   return 1;
10868 }
10869 
10870 
10871 /*
10872   Compute the MAX(key part) in this SEL_ARG graph.
10873 */
get_max_key_part() const10874 uint SEL_ARG::get_max_key_part() const
10875 {
10876   const SEL_ARG *cur;
10877   uint max_part= part;
10878   for (cur= first(); cur ; cur=cur->next)
10879   {
10880     if (cur->next_key_part)
10881     {
10882       uint mp= cur->next_key_part->get_max_key_part();
10883       max_part= MY_MAX(part, mp);
10884     }
10885   }
10886   return max_part;
10887 }
10888 
10889 
10890 /*
10891   Remove the SEL_ARG graph elements which have part > max_part.
10892 
10893   @detail
10894     Also update weight for the graph and any modified subgraphs.
10895 */
10896 
prune_sel_arg_graph(SEL_ARG * sel_arg,uint max_part)10897 void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
10898 {
10899   SEL_ARG *cur;
10900   DBUG_ASSERT(max_part >= sel_arg->part);
10901 
10902   for (cur= sel_arg->first(); cur ; cur=cur->next)
10903   {
10904     if (cur->next_key_part)
10905     {
10906       if (cur->next_key_part->part > max_part)
10907       {
10908         // Remove cur->next_key_part.
10909         sel_arg->weight -= cur->next_key_part->weight;
10910         cur->next_key_part= NULL;
10911       }
10912       else
10913       {
10914         uint old_weight= cur->next_key_part->weight;
10915         prune_sel_arg_graph(cur->next_key_part, max_part);
10916         sel_arg->weight -= (old_weight - cur->next_key_part->weight);
10917       }
10918     }
10919   }
10920 }
10921 
10922 
10923 /*
10924   @brief
10925     Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT,
10926     by cutting off branches if necessary.
10927 
10928   @detail
10929     @see declaration of SEL_ARG::weight for definition of weight.
10930 
10931     This function attempts to reduce the graph's weight by cutting off
10932     SEL_ARG::next_key_part connections if necessary.
10933 
10934     We start with maximum used keypart and then remove one keypart after
10935     another until the graph's weight is within the limit.
10936 
10937   @seealso
10938      sel_arg_and_weight_heuristic();
10939 
10940   @return
10941     tree pointer  The tree after processing,
10942     NULL          If it was not possible to reduce the weight of the tree below the
10943                   limit.
10944 */
10945 
enforce_sel_arg_weight_limit(RANGE_OPT_PARAM * param,uint keyno,SEL_ARG * sel_arg)10946 SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
10947                                       SEL_ARG *sel_arg)
10948 {
10949   if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE ||
10950       !param->thd->variables.optimizer_max_sel_arg_weight)
10951     return sel_arg;
10952 
10953   Field *field= sel_arg->field;
10954   uint weight1= sel_arg->weight;
10955 
10956   while (1)
10957   {
10958     if (likely(sel_arg->weight <= param->thd->variables.
10959                                   optimizer_max_sel_arg_weight))
10960       break;
10961 
10962     uint max_part= sel_arg->get_max_key_part();
10963     if (max_part == sel_arg->part)
10964     {
10965       /*
10966         We don't return NULL right away as we want to have the information
10967         about the changed tree in the optimizer trace.
10968       */
10969       sel_arg= NULL;
10970       break;
10971     }
10972 
10973     max_part--;
10974     prune_sel_arg_graph(sel_arg, max_part);
10975   }
10976 
10977   uint weight2= sel_arg? sel_arg->weight : 0;
10978 
10979   if (weight2 != weight1)
10980   {
10981     Json_writer_object wrapper(param->thd);
10982     Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit");
10983     if (param->using_real_indexes)
10984       obj.add("index", param->table->key_info[param->real_keynr[keyno]].name);
10985     else
10986       obj.add("pseudo_index", field->field_name);
10987 
10988     obj.add("old_weight", (longlong)weight1);
10989     obj.add("new_weight", (longlong)weight2);
10990   }
10991   return sel_arg;
10992 }
10993 
10994 
10995 /*
10996   @detail
10997     Do not combine the trees if their total weight is likely to exceed the
10998     MAX_WEIGHT.
10999     (It is possible that key1 has next_key_part that has empty overlap with
11000     key2. In this case, the combined tree will have a smaller weight than we
11001     predict. We assume this is rare.)
11002 */
11003 
11004 static
sel_arg_and_weight_heuristic(RANGE_OPT_PARAM * param,SEL_ARG * key1,SEL_ARG * key2)11005 bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
11006                                   SEL_ARG *key2)
11007 {
11008   DBUG_ASSERT(key1->part < key2->part);
11009 
11010   ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight;
11011   if (max_weight && key1->weight + key1->elements*key2->weight > max_weight)
11012   {
11013     Json_writer_object wrapper(param->thd);
11014     Json_writer_object obj(param->thd, "sel_arg_weight_heuristic");
11015     obj.add("key1_field", key1->field->field_name);
11016     obj.add("key2_field", key2->field->field_name);
11017     obj.add("key1_weight", (longlong)key1->weight);
11018     obj.add("key2_weight", (longlong)key2->weight);
11019     return true; // Discard key2
11020   }
11021   return false;
11022 }
11023 
11024 
/*
  Insert interval 'key' into this red-black tree of intervals and return
  the (possibly new) root.

  The new node is first placed as a leaf by an ordinary binary-search
  descent on interval minimums, then linked into the doubly-linked
  next/prev list of intervals, and finally the tree is rebalanced with
  rb_insert(). The returned root inherits use_count/maybe_flag from the
  old root and gets updated elements and weight counters.
*/
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element);

  /* Binary-search descent; par remembers which child pointer to set */
  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
  /* Link the new node into the ordered next/prev list */
  if (par == &last_element->left)
  {
    /* key became a left child: it precedes last_element in the list */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key became a right child: it follows last_element in the list */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);			// rebalance tree
  root->use_count=this->use_count;		// copy root info
  root->elements= this->elements+1;
  /*
    The new weight is:
     old root's weight
     +1 for the weight of the added element
     + next_key_part's weight of the added element
  */
  root->weight = weight + 1 + (key->next_key_part? key->next_key_part->weight: 0);
  root->maybe_flag=this->maybe_flag;
  return root;
}
11073 
11074 
11075 /*
11076 ** Find best key with min <= given key
** Because of the call context, this should never return 0 to get_range
11078 */
11079 
11080 SEL_ARG *
find_range(SEL_ARG * key)11081 SEL_ARG::find_range(SEL_ARG *key)
11082 {
11083   SEL_ARG *element=this,*found=0;
11084 
11085   for (;;)
11086   {
11087     if (element == &null_element)
11088       return found;
11089     int cmp=element->cmp_min_to_min(key);
11090     if (cmp == 0)
11091       return element;
11092     if (cmp < 0)
11093     {
11094       found=element;
11095       element=element->right;
11096     }
11097     else
11098       element=element->left;
11099   }
11100 }
11101 
11102 
11103 /*
  Remove an element from the tree
11105 
11106   SYNOPSIS
11107     tree_delete()
11108     key		Key that is to be deleted from tree (this)
11109 
11110   NOTE
11111     This also frees all sub trees that is used by the element
11112 
11113   RETURN
11114     root of new tree (with key deleted)
11115 */
11116 
SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /*
    Compute the weight the tree will have after the element is removed.
    We remove the element itself (weight=1)
    and the sub-graph connected to its next_key_part.
  */
  uint new_weight= root->weight - (1 + (key->next_key_part?
                                        key->next_key_part->weight : 0));

  DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ?
                                    key->next_key_part->weight : 0)));

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_use_count(-1);
  /* par is the parent's child-pointer slot that refers to 'key' */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* No left child: splice key's right subtree into key's place */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* No right child: splice key's left subtree into key's place */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    /* Two children: replace key with its in-order successor */
    SEL_ARG *tmp=key->next;			// next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;	// unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;			// Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)				// key->right == key->next
      fix_par=tmp;				// new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);				// Maybe root later
  /* Removing a BLACK node may break the red-black invariants; rebalance */
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
  test_rb_tree(root,root->parent);

  root->use_count=this->use_count;		// Fix root counters
  root->weight= new_weight;
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}
11194 
11195 
11196 	/* Functions to fix up the tree after insert and delete */
11197 
/*
  Left-rotate around 'leaf': leaf's right child y takes leaf's place in
  the tree and leaf becomes y's left child. *root is updated if leaf was
  the tree root. Only tree pointers change; the next/prev list order is
  unaffected.
*/
static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *y=leaf->right;
  leaf->right=y->left;                  // y's left subtree moves under leaf
  if (y->left != &null_element)
    y->left->parent=leaf;
  if (!(y->parent=leaf->parent))
    *root=y;                            // leaf was the root; y replaces it
  else
    *leaf->parent_ptr()=y;
  y->left=leaf;
  leaf->parent=y;
}
11211 
/*
  Right-rotate around 'leaf': mirror image of left_rotate(). leaf's left
  child y takes leaf's place and leaf becomes y's right child; *root is
  updated if leaf was the tree root.
*/
static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *y=leaf->left;
  leaf->left=y->right;                  // y's right subtree moves under leaf
  if (y->right != &null_element)
    y->right->parent=leaf;
  if (!(y->parent=leaf->parent))
    *root=y;                            // leaf was the root; y replaces it
  else
    *leaf->parent_ptr()=y;
  y->right=leaf;
  leaf->parent=y;
}
11225 
11226 
/*
  Red-black rebalancing after 'leaf' has been linked into the tree as a
  leaf (by SEL_ARG::insert). 'this' is the current root; the (possibly
  new) root is returned. Walks upward while the parent is RED,
  recoloring or rotating to restore the red-black properties.
*/
SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
      /* Parent is a left child; y is the "uncle" node */
      y= par2->right;
      if (y->color == RED)
      {
	/* Red uncle: recolor and continue the fixup from the grandparent */
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	/* Black uncle: one or two rotations finish the fixup */
	if (leaf == par->right)
	{
	  left_rotate(&root,leaf->parent);
	  par=leaf;			/* leaf is now parent to old leaf */
	}
	par->color=BLACK;
	par2->color=RED;
	right_rotate(&root,par2);
	break;
      }
    }
    else
    {
      /* Mirror image: parent is a right child */
      y= par2->left;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->left)
	{
	  right_rotate(&root,par);
	  par=leaf;
	}
	par->color=BLACK;
	par2->color=RED;
	left_rotate(&root,par2);
	break;
      }
    }
  }
  root->color=BLACK;			// Root is always black
  test_rb_tree(root,root->parent);
  return root;
}
11287 
11288 
/*
  Restore the red-black properties after a BLACK node has been removed
  from the tree (called from SEL_ARG::tree_delete).

  @param root  Current tree root
  @param key   The node spliced into the removed node's place (may be
               &null_element)
  @param par   Parent of 'key'
  @return      The (possibly new) root of the rebalanced tree
*/
SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  /* x carries an "extra black"; push it up or resolve it via rotations */
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      /* w is x's sibling */
      w=par->right;
      if (w->color == SEL_ARG::RED)
      {
	/* Red sibling: rotate so the sibling becomes black */
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	left_rotate(&root,par);
	w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
	/* Both of sibling's children black: recolor, move problem up */
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->right->color == SEL_ARG::BLACK)
	{
	  /* Make sibling's far child red via a rotation */
	  w->left->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  right_rotate(&root,w);
	  w=par->right;
	}
	/* Final rotation absorbs the extra black; fixup done */
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->right->color=SEL_ARG::BLACK;
	left_rotate(&root,par);
	x=root;
	break;
      }
    }
    else
    {
      /* Mirror image of the branch above */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	right_rotate(&root,par);
	w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->left->color == SEL_ARG::BLACK)
	{
	  w->right->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  left_rotate(&root,w);
	  w=par->left;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->left->color=SEL_ARG::BLACK;
	right_rotate(&root,par);
	x=root;
	break;
      }
    }
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}
11366 
11367 
11368 	/* Test that the properties for a red-black tree hold */
11369 
11370 #ifdef EXTRA_DEBUG
test_rb_tree(SEL_ARG * element,SEL_ARG * parent)11371 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
11372 {
11373   int count_l,count_r;
11374 
11375   if (element == &null_element)
11376     return 0;					// Found end of tree
11377   if (element->parent != parent)
11378   {
11379     sql_print_error("Wrong tree: Parent doesn't point at parent");
11380     return -1;
11381   }
11382   if (element->color == SEL_ARG::RED &&
11383       (element->left->color == SEL_ARG::RED ||
11384        element->right->color == SEL_ARG::RED))
11385   {
11386     sql_print_error("Wrong tree: Found two red in a row");
11387     return -1;
11388   }
11389   if (element->left == element->right && element->left != &null_element)
11390   {						// Dummy test
11391     sql_print_error("Wrong tree: Found right == left");
11392     return -1;
11393   }
11394   count_l=test_rb_tree(element->left,element);
11395   count_r=test_rb_tree(element->right,element);
11396   if (count_l >= 0 && count_r >= 0)
11397   {
11398     if (count_l == count_r)
11399       return count_l+(element->color == SEL_ARG::BLACK);
11400     sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
11401 	    count_l,count_r);
11402   }
11403   return -1;					// Error, no more warnings
11404 }
11405 
11406 
11407 /**
11408   Count how many times SEL_ARG graph "root" refers to its part "key" via
11409   transitive closure.
11410 
11411   @param root  An RB-Root node in a SEL_ARG graph.
11412   @param key   Another RB-Root node in that SEL_ARG graph.
11413 
  The passed "root" node may refer to "key" node via root->next_key_part,
  via root->next->next_key_part, and so on.
11416 
11417   This function counts how many times the node "key" is referred (via
11418   SEL_ARG::next_key_part) by
11419   - intervals of RB-tree pointed by "root",
11420   - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
11421   intervals of RB-tree pointed by "root",
11422   - and so on.
11423 
11424   Here is an example (horizontal links represent next_key_part pointers,
  vertical links - next/prev pointers):
11426 
11427          +----+               $
11428          |root|-----------------+
11429          +----+               $ |
11430            |                  $ |
11431            |                  $ |
11432          +----+       +---+   $ |     +---+    Here the return value
11433          |    |- ... -|   |---$-+--+->|key|    will be 4.
11434          +----+       +---+   $ |  |  +---+
11435            |                  $ |  |
11436           ...                 $ |  |
11437            |                  $ |  |
11438          +----+   +---+       $ |  |
11439          |    |---|   |---------+  |
11440          +----+   +---+       $    |
11441            |        |         $    |
11442           ...     +---+       $    |
11443                   |   |------------+
11444                   +---+       $
11445   @return
11446   Number of links to "key" from nodes reachable from "root".
11447 */
11448 
count_key_part_usage(SEL_ARG * root,SEL_ARG * key)11449 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
11450 {
11451   ulong count= 0;
11452   for (root=root->first(); root ; root=root->next)
11453   {
11454     if (root->next_key_part)
11455     {
11456       if (root->next_key_part == key)
11457 	count++;
11458       if (root->next_key_part->part < key->part)
11459 	count+=count_key_part_usage(root->next_key_part,key);
11460     }
11461   }
11462   return count;
11463 }
11464 
11465 
11466 /*
11467   Check if SEL_ARG::use_count value is correct
11468 
11469   SYNOPSIS
11470     SEL_ARG::test_use_count()
11471       root  The root node of the SEL_ARG graph (an RB-tree root node that
11472             has the least value of sel_arg->part in the entire graph, and
11473             thus is the "origin" of the graph)
11474 
11475   DESCRIPTION
11476     Check if SEL_ARG::use_count value is correct. See the definition of
11477     use_count for what is "correct".
11478 */
11479 
test_use_count(SEL_ARG * root)11480 void SEL_ARG::test_use_count(SEL_ARG *root)
11481 {
11482   uint e_count=0;
11483 
11484   if (this->type != SEL_ARG::KEY_RANGE)
11485     return;
11486   for (SEL_ARG *pos=first(); pos ; pos=pos->next)
11487   {
11488     e_count++;
11489     if (pos->next_key_part)
11490     {
11491       ulong count=count_key_part_usage(root,pos->next_key_part);
11492       if (count > pos->next_key_part->use_count)
11493       {
11494         sql_print_information("Use_count: Wrong count for key at %p: %lu "
11495                               "should be %lu", pos,
11496                               pos->next_key_part->use_count, count);
11497 	return;
11498       }
11499       pos->next_key_part->test_use_count(root);
11500     }
11501   }
11502   if (e_count != elements)
11503     sql_print_warning("Wrong use count: %u (should be %u) for tree at %p",
11504                       e_count, elements, this);
11505 }
11506 #endif
11507 
11508 /*
11509   Calculate cost and E(#rows) for a given index and intervals tree
11510 
11511   SYNOPSIS
11512     check_quick_select()
11513       param             Parameter from test_quick_select
11514       idx               Number of index to use in PARAM::key SEL_TREE::key
11515       index_only        TRUE  - assume only index tuples will be accessed
11516                         FALSE - assume full table rows will be read
11517       tree              Transformed selection condition, tree->key[idx] holds
11518                         the intervals for the given index.
11519       update_tbl_stats  TRUE <=> update table->quick_* with information
11520                         about range scan we've evaluated.
11521       mrr_flags   INOUT MRR access flags
11522       cost        OUT   Scan cost
11523       is_ror_scan       is set to reflect if the key scan is a ROR (see
11524                         is_key_scan_ror function for more info)
11525 
11526   NOTES
11527     param->table->opt_range*, param->range_count (and maybe others) are
11528     updated with data of given key scan, see quick_range_seq_next for details.
11529 
11530   RETURN
11531     Estimate # of records to be retrieved.
11532     HA_POS_ERROR if estimate calculation failed due to table handler problems.
11533 */
11534 
static
ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                           SEL_ARG *tree, bool update_tbl_stats,
                           uint *mrr_flags, uint *bufsize, Cost_estimate *cost,
                           bool *is_ror_scan)
{
  SEL_ARG_RANGE_SEQ seq;
  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
  handler *file= param->table->file;
  ha_rows rows= HA_POS_ERROR;
  uint keynr= param->real_keynr[idx];
  DBUG_ENTER("check_quick_select");

  /* Range not calculated yet */
  param->quick_rows[keynr]= HA_POS_ERROR;

  /* Handle cases when we don't have a valid non-empty list of range */
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);            /* Condition is always false: 0 rows match */
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);  /* Tree doesn't describe ranges on keypart 0 */

  /* Set up the range sequence that the MRR interface will iterate over */
  seq.keyno= idx;
  seq.real_keyno= keynr;
  seq.param= param;
  seq.start= tree;

  param->range_count=0;
  param->max_key_parts=0;

  /* Assume ROR until the engine flags or range structure prove otherwise */
  seq.is_ror_scan= TRUE;
  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    seq.is_ror_scan= FALSE;

  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
  /*
    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
  */
  *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;

  // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
  // Passing wrong second argument to index_flags() makes no difference for
  // most storage engines but might be an issue for MyRocks with certain
  // datatypes.
  if (index_only &&
      (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
      !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
     *mrr_flags |= HA_MRR_INDEX_ONLY;

  /* Non-SELECT statements are forced to use the default MRR implementation */
  if (param->thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;

  *bufsize= param->thd->variables.mrr_buff_size;
  /*
    Skip materialized derived table/view result table from MRR check as
    it doesn't contain any data yet.
  */
  if (param->table->pos_in_table_list->is_non_derived())
    rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                            bufsize, mrr_flags, cost);
  param->quick_rows[keynr]= rows;
  if (rows != HA_POS_ERROR)
  {
    ha_rows table_records= param->table->stat_records();
    if (rows > table_records)
    {
      /*
        For any index the total number of records within all ranges
        cannot be bigger than the number of records in the table.
        This check is needed as sometimes that table statistics or range
        estimates may be slightly out of sync.
      */
      rows= table_records;
      set_if_bigger(rows, 1);
      param->quick_rows[keynr]= rows;
    }
    param->possible_keys.set_bit(keynr);
    if (update_tbl_stats)
    {
      /* Record the evaluated range-scan statistics on the TABLE object */
      param->table->opt_range_keys.set_bit(keynr);
      param->table->opt_range[keynr].key_parts= param->max_key_parts;
      param->table->opt_range[keynr].ranges= param->range_count;
      param->table->opt_range_condition_rows=
        MY_MIN(param->table->opt_range_condition_rows, rows);
      param->table->opt_range[keynr].rows= rows;
      param->table->opt_range[keynr].cost= cost->total_cost();
      /* Reading a clustering key already reads full rows: no extra cost */
      if (param->table->file->is_clustering_key(keynr))
	param->table->opt_range[keynr].index_only_cost= 0;
      else
        param->table->opt_range[keynr].index_only_cost= cost->index_only_cost();
    }
  }

  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /*
      All scans are non-ROR scans for those index types.
      TODO: Don't have this logic here, make table engines return
      appropriate flags instead.
    */
    seq.is_ror_scan= FALSE;
  }
  else if (param->table->file->is_clustering_key(keynr))
  {
    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
    seq.is_ror_scan= TRUE;
  }
  else if (param->range_count > 1)
  {
    /*
      Scanning multiple key values in the index: the records are ROR
      for each value, but not between values. E.g, "SELECT ... x IN
      (1,3)" returns ROR order for all records with x=1, then ROR
      order for records with x=3
    */
    seq.is_ror_scan= FALSE;
  }
  *is_ror_scan= seq.is_ror_scan;

  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
  DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
}
11661 
11662 
11663 /*
11664   Check if key scan on given index with equality conditions on first n key
11665   parts is a ROR scan.
11666 
11667   SYNOPSIS
11668     is_key_scan_ror()
11669       param  Parameter from test_quick_select
11670       keynr  Number of key in the table. The key must not be a clustered
11671              primary key.
11672       nparts Number of first key parts for which equality conditions
11673              are present.
11674 
11675   NOTES
11676     ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
11677     ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
11678 
11679     This function is needed to handle a practically-important special case:
11680     an index scan is a ROR scan if it is done using a condition in form
11681 
11682         "key1_1=c_1 AND ... AND key1_n=c_n"
11683 
11684     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
11685 
11686     and the table has a clustered Primary Key defined as
11687       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)
11688 
    i.e. the first key parts of it are identical to uncovered parts of the
11690     key being scanned. This function assumes that the index flags do not
11691     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
11692 
11693     Check (1) is made in quick_range_seq_next()
11694 
11695   RETURN
11696     TRUE   The scan is ROR-scan
11697     FALSE  Otherwise
11698 */
11699 
is_key_scan_ror(PARAM * param,uint keynr,uint8 nparts)11700 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
11701 {
11702   KEY *table_key= param->table->key_info + keynr;
11703   KEY_PART_INFO *key_part= table_key->key_part + nparts;
11704   KEY_PART_INFO *key_part_end= (table_key->key_part +
11705                                 table_key->user_defined_key_parts);
11706   uint pk_number;
11707 
11708   if (param->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
11709     return false;
11710 
11711   for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
11712   {
11713     uint16 fieldnr= param->table->key_info[keynr].
11714                     key_part[kp - table_key->key_part].fieldnr - 1;
11715     if (param->table->field[fieldnr]->key_length() != kp->length)
11716       return FALSE;
11717   }
11718 
11719   /*
11720     If there are equalities for all key parts, it is a ROR scan. If there are
11721     equalities all keyparts and even some of key parts from "Extended Key"
11722     index suffix, it is a ROR-scan, too.
11723   */
11724   if (key_part >= key_part_end)
11725     return TRUE;
11726 
11727   key_part= table_key->key_part + nparts;
11728   pk_number= param->table->s->primary_key;
11729   if (!param->table->file->pk_is_clustering_key(pk_number))
11730     return FALSE;
11731 
11732   KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
11733   KEY_PART_INFO *pk_part_end= pk_part +
11734                               param->table->key_info[pk_number].user_defined_key_parts;
11735   for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
11736        ++key_part, ++pk_part)
11737   {
11738     if ((key_part->field != pk_part->field) ||
11739         (key_part->length != pk_part->length))
11740       return FALSE;
11741   }
11742   return (key_part == key_part_end);
11743 }
11744 
11745 
11746 /*
11747   Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
11748 
11749   SYNOPSIS
11750     get_quick_select()
11751       param
11752       idx            Index of used key in param->key.
11753       key_tree       SEL_ARG tree for the used key
11754       mrr_flags      MRR parameter for quick select
11755       mrr_buf_size   MRR parameter for quick select
11756       parent_alloc   If not NULL, use it to allocate memory for
11757                      quick select data. Otherwise use quick->alloc.
11758   NOTES
11759     The caller must call QUICK_SELECT::init for returned quick select.
11760 
11761     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
11762     deallocated when the returned quick select is deleted.
11763 
11764   RETURN
11765     NULL on error
11766     otherwise created quick select
11767 */
11768 
11769 QUICK_RANGE_SELECT *
get_quick_select(PARAM * param,uint idx,SEL_ARG * key_tree,uint mrr_flags,uint mrr_buf_size,MEM_ROOT * parent_alloc)11770 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
11771                  uint mrr_buf_size, MEM_ROOT *parent_alloc)
11772 {
11773   QUICK_RANGE_SELECT *quick;
11774   bool create_err= FALSE;
11775   DBUG_ENTER("get_quick_select");
11776 
11777   if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
11778     quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
11779                                       param->real_keynr[idx],
11780                                       MY_TEST(parent_alloc),
11781                                       parent_alloc, &create_err);
11782   else
11783     quick=new QUICK_RANGE_SELECT(param->thd, param->table,
11784                                  param->real_keynr[idx],
11785                                  MY_TEST(parent_alloc), NULL, &create_err);
11786 
11787   if (quick)
11788   {
11789     if (create_err ||
11790 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
11791 		       param->max_key,0))
11792     {
11793       delete quick;
11794       quick=0;
11795     }
11796     else
11797     {
11798       KEY *keyinfo= param->table->key_info+param->real_keynr[idx];
11799       quick->mrr_flags= mrr_flags;
11800       quick->mrr_buf_size= mrr_buf_size;
11801       quick->key_parts=(KEY_PART*)
11802         memdup_root(parent_alloc? parent_alloc : &quick->alloc,
11803                     (char*) param->key[idx],
11804                     sizeof(KEY_PART)*
11805                     param->table->actual_n_key_parts(keyinfo));
11806     }
11807   }
11808   DBUG_RETURN(quick);
11809 }
11810 
11811 
11812 /*
11813 ** Fix this to get all possible sub_ranges
11814 */
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
	       SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
	       uchar *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;
  int min_part= key_tree->part-1, // # of keypart values in min_key buffer
      max_part= key_tree->part-1; // # of keypart values in max_key buffer

  /* In-order traversal: emit ranges from the left subtree first */
  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  /* Append this node's endpoint values to the min/max key buffers */
  uchar *tmp_min_key=min_key,*tmp_max_key=max_key;
  min_part+= key_tree->store_min(key[key_tree->part].store_length,
                                 &tmp_min_key,min_key_flag);
  max_part+= key_tree->store_max(key[key_tree->part].store_length,
                                 &tmp_max_key,max_key_flag);

  /* Extend the range with intervals on the next keypart, if attached */
  if (key_tree->next_key_part &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
      key_tree->next_key_part->part == key_tree->part+1)
  {						  // const key as prefix
    if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
         memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 &&
	 key_tree->min_flag==0 && key_tree->max_flag==0)
    {
      /*
        This keypart is a closed equality (min == max, no open-end flags),
        so deeper keyparts can refine the range: recurse into them.
      */
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    {
      /* Not an equality: extend only endpoints that are still closed */
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
        min_part+= key_tree->next_key_part->store_min_key(key,
                                                          &tmp_min_key,
                                                          &tmp_min_flag,
                                                          MAX_KEY);
      if (!tmp_max_flag)
        max_part+= key_tree->next_key_part->store_max_key(key,
                                                          &tmp_max_key,
                                                          &tmp_max_flag,
                                                          MAX_KEY);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
  {
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }

  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
  if (flag == 0)
  {
    /* Both ends closed: check whether this is a point (EQ) range */
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
      /*
        A point range over all user-defined parts of a unique key is
        marked UNIQUE_RANGE, or NULL_RANGE if some keypart is NULL.
      */
      if ((table_key->flags & HA_NOSAME) &&
          min_part == key_tree->part &&
          key_tree->part == table_key->user_defined_key_parts-1)
      {
        DBUG_ASSERT(min_part == max_part);
        if ((table_key->flags & HA_NULL_PART_KEY) &&
            null_part_in_key(key,
                             param->min_key,
                             (uint) (tmp_min_key - param->min_key)))
          flag|= NULL_RANGE;
        else
          flag|= UNIQUE_RANGE;
      }
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
  if (!(range= new (param->thd->mem_root) QUICK_RANGE(
                               param->thd,
                               param->min_key,
			       (uint) (tmp_min_key - param->min_key),
                               min_part >=0 ? make_keypart_map(min_part) : 0,
			       param->max_key,
			       (uint) (tmp_max_key - param->max_key),
                               max_part >=0 ? make_keypart_map(max_part) : 0,
			       flag)))
    return 1;			// out of memory

  /* Track the widest key image and keypart count used by any range */
  set_if_bigger(quick->max_used_key_length, range->min_length);
  set_if_bigger(quick->max_used_key_length, range->max_length);
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
  if (insert_dynamic(&quick->ranges, (uchar*) &range))
    return 1;

 end:
  /* In-order traversal: finally emit ranges from the right subtree */
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}
11936 
11937 /*
11938   Return 1 if there is only one range and this uses the whole unique key
11939 */
11940 
unique_key_range()11941 bool QUICK_RANGE_SELECT::unique_key_range()
11942 {
11943   if (ranges.elements == 1)
11944   {
11945     QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
11946     if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
11947     {
11948       KEY *key=head->key_info+index;
11949       return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length;
11950     }
11951   }
11952   return 0;
11953 }
11954 
11955 
11956 
11957 /*
11958   Return TRUE if any part of the key is NULL
11959 
11960   SYNOPSIS
11961     null_part_in_key()
11962       key_part  Array of key parts (index description)
11963       key       Key values tuple
11964       length    Length of key values tuple in bytes.
11965 
11966   RETURN
11967     TRUE   The tuple has at least one "keypartX is NULL"
11968     FALSE  Otherwise
11969 */
11970 
null_part_in_key(KEY_PART * key_part,const uchar * key,uint length)11971 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
11972 {
11973   for (const uchar *end=key+length ;
11974        key < end;
11975        key+= key_part++->store_length)
11976   {
11977     if (key_part->null_bit && *key)
11978       return 1;
11979   }
11980   return 0;
11981 }
11982 
11983 
is_keys_used(const MY_BITMAP * fields)11984 bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
11985 {
11986   return is_key_used(head, index, fields);
11987 }
11988 
is_keys_used(const MY_BITMAP * fields)11989 bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields)
11990 {
11991   QUICK_RANGE_SELECT *quick;
11992   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
11993   while ((quick= it++))
11994   {
11995     if (is_key_used(head, quick->index, fields))
11996       return 1;
11997   }
11998   return 0;
11999 }
12000 
is_keys_used(const MY_BITMAP * fields)12001 bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields)
12002 {
12003   QUICK_SELECT_WITH_RECORD *qr;
12004   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
12005   while ((qr= it++))
12006   {
12007     if (is_key_used(head, qr->quick->index, fields))
12008       return 1;
12009   }
12010   return 0;
12011 }
12012 
is_keys_used(const MY_BITMAP * fields)12013 bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
12014 {
12015   QUICK_SELECT_I *quick;
12016   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
12017   while ((quick= it++))
12018   {
12019     if (quick->is_keys_used(fields))
12020       return 1;
12021   }
12022   return 0;
12023 }
12024 
12025 
get_ft_select(THD * thd,TABLE * table,uint key)12026 FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
12027 {
12028   bool create_err= FALSE;
12029   FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
12030   if (create_err)
12031   {
12032     delete fts;
12033     return NULL;
12034   }
12035   else
12036     return fts;
12037 }
12038 
12039 /*
12040   Create quick select from ref/ref_or_null scan.
12041 
12042   SYNOPSIS
12043     get_quick_select_for_ref()
12044       thd      Thread handle
12045       table    Table to access
12046       ref      ref[_or_null] scan parameters
12047       records  Estimate of number of records (needed only to construct
12048                quick select)
12049   NOTES
12050     This allocates things in a new memory root, as this may be called many
12051     times during a query.
12052 
12053   RETURN
12054     Quick select that retrieves the same rows as passed ref scan
12055     NULL on error.
12056 */
12057 
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                             TABLE_REF *ref, ha_rows records)
{
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
  QUICK_RANGE *range;
  uint part;
  bool create_err= FALSE;
  Cost_estimate cost;
  uint max_used_key_len;

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;

  if (!quick || create_err || quick->init())
    goto err;
  quick->records= records;

  /* Build a single equality range from the ref key buffer */
  if ((cp_buffer_from_ref(thd, table, ref) &&
       unlikely(thd->is_fatal_error)) ||
      unlikely(!(range= new(alloc) QUICK_RANGE())))
    goto err;                                   // out of memory

  range->min_key= range->max_key= ref->key_buff;
  range->min_length= range->max_length= ref->key_length;
  range->min_keypart_map= range->max_keypart_map=
    make_prev_keypart_map(ref->key_parts);
  range->flag= EQ_RANGE;

  if (unlikely(!(quick->key_parts=key_part=(KEY_PART *)
                 alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts))))
    goto err;

  /* Copy the used keypart descriptions from the index definition */
  max_used_key_len=0;
  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
    key_part->length=       key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
    key_part->null_bit=     key_info->key_part[part].null_bit;
    key_part->flag=         (uint8) key_info->key_part[part].key_part_flag;

    max_used_key_len +=key_info->key_part[part].store_length;
  }

  quick->max_used_key_length= max_used_key_len;

  if (insert_dynamic(&quick->ranges,(uchar*)&range))
    goto err;

  /*
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
    if (!(null_range= new (alloc)
          QUICK_RANGE(thd, ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts),
                      ref->key_buff, ref->key_length,
                      make_prev_keypart_map(ref->key_parts), EQ_RANGE)))
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
    if (insert_dynamic(&quick->ranges,(uchar*)&null_range))
      goto err;
  }

  /* Call multi_range_read_info() to get the MRR flags and buffer size */
  quick->mrr_flags= HA_MRR_NO_ASSOCIATION |
                    (table->file->keyread_enabled() ? HA_MRR_INDEX_ONLY : 0);
  if (thd->lex->sql_command != SQLCOM_SELECT)
    quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;

  quick->mrr_buf_size= thd->variables.mrr_buff_size;
  if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
                                         ~0,
                                         &quick->mrr_buf_size,
                                         &quick->mrr_flags, &cost))
    goto err;

  return quick;
err:
  /* delete NULL is a no-op, so this is safe even if allocation failed */
  delete quick;
  return 0;
}
12159 
12160 
12161 /*
12162   Perform key scans for all used indexes (except CPK), get rowids and merge
12163   them into an ordered non-recurrent sequence of rowids.
12164 
12165   The merge/duplicate removal is performed using Unique class. We put all
12166   rowids into Unique, get the sorted sequence and destroy the Unique.
12167 
12168   If table has a clustered primary key that covers all rows (TRUE for bdb
12169   and innodb currently) and one of the index_merge scans is a scan on PK,
12170   then rows that will be retrieved by PK scan are not put into Unique and
12171   primary key scan is not performed here, it is performed later separately.
12172 
12173   RETURN
12174     0     OK
12175     other error
12176 */
12177 
int read_keys_and_merge_scans(THD *thd,
                              TABLE *head,
                              List<QUICK_RANGE_SELECT> quick_selects,
                              QUICK_RANGE_SELECT *pk_quick_select,
                              READ_RECORD *read_record,
                              bool intersection,
                              key_map *filtered_scans,
                              Unique **unique_ptr)
{
  List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
  QUICK_RANGE_SELECT* cur_quick;
  int result;
  Unique *unique= *unique_ptr;
  handler *file= head->file;
  /* CPK filtering is on from the start when a PK quick select was given */
  bool with_cpk_filter= pk_quick_select != NULL;
  DBUG_ENTER("read_keys_and_merge");

  /* We're going to just read rowids. */
  head->prepare_for_position();

  cur_quick_it.rewind();
  cur_quick= cur_quick_it++;
  bool first_quick= TRUE;
  DBUG_ASSERT(cur_quick != 0);
  head->file->ha_start_keyread(cur_quick->index);

  /*
    We reuse the same instance of handler so we need to call both init and
    reset here.
  */
  if (cur_quick->init() || cur_quick->reset())
    goto err;

  if (unique == NULL)
  {
    DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_SUICIDE(); );
    DBUG_EXECUTE_IF("only_one_Unique_may_be_created",
                    DBUG_SET("+d,index_merge_may_not_create_a_Unique"); );

    /* First call: create the Unique that collects and de-duplicates rowids */
    unique= new Unique(refpos_order_cmp, (void *)file,
                       file->ref_length,
                       (size_t)thd->variables.sortbuff_size,
		       intersection ? quick_selects.elements : 0);
    if (!unique)
      goto err;
    *unique_ptr= unique;
  }
  else
  {
    /* Reuse the Unique from a previous execution */
    unique->reset();
  }

  DBUG_ASSERT(file->ref_length == unique->get_size());
  DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());

  /* Main loop: drain each quick select in turn, feeding rowids to Unique */
  for (;;)
  {
    while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
    {
      /* Current scan is exhausted; switch to the next quick select */
      if (intersection)
        with_cpk_filter= filtered_scans->is_set(cur_quick->index);
      if (first_quick)
      {
        first_quick= FALSE;
        if (intersection && unique->is_in_memory())
          unique->close_for_expansion();
      }
      cur_quick->range_end();
      cur_quick= cur_quick_it++;
      if (!cur_quick)
        break;

      if (cur_quick->file->inited != handler::NONE)
        cur_quick->file->ha_index_end();
      if (cur_quick->init() || cur_quick->reset())
        goto err;
    }

    if (result)
    {
      /* Any error other than normal end-of-scan aborts the merge */
      if (result != HA_ERR_END_OF_FILE)
      {
        cur_quick->range_end();
        goto err;
      }
      break;
    }

    if (thd->killed)
      goto err;

    /*
      Filter against the clustered PK ranges: for a union, skip rows the
      separate PK scan will return; for an intersection, skip rows outside
      the PK ranges.
    */
    if (with_cpk_filter &&
        pk_quick_select->row_in_ranges() != intersection )
      continue;

    cur_quick->file->position(cur_quick->record);
    if (unique->unique_add((char*)cur_quick->file->ref))
      goto err;
  }

  /*
    Ok all rowids are in the Unique now. The next call will initialize
    the unique structure so it can be used to iterate through the rowids
    sequence.
  */
  result= unique->get(head);
  /*
    index merge currently doesn't support "using index" at all
  */
  head->file->ha_end_keyread();
  if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0,
                       &unique->sort, 1 , 1, TRUE))
    result= 1;
 DBUG_RETURN(result);

err:
  head->file->ha_end_keyread();
  DBUG_RETURN(1);
}
12297 
12298 
read_keys_and_merge()12299 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
12300 
12301 {
12302   int result;
12303   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
12304   result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
12305                                     &read_record, FALSE, NULL, &unique);
12306   doing_pk_scan= FALSE;
12307   DBUG_RETURN(result);
12308 }
12309 
12310 /*
12311   Get next row for index_merge.
12312   NOTES
12313     The rows are read from
12314       1. rowids stored in Unique.
12315       2. QUICK_RANGE_SELECT with clustered primary key (if any).
12316     The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
12317 */
12318 
get_next()12319 int QUICK_INDEX_MERGE_SELECT::get_next()
12320 {
12321   int result;
12322   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
12323 
12324   if (doing_pk_scan)
12325     DBUG_RETURN(pk_quick_select->get_next());
12326 
12327   if ((result= read_record.read_record()) == -1)
12328   {
12329     result= HA_ERR_END_OF_FILE;
12330     end_read_record(&read_record);
12331     // Free things used by sort early. Shouldn't be strictly necessary
12332     unique->sort.reset();
12333     /* All rows from Unique have been retrieved, do a clustered PK scan */
12334     if (pk_quick_select)
12335     {
12336       doing_pk_scan= TRUE;
12337       if ((result= pk_quick_select->init()) ||
12338           (result= pk_quick_select->reset()))
12339         DBUG_RETURN(result);
12340       DBUG_RETURN(pk_quick_select->get_next());
12341     }
12342   }
12343 
12344   DBUG_RETURN(result);
12345 }
12346 
read_keys_and_merge()12347 int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge()
12348 
12349 {
12350   int result;
12351   DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge");
12352   result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
12353                                     &read_record, TRUE, &filtered_scans,
12354                                     &unique);
12355   DBUG_RETURN(result);
12356 }
12357 
get_next()12358 int QUICK_INDEX_INTERSECT_SELECT::get_next()
12359 {
12360   int result;
12361   DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next");
12362 
12363   if ((result= read_record.read_record()) == -1)
12364   {
12365     result= HA_ERR_END_OF_FILE;
12366     end_read_record(&read_record);
12367     unique->sort.reset();                       // Free things early
12368   }
12369 
12370   DBUG_RETURN(result);
12371 }
12372 
12373 
12374 /*
12375   Retrieve next record.
12376   SYNOPSIS
12377      QUICK_ROR_INTERSECT_SELECT::get_next()
12378 
12379   NOTES
12380     Invariant on enter/exit: all intersected selects have retrieved all index
12381     records with rowid <= some_rowid_val and no intersected select has
12382     retrieved any index records with rowid > some_rowid_val.
12383     We start fresh and loop until we have retrieved the same rowid in each of
12384     the key scans or we got an error.
12385 
12386     If a Clustered PK scan is present, it is used only to check if row
12387     satisfies its condition (and never used for row retrieval).
12388 
12389     Locking: to ensure that exclusive locks are only set on records that
12390     are included in the final result we must release the lock
12391     on all rows we read but do not include in the final result. This
12392     must be done on each index that reads the record and the lock
12393     must be released using the same handler (the same quick object) as
12394     used when reading the record.
12395 
12396   RETURN
12397    0     - Ok
12398    other - Error code if any error occurred.
12399 */
12400 
int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
  QUICK_SELECT_WITH_RECORD *qr;
  QUICK_RANGE_SELECT* quick;

  /* quick that reads the given rowid first. This is needed in order
  to be able to unlock the row using the same handler object that locked
  it */
  QUICK_RANGE_SELECT* quick_with_last_rowid;

  int error, cmp;
  uint last_rowid_count=0;              /* how many scans confirmed last_rowid */
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  /* Get a rowid for first quick and save it as a 'candidate' */
  qr= quick_it++;
  quick= qr->quick;
  error= quick->get_next();
  if (cpk_quick)
  {
    /*
      The clustered PK scan is used only as a filter: skip rows whose PK
      value falls outside its ranges, releasing the lock on each skipped row.
    */
    while (!error && !cpk_quick->row_in_ranges())
    {
      quick->file->unlock_row(); /* row not in range; unlock */
      error= quick->get_next();
    }
  }
  if (unlikely(error))
    DBUG_RETURN(error);

  /* Save the read key tuple */
  key_copy(qr->key_tuple, record, head->key_info + quick->index,
           quick->max_used_key_length);

  quick->file->position(quick->record);
  memcpy(last_rowid, quick->file->ref, head->file->ref_length);
  last_rowid_count= 1;
  quick_with_last_rowid= quick;

  /* Round-robin over the scans until each one has confirmed the candidate */
  while (last_rowid_count < quick_selects.elements)
  {
    if (!(qr= quick_it++))
    {
      /* Wrap around to the first scan */
      quick_it.rewind();
      qr= quick_it++;
    }
    quick= qr->quick;

    /* Advance this scan until its rowid catches up with the candidate */
    do
    {
      DBUG_EXECUTE_IF("innodb_quick_report_deadlock",
                      DBUG_SET("+d,innodb_report_deadlock"););
      if (unlikely((error= quick->get_next())))
      {
        /* On certain errors like deadlock, trx might be rolled back.*/
        if (!thd->transaction_rollback_request)
          quick_with_last_rowid->file->unlock_row();
        DBUG_RETURN(error);
      }
      quick->file->position(quick->record);
      cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
      if (cmp < 0)
      {
        /* This row is being skipped.  Release lock on it. */
        quick->file->unlock_row();
      }
    } while (cmp < 0);

    key_copy(qr->key_tuple, record, head->key_info + quick->index,
             quick->max_used_key_length);

    /* Ok, current select 'caught up' and returned ref >= cur_ref */
    if (cmp > 0)
    {
      /* Found a row with ref > cur_ref. Make it a new 'candidate' */
      if (cpk_quick)
      {
        /* Apply the clustered PK filter to the new candidate too */
        while (!cpk_quick->row_in_ranges())
        {
          quick->file->unlock_row(); /* row not in range; unlock */
          if (unlikely((error= quick->get_next())))
          {
            /* On certain errors like deadlock, trx might be rolled back.*/
            if (!thd->transaction_rollback_request)
              quick_with_last_rowid->file->unlock_row();
            DBUG_RETURN(error);
          }
        }
        quick->file->position(quick->record);
      }
      memcpy(last_rowid, quick->file->ref, head->file->ref_length);
      /* The old candidate was rejected: release its lock via its own handler */
      quick_with_last_rowid->file->unlock_row();
      last_rowid_count= 1;
      quick_with_last_rowid= quick;

      //save the fields here
      key_copy(qr->key_tuple, record, head->key_info + quick->index,
               quick->max_used_key_length);
    }
    else
    {
      /* current 'candidate' row confirmed by this select */
      last_rowid_count++;
    }
  }

  /* We get here if we got the same row ref in all scans. */
  if (need_to_fetch_row)
    error= head->file->ha_rnd_pos(head->record[0], last_rowid);

  if (!need_to_fetch_row)
  {
    /* Restore the columns we've read/saved with other quick selects */
    quick_it.rewind();
    while ((qr= quick_it++))
    {
      if (qr->quick != quick)
      {
        key_restore(record, qr->key_tuple, head->key_info + qr->quick->index,
                    qr->quick->max_used_key_length);
      }
    }
  }

  DBUG_RETURN(error);
}
12527 
12528 
12529 /*
12530   Retrieve next record.
12531   SYNOPSIS
12532     QUICK_ROR_UNION_SELECT::get_next()
12533 
12534   NOTES
12535     Enter/exit invariant:
12536     For each quick select in the queue a {key,rowid} tuple has been
12537     retrieved but the corresponding row hasn't been passed to output.
12538 
12539   RETURN
12540    0     - Ok
12541    other - Error code if any error occurred.
12542 */
12543 
int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  uchar *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");

  do
  {
    if (!queue.elements)
      DBUG_RETURN(HA_ERR_END_OF_FILE);          /* all merged scans exhausted */
    /* Ok, we have a queue with >= 1 scans */

    /* Queue top holds the smallest pending rowid among all scans */
    quick= (QUICK_SELECT_I*)queue_top(&queue);
    memcpy(cur_rowid, quick->last_rowid, rowid_length);

    /* put into queue rowid from the same stream as top element */
    if ((error= quick->get_next()))
    {
      if (error != HA_ERR_END_OF_FILE)
        DBUG_RETURN(error);
      queue_remove_top(&queue);                 /* this scan is done */
    }
    else
    {
      quick->save_last_pos();
      queue_replace_top(&queue);                /* re-order with the new rowid */
    }

    if (!have_prev_rowid)
    {
      /* No rows have been returned yet */
      dup_row= FALSE;
      have_prev_rowid= TRUE;
    }
    else
      /* Overlapping scans may yield the same rowid twice; skip duplicates */
      dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
  } while (dup_row);

  /* Swap the two rowid buffers so cur_rowid can be refilled next call */
  tmp= cur_rowid;
  cur_rowid= prev_rowid;
  prev_rowid= tmp;

  /* Fetch the full table row by rowid */
  error= head->file->ha_rnd_pos(quick->record, prev_rowid);
  DBUG_RETURN(error);
}
12590 
12591 
int QUICK_RANGE_SELECT::reset()
{
  /*
    Prepare this quick select for a (re-)scan: put the handler into
    index-read mode, allocate the MRR buffer if one is wanted (shrinking
    on OOM), and initialize the multi-range-read scan over this->ranges.
    Returns 0 on success, a handler error code otherwise.
  */
  uint  buf_size;
  uchar *mrange_buff;
  int   error;
  HANDLER_BUFFER empty_buf;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  last_range= NULL;
  cur_range= (QUICK_RANGE**) ranges.buffer;     /* restart from the first range */
  RANGE_SEQ_IF seq_funcs= {NULL, quick_range_seq_init, quick_range_seq_next, 0, 0};

  if (file->inited == handler::RND)
  {
    /* Handler could be left in this state by MRR */
    if (unlikely((error= file->ha_rnd_end())))
      DBUG_RETURN(error);
  }

  if (in_ror_merged_scan)
    head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);

  if (file->inited == handler::NONE)
  {
    DBUG_EXECUTE_IF("bug14365043_2",
                    DBUG_SET("+d,ha_index_init_fail"););
    if (unlikely((error= file->ha_index_init(index,1))))
    {
        file->print_error(error, MYF(0));
        goto err;
    }
  }

  /* Allocate buffer if we need one but haven't allocated it yet */
  if (mrr_buf_size && !mrr_buf_desc)
  {
    buf_size= mrr_buf_size;
    /* On allocation failure, retry with progressively smaller buffers */
    while (buf_size && !my_multi_malloc(key_memory_QUICK_RANGE_SELECT_mrr_buf_desc,
                                        MYF(MY_WME),
                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
                                        &mrange_buff, buf_size,
                                        NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      buf_size/= 2;
    }
    if (!mrr_buf_desc)
    {
      error= HA_ERR_OUT_OF_MEM;
      goto err;
    }

    /* Initialize the handler buffer. */
    mrr_buf_desc->buffer= mrange_buff;
    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
    mrr_buf_desc->end_of_used_area= mrange_buff;
  }

  if (!mrr_buf_desc)
    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;

  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
                                     mrr_flags, mrr_buf_desc? mrr_buf_desc:
                                                              &empty_buf);
err:
  /* Restore bitmaps set on entry */
  if (in_ror_merged_scan)
    head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
  DBUG_RETURN(error);
}
12663 
12664 
12665 /*
12666   Get next possible record using quick-struct.
12667 
12668   SYNOPSIS
12669     QUICK_RANGE_SELECT::get_next()
12670 
12671   NOTES
12672     Record is read into table->record[0]
12673 
12674   RETURN
12675     0			Found row
12676     HA_ERR_END_OF_FILE	No (more) rows in range
12677     #			Error code
12678 */
12679 
get_next()12680 int QUICK_RANGE_SELECT::get_next()
12681 {
12682   range_id_t dummy;
12683   int result;
12684   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
12685 
12686   if (!in_ror_merged_scan)
12687     DBUG_RETURN(file->multi_range_read_next(&dummy));
12688 
12689   MY_BITMAP * const save_read_set= head->read_set;
12690   MY_BITMAP * const save_write_set= head->write_set;
12691   /*
12692     We don't need to signal the bitmap change as the bitmap is always the
12693     same for this head->file
12694   */
12695   head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
12696   result= file->multi_range_read_next(&dummy);
12697   head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
12698   DBUG_RETURN(result);
12699 }
12700 
12701 
12702 /*
12703   Get the next record with a different prefix.
12704 
12705   @param prefix_length   length of cur_prefix
12706   @param group_key_parts The number of key parts in the group prefix
12707   @param cur_prefix      prefix of a key to be searched for
12708 
12709   Each subsequent call to the method retrieves the first record that has a
12710   prefix with length prefix_length and which is different from cur_prefix,
12711   such that the record with the new prefix is within the ranges described by
12712   this->ranges. The record found is stored into the buffer pointed by
12713   this->record. The method is useful for GROUP-BY queries with range
12714   conditions to discover the prefix of the next group that satisfies the range
12715   conditions.
12716 
12717   @todo
12718 
12719     This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
12720     methods should be unified into a more general one to reduce code
12721     duplication.
12722 
12723   @retval 0                  on success
12724   @retval HA_ERR_END_OF_FILE if returned all keys
12725   @retval other              if some error occurred
12726 */
12727 
int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
                                        uint group_key_parts,
                                        uchar *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
  /* Map covering the first group_key_parts key parts */
  const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);

  for (;;)
  {
    int result;
    if (last_range)
    {
      /* Read the next record in the same range with prefix after cur_prefix. */
      DBUG_ASSERT(cur_prefix != NULL);
      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
                                      HA_READ_AFTER_KEY);
      if (result || last_range->max_keypart_map == 0) {
        /*
          Only return if actual failure occurred. For HA_ERR_KEY_NOT_FOUND
          or HA_ERR_END_OF_FILE, we just want to continue to reach the next
          set of ranges. It is possible for the storage engine to return
          HA_ERR_KEY_NOT_FOUND/HA_ERR_END_OF_FILE even when there are more
          keys if it respects the end range set by the read_range_first call
          below.
        */
        if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
          DBUG_RETURN(result);
      } else {
        /*
          For storage engines that don't respect end range, check if we've
          moved past the current range.
        */
        key_range previous_endpoint;
        last_range->make_max_endpoint(&previous_endpoint, prefix_length,
                                      keypart_map);
        if (file->compare_key(&previous_endpoint) <= 0)
          DBUG_RETURN(0);                       /* still inside current range */
      }
    }

    /* Number of ranges not yet consumed by this scan */
    uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      last_range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    last_range= *(cur_range++);

    /* Build endpoints truncated to prefix_length / group key parts */
    key_range start_key, end_key;
    last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
    last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);

    result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
				   last_range->max_keypart_map ? &end_key : 0,
                                   MY_TEST(last_range->flag & EQ_RANGE),
				   TRUE);
    if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
      last_range= 0;			// Stop searching

    if (result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    last_range= 0;			// No matching rows; go to next range
  }
}
12793 
12794 
12795 /* Get next for geometrical indexes */
12796 
get_next()12797 int QUICK_RANGE_SELECT_GEOM::get_next()
12798 {
12799   DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
12800 
12801   for (;;)
12802   {
12803     int result;
12804     if (last_range)
12805     {
12806       // Already read through key
12807       result= file->ha_index_next_same(record, last_range->min_key,
12808                                        last_range->min_length);
12809       if (result != HA_ERR_END_OF_FILE)
12810 	DBUG_RETURN(result);
12811     }
12812 
12813     uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
12814     if (count == 0)
12815     {
12816       /* Ranges have already been used up before. None is left for read. */
12817       last_range= 0;
12818       DBUG_RETURN(HA_ERR_END_OF_FILE);
12819     }
12820     last_range= *(cur_range++);
12821 
12822     result= file->ha_index_read_map(record, last_range->min_key,
12823                                     last_range->min_keypart_map,
12824                                     (ha_rkey_function)(last_range->flag ^
12825                                                        GEOM_FLAG));
12826     if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
12827       DBUG_RETURN(result);
12828     last_range= 0;				// Not found, to next range
12829   }
12830 }
12831 
12832 
12833 /*
12834   Check if current row will be retrieved by this QUICK_RANGE_SELECT
12835 
12836   NOTES
12837     It is assumed that currently a scan is being done on another index
12838     which reads all necessary parts of the index that is scanned by this
12839     quick select.
12840     The implementation does a binary search on sorted array of disjoint
12841     ranges, without taking size of range into account.
12842 
12843     This function is used to filter out clustered PK scan rows in
12844     index_merge quick select.
12845 
12846   RETURN
12847     TRUE  if current row will be retrieved by this quick select
12848     FALSE if not
12849 */
12850 
row_in_ranges()12851 bool QUICK_RANGE_SELECT::row_in_ranges()
12852 {
12853   QUICK_RANGE *res;
12854   uint min= 0;
12855   uint max= ranges.elements - 1;
12856   uint mid= (max + min)/2;
12857 
12858   while (min != max)
12859   {
12860     if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
12861     {
12862       /* current row value > mid->max */
12863       min= mid + 1;
12864     }
12865     else
12866       max= mid;
12867     mid= (min + max) / 2;
12868   }
12869   res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
12870   return (!cmp_next(res) && !cmp_prev(res));
12871 }
12872 
12873 /*
12874   This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
12875   get_next() interface, but we have to hold a pointer to the original
12876   QUICK_RANGE_SELECT because its data are used all over the place. What
12877   should be done is to factor out the data that is needed into a base
12878   class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
12879   which handle the ranges and implement the get_next() function.  But
12880   for now, this seems to work right at least.
12881  */
12882 
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts_arg)
 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
  used_key_parts (used_key_parts_arg)
{
  /*
    Build a descending-order twin of *q: shares q's range memory (hence
    dont_free below) but iterates the ranges back-to-front via rev_ranges.
  */
  QUICK_RANGE *r;
  /*
    Use default MRR implementation for reverse scans. No table engine
    currently can do an MRR scan with output in reverse index order.
  */
  mrr_buf_desc= NULL;
  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  mrr_buf_size= 0;

  /* Put the ranges into rev_ranges in reverse order */
  QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
  QUICK_RANGE **end_range= pr + ranges.elements;
  for (; pr!=end_range; pr++)
    rev_ranges.push_front(*pr);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  for (r = rev_it++; r; r = rev_it++)
  {
    if ((r->flag & EQ_RANGE) &&
	head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;				// Don't free shared mem
}
12912 
12913 
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
   *   - if it is an EQ_RANGE, which means that max key covers the entire
   *     key, go directly to the key and read through it (sorting backwards is
   *     same as sorting forwards)
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (last_range)
    {						// Already read through key
      /*
        Full-key EQ_RANGE: read forward through duplicates of the key
        (order within equal keys doesn't matter); otherwise step backwards.
      */
      result = ((last_range->flag & EQ_RANGE &&
                 used_key_parts <= head->key_info[index].user_defined_key_parts) ?
                file->ha_index_next_same(record, last_range->min_key,
                                      last_range->min_length) :
                file->ha_index_prev(record));
      if (!result)
      {
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(last_range= rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    /* Describe the new range's endpoints for the handler */
    key_range       start_key;
    start_key.key=    (const uchar*) last_range->min_key;
    start_key.length= last_range->min_length;
    start_key.flag=   ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
                       (last_range->flag & EQ_RANGE) ?
                       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
    start_key.keypart_map= last_range->min_keypart_map;
    key_range       end_key;
    end_key.key=      (const uchar*) last_range->max_key;
    end_key.length=   last_range->max_length;
    end_key.flag=     (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
                       HA_READ_AFTER_KEY);
    end_key.keypart_map= last_range->max_keypart_map;
    result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &start_key,
                                     (last_range->flag & NO_MAX_RANGE) ? NULL : &end_key);
    if (result)
    {
      DBUG_RETURN(result);
    }

    if (last_range->flag & NO_MAX_RANGE)        // Read last record
    {
      int local_error;
      if (unlikely((local_error= file->ha_index_last(record))))
	DBUG_RETURN(local_error);		// Empty table
      if (cmp_prev(last_range) == 0)
	DBUG_RETURN(0);
      last_range= 0;                            // No match; go to next range
      continue;
    }

    if (last_range->flag & EQ_RANGE &&
        used_key_parts <= head->key_info[index].user_defined_key_parts)

    {
      /* Full-key equality: position directly on the key */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      HA_READ_KEY_EXACT);
    }
    else
    {
      DBUG_ASSERT(last_range->flag & NEAR_MAX ||
                  (last_range->flag & EQ_RANGE &&
                   used_key_parts > head->key_info[index].user_defined_key_parts) ||
                  range_reads_after_key(last_range));
      /* Position at/just before the upper bound, then the loop above will
         move backwards through the range */
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      ((last_range->flag & NEAR_MAX) ?
                                       HA_READ_BEFORE_KEY :
                                       HA_READ_PREFIX_LAST_OR_PREV));
    }
    if (result)
    {
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
      last_range= 0;                            // Not found, to next range
      continue;
    }
    if (cmp_prev(last_range) == 0)
    {
      if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
	last_range= 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    last_range= 0;                              // To next range
  }
}
13018 
13019 
13020 /**
13021   Create a compatible quick select with the result ordered in an opposite way
13022 
13023   @param used_key_parts_arg  Number of used key parts
13024 
13025   @retval NULL in case of errors (OOM etc)
13026   @retval pointer to a newly created QUICK_SELECT_DESC if success
13027 */
13028 
make_reverse(uint used_key_parts_arg)13029 QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
13030 {
13031   QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg);
13032   if (new_quick == NULL)
13033   {
13034     delete new_quick;
13035     return NULL;
13036   }
13037   return new_quick;
13038 }
13039 
13040 
13041 /*
13042   Compare if found key is over max-value
13043   Returns 0 if key <= range->max_key
13044   TODO: Figure out why can't this function be as simple as cmp_prev().
13045 */
13046 
int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  KEY_PART *key_part=key_parts;
  uint store_length;

  /* Walk the stored max key part by part, comparing against the current row */
  for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
       key < end;
       key+= store_length, key_part++)
  {
    int cmp;
    store_length= key_part->store_length;
    if (key_part->null_bit)
    {
      /* First stored byte of a nullable key part is the NULL indicator */
      if (*key)
      {
        /* Bound is NULL: any non-NULL row value is greater */
        if (!key_part->field->is_null())
          return 1;
        continue;
      }
      else if (key_part->field->is_null())
        return 0;                               /* NULL row <= non-NULL bound */
      key++;					// Skip null byte
      store_length--;
    }
    if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
      return 0;
    if (cmp > 0)
      return 1;
  }
  /* All parts equal: over the max only if the bound is exclusive */
  return (range_arg->flag & NEAR_MAX) ? 1 : 0;          // Exact match
}
13081 
13082 
13083 /*
13084   Returns 0 if found key is inside range (found key >= range->min_key).
13085 */
13086 
cmp_prev(QUICK_RANGE * range_arg)13087 int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
13088 {
13089   int cmp;
13090   if (range_arg->flag & NO_MIN_RANGE)
13091     return 0;					/* key can't be to small */
13092 
13093   cmp= key_cmp(key_part_info, range_arg->min_key,
13094                range_arg->min_length);
13095   if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
13096     return 0;
13097   return 1;                                     // outside of range
13098 }
13099 
13100 
13101 /*
13102  * TRUE if this range will require using HA_READ_AFTER_KEY
13103    See comment in get_next() about this
13104  */
13105 
range_reads_after_key(QUICK_RANGE * range_arg)13106 bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
13107 {
13108   return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
13109 	  !(range_arg->flag & EQ_RANGE) ||
13110 	  head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
13111 }
13112 
13113 
add_key_name(String * str,bool * first)13114 void QUICK_SELECT_I::add_key_name(String *str, bool *first)
13115 {
13116   KEY *key_info= head->key_info + index;
13117 
13118   if (*first)
13119     *first= FALSE;
13120   else
13121     str->append(',');
13122   str->append(&key_info->name);
13123 }
13124 
13125 
get_explain(MEM_ROOT * local_alloc)13126 Explain_quick_select* QUICK_RANGE_SELECT::get_explain(MEM_ROOT *local_alloc)
13127 {
13128   Explain_quick_select *res;
13129   if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_RANGE)))
13130     res->range.set(local_alloc, &head->key_info[index], max_used_key_length);
13131   return res;
13132 }
13133 
13134 
13135 Explain_quick_select*
get_explain(MEM_ROOT * local_alloc)13136 QUICK_GROUP_MIN_MAX_SELECT::get_explain(MEM_ROOT *local_alloc)
13137 {
13138   Explain_quick_select *res;
13139   if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_GROUP_MIN_MAX)))
13140     res->range.set(local_alloc, &head->key_info[index], max_used_key_length);
13141   return res;
13142 }
13143 
13144 
13145 Explain_quick_select*
get_explain(MEM_ROOT * local_alloc)13146 QUICK_INDEX_SORT_SELECT::get_explain(MEM_ROOT *local_alloc)
13147 {
13148   Explain_quick_select *res;
13149   if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13150     return NULL;
13151 
13152   QUICK_RANGE_SELECT *quick;
13153   Explain_quick_select *child_explain;
13154   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13155   while ((quick= it++))
13156   {
13157     if ((child_explain= quick->get_explain(local_alloc)))
13158       res->children.push_back(child_explain);
13159     else
13160       return NULL;
13161   }
13162 
13163   if (pk_quick_select)
13164   {
13165     if ((child_explain= pk_quick_select->get_explain(local_alloc)))
13166       res->children.push_back(child_explain);
13167     else
13168       return NULL;
13169   }
13170   return res;
13171 }
13172 
13173 
13174 /*
13175   Same as QUICK_INDEX_SORT_SELECT::get_explain(), but primary key is printed
13176   first
13177 */
13178 
13179 Explain_quick_select*
get_explain(MEM_ROOT * local_alloc)13180 QUICK_INDEX_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc)
13181 {
13182   Explain_quick_select *res;
13183   Explain_quick_select *child_explain;
13184 
13185   if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13186     return NULL;
13187 
13188   if (pk_quick_select)
13189   {
13190     if ((child_explain= pk_quick_select->get_explain(local_alloc)))
13191       res->children.push_back(child_explain);
13192     else
13193       return NULL;
13194   }
13195 
13196   QUICK_RANGE_SELECT *quick;
13197   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13198   while ((quick= it++))
13199   {
13200     if ((child_explain= quick->get_explain(local_alloc)))
13201       res->children.push_back(child_explain);
13202     else
13203       return NULL;
13204   }
13205   return res;
13206 }
13207 
13208 
13209 Explain_quick_select*
get_explain(MEM_ROOT * local_alloc)13210 QUICK_ROR_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc)
13211 {
13212   Explain_quick_select *res;
13213   Explain_quick_select *child_explain;
13214 
13215   if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13216     return NULL;
13217 
13218   QUICK_SELECT_WITH_RECORD *qr;
13219   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13220   while ((qr= it++))
13221   {
13222     if ((child_explain= qr->quick->get_explain(local_alloc)))
13223       res->children.push_back(child_explain);
13224     else
13225       return NULL;
13226   }
13227 
13228   if (cpk_quick)
13229   {
13230     if ((child_explain= cpk_quick->get_explain(local_alloc)))
13231       res->children.push_back(child_explain);
13232     else
13233       return NULL;
13234   }
13235   return res;
13236 }
13237 
13238 
13239 Explain_quick_select*
get_explain(MEM_ROOT * local_alloc)13240 QUICK_ROR_UNION_SELECT::get_explain(MEM_ROOT *local_alloc)
13241 {
13242   Explain_quick_select *res;
13243   Explain_quick_select *child_explain;
13244 
13245   if (!(res= new (local_alloc) Explain_quick_select(get_type())))
13246     return NULL;
13247 
13248   QUICK_SELECT_I *quick;
13249   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13250   while ((quick= it++))
13251   {
13252     if ((child_explain= quick->get_explain(local_alloc)))
13253       res->children.push_back(child_explain);
13254     else
13255       return NULL;
13256   }
13257 
13258   return res;
13259 }
13260 
13261 
add_key_and_length(String * key_names,String * used_lengths,bool * first)13262 void QUICK_SELECT_I::add_key_and_length(String *key_names,
13263                                         String *used_lengths,
13264                                         bool *first)
13265 {
13266   char buf[64];
13267   size_t length;
13268   KEY *key_info= head->key_info + index;
13269 
13270   if (*first)
13271     *first= FALSE;
13272   else
13273   {
13274     key_names->append(',');
13275     used_lengths->append(',');
13276   }
13277   key_names->append(&key_info->name);
13278   length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
13279   used_lengths->append(buf, length);
13280 }
13281 
13282 
add_keys_and_lengths(String * key_names,String * used_lengths)13283 void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
13284                                               String *used_lengths)
13285 {
13286   bool first= TRUE;
13287 
13288   add_key_and_length(key_names, used_lengths, &first);
13289 }
13290 
add_keys_and_lengths(String * key_names,String * used_lengths)13291 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
13292                                                     String *used_lengths)
13293 {
13294   QUICK_RANGE_SELECT *quick;
13295   bool first= TRUE;
13296 
13297   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13298 
13299   while ((quick= it++))
13300   {
13301     quick->add_key_and_length(key_names, used_lengths, &first);
13302   }
13303 
13304   if (pk_quick_select)
13305     pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
13306 }
13307 
13308 
add_keys_and_lengths(String * key_names,String * used_lengths)13309 void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
13310                                                         String *used_lengths)
13311 {
13312   QUICK_RANGE_SELECT *quick;
13313   bool first= TRUE;
13314 
13315   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13316 
13317   if (pk_quick_select)
13318     pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
13319 
13320   while ((quick= it++))
13321   {
13322     quick->add_key_and_length(key_names, used_lengths, &first);
13323   }
13324 }
13325 
add_keys_and_lengths(String * key_names,String * used_lengths)13326 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
13327                                                       String *used_lengths)
13328 {
13329   QUICK_SELECT_WITH_RECORD *qr;
13330   bool first= TRUE;
13331 
13332   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13333 
13334   while ((qr= it++))
13335   {
13336     qr->quick->add_key_and_length(key_names, used_lengths, &first);
13337   }
13338   if (cpk_quick)
13339     cpk_quick->add_key_and_length(key_names, used_lengths, &first);
13340 }
13341 
add_keys_and_lengths(String * key_names,String * used_lengths)13342 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
13343                                                   String *used_lengths)
13344 {
13345   QUICK_SELECT_I *quick;
13346   bool first= TRUE;
13347 
13348   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13349 
13350   while ((quick= it++))
13351   {
13352     if (first)
13353       first= FALSE;
13354     else
13355     {
13356       used_lengths->append(',');
13357       key_names->append(',');
13358     }
13359     quick->add_keys_and_lengths(key_names, used_lengths);
13360   }
13361 }
13362 
13363 
add_used_key_part_to_set()13364 void QUICK_RANGE_SELECT::add_used_key_part_to_set()
13365 {
13366   uint key_len;
13367   KEY_PART *part= key_parts;
13368   for (key_len=0; key_len < max_used_key_length;
13369        key_len += (part++)->store_length)
13370   {
13371     /*
13372       We have to use field_index instead of part->field
13373       as for partial fields, part->field points to
13374       a temporary field that is only part of the original
13375       field.  field_index always points to the original field
13376     */
13377     Field *field= head->field[part->field->field_index];
13378     field->register_field_in_read_map();
13379   }
13380 }
13381 
13382 
add_used_key_part_to_set()13383 void QUICK_GROUP_MIN_MAX_SELECT::add_used_key_part_to_set()
13384 {
13385   uint key_len;
13386   KEY_PART_INFO *part= index_info->key_part;
13387   for (key_len=0; key_len < max_used_key_length;
13388        key_len += (part++)->store_length)
13389   {
13390     /*
13391       We have to use field_index instead of part->field
13392       as for partial fields, part->field points to
13393       a temporary field that is only part of the original
13394       field.  field_index always points to the original field
13395     */
13396     Field *field= head->field[part->field->field_index];
13397     field->register_field_in_read_map();
13398   }
13399 }
13400 
13401 
add_used_key_part_to_set()13402 void QUICK_ROR_INTERSECT_SELECT::add_used_key_part_to_set()
13403 {
13404   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
13405   QUICK_SELECT_WITH_RECORD *quick;
13406   while ((quick= it++))
13407   {
13408     quick->quick->add_used_key_part_to_set();
13409   }
13410 }
13411 
13412 
add_used_key_part_to_set()13413 void QUICK_INDEX_SORT_SELECT::add_used_key_part_to_set()
13414 {
13415   QUICK_RANGE_SELECT *quick;
13416   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
13417   while ((quick= it++))
13418   {
13419     quick->add_used_key_part_to_set();
13420   }
13421   if (pk_quick_select)
13422     pk_quick_select->add_used_key_part_to_set();
13423 }
13424 
13425 
add_used_key_part_to_set()13426 void QUICK_ROR_UNION_SELECT::add_used_key_part_to_set()
13427 {
13428   QUICK_SELECT_I *quick;
13429   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
13430 
13431   while ((quick= it++))
13432   {
13433     quick->add_used_key_part_to_set();
13434   }
13435 }
13436 
13437 
13438 /*******************************************************************************
13439 * Implementation of QUICK_GROUP_MIN_MAX_SELECT
13440 *******************************************************************************/
13441 
13442 static inline uint get_field_keypart(KEY *index, Field *field);
13443 static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree,
13444                                     SEL_ARG **cur_range);
13445 static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
13446                        KEY_PART_INFO *first_non_group_part,
13447                        KEY_PART_INFO *min_max_arg_part,
13448                        KEY_PART_INFO *last_part, THD *thd,
13449                        uchar *key_infix, uint *key_infix_len,
13450                        KEY_PART_INFO **first_non_infix_part);
13451 static bool
13452 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
13453                                Field::imagetype image_type,
13454                                bool *has_min_max_fld, bool *has_other_fld);
13455 
13456 static void
13457 cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
13458                    uint group_key_parts, SEL_TREE *range_tree,
13459                    SEL_ARG *index_tree, ha_rows quick_prefix_records,
13460                    bool have_min, bool have_max,
13461                    double *read_cost, ha_rows *records);
13462 
13463 
13464 /**
13465   Test if this access method is applicable to a GROUP query with MIN/MAX
13466   functions, and if so, construct a new TRP object.
13467 
13468   DESCRIPTION
13469     Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
13470     Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
13471     following conditions:
13472     A) Table T has at least one compound index I of the form:
13473        I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
13474     B) Query conditions:
13475     B0. Q is over a single table T.
13476     B1. The attributes referenced by Q are a subset of the attributes of I.
13477     B2. All attributes QA in Q can be divided into 3 overlapping groups:
13478         - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
13479           referenced by any number of MIN and/or MAX functions if present.
13480         - WA = {W_1, ..., W_p} - from the WHERE clause
13481         - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
13482              = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
13484         - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
13485           GROUP BY and not referenced by MIN/MAX functions.
13486         with the following properties specified below.
13487     B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
13488         applicable.
13489 
    SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions which, if present, is denoted as C.
13492     SA2. The position of the C attribute in the index is after the last A_k.
13493     SA3. The attribute C can be referenced in the WHERE clause only in
13494          predicates of the forms:
13495          - (C {< | <= | > | >= | =} const)
13496          - (const {< | <= | > | >= | =} C)
13497          - (C between const_i and const_j)
13498          - C IS NULL
13499          - C IS NOT NULL
13500          - C != const
13501     SA4. If Q has a GROUP BY clause, there are no other aggregate functions
13502          except MIN and MAX. For queries with DISTINCT, aggregate functions
13503          are allowed.
13504     SA5. The select list in DISTINCT queries should not contain expressions.
13505     SA6. Clustered index can not be used by GROUP_MIN_MAX quick select
13506          for AGG_FUNC(DISTINCT ...) optimization because cursor position is
13507          never stored after a unique key lookup in the clustered index and
         further index_next/prev calls can not be used. So loose index scan
13509          optimization can not be used in this case.
13510     SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
13511          access method is not used.
13512          For above queries MIN/MAX() aggregation has to be done at
13513          nested_loops_join (end_send_group). But with current design MIN/MAX()
13514          is always set as part of loose index scan. Because of this mismatch
13515          MIN() and MAX() values will be set incorrectly. For such queries to
13516          work we need a new interface for loose index scan. This new interface
13517          should only fetch records with min and max values and let
13518          end_send_group to do aggregation. Until then do not use
13519          loose_index_scan.
13520     GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
13521          G_i = A_j => i = j.
13522     GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
13523          forms a prefix of I. This permutation is used as the GROUP clause
13524          when the DISTINCT query is converted to a GROUP query.
13525     GA3. The attributes in GA may participate in arbitrary predicates, divided
13526          into two groups:
13527          - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
13528            attributes of a prefix of GA
13529          - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
13530            of GA. Since P is applied to only GROUP attributes it filters some
13531            groups, and thus can be applied after the grouping.
13532     GA4. There are no expressions among G_i, just direct column references.
13533     NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
13534          and the MIN/MAX attribute C, then NGA must consist of exactly the
13535          index attributes that constitute the gap. As a result there is a
13536          permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
13537          in the index.
13538     NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
13539          equality conditions for all NG_i of the form (NG_i = const) or
13540          (const = NG_i), such that each NG_i is referenced in exactly one
13541          conjunct. Informally, the predicates provide constants to fill the
13542          gap in the index.
13543     NGA3.If BA <> {}, there can only be one range. TODO: This is a code
13544          limitation and is not strictly needed. See BUG#15947433
13545     WA1. There are no other attributes in the WHERE clause except the ones
13546          referenced in predicates RNG, PA, PC, EQ defined above. Therefore
13547          WA is subset of (GA union NGA union C) for GA,NGA,C that pass the
13548          above tests. By transitivity then it also follows that each WA_i
13549          participates in the index I (if this was already tested for GA, NGA
13550          and C).
13551     WA2. If there is a predicate on C, then it must be in conjunction
13552          to all predicates on all earlier keyparts in I.
13553 
13554     C) Overall query form:
13555        SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
13556          FROM T
13557         WHERE [RNG(A_1,...,A_p ; where p <= k)]
13558          [AND EQ(B_1,...,B_m)]
13559          [AND PC(C)]
13560          [AND PA(A_i1,...,A_iq)]
13561        GROUP BY A_1,...,A_k
13562        [HAVING PH(A_1, ..., B_1,..., C)]
13563     where EXPR(...) is an arbitrary expression over some or all SELECT fields,
13564     or:
13565        SELECT DISTINCT A_i1,...,A_ik
13566          FROM T
13567         WHERE [RNG(A_1,...,A_p ; where p <= k)]
13568          [AND PA(A_i1,...,A_iq)];
13569 
13570   NOTES
13571     If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
13573     object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
13574     If (mem_root == NULL), then the function only tests whether the current
13575     query satisfies the conditions above, and, if so, sets
13576     is_applicable = TRUE.
13577 
13578     Queries with DISTINCT for which index access can be used are transformed
13579     into equivalent group-by queries of the form:
13580 
13581     SELECT A_1,...,A_k FROM T
13582      WHERE [RNG(A_1,...,A_p ; where p <= k)]
13583       [AND PA(A_i1,...,A_iq)]
13584     GROUP BY A_1,...,A_k;
13585 
13586     The group-by list is a permutation of the select attributes, according
13587     to their order in the index.
13588 
13589   TODO
13590   - What happens if the query groups by the MIN/MAX field, and there is no
13591     other field as in: "select MY_MIN(a) from t1 group by a" ?
13592   - We assume that the general correctness of the GROUP-BY query was checked
13593     before this point. Is this correct, or do we have to check it completely?
13594   - Lift the limitation in condition (B3), that is, make this access method
13595     applicable to ROLLUP queries.
13596 
13597  @param  param     Parameter from test_quick_select
13598  @param  sel_tree  Range tree generated by get_mm_tree
13599  @param  read_time Best read time so far of table or index scan time
13600  @return table read plan
13601    @retval NULL  Loose index scan not applicable or mem_root == NULL
13602    @retval !NULL Loose index scan table read plan
13603 */
13604 
13605 static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM * param,SEL_TREE * tree,double read_time)13606 get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
13607 {
13608   THD *thd= param->thd;
13609   JOIN *join= thd->lex->current_select->join;
13610   TABLE *table= param->table;
13611   bool have_min= FALSE;              /* TRUE if there is a MIN function. */
13612   bool have_max= FALSE;              /* TRUE if there is a MAX function. */
13613   Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions
13614   KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
13615   uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
13616   KEY *index_info= NULL;    /* The index chosen for data access. */
13617   uint index= 0;            /* The id of the chosen index. */
13618   uint group_key_parts= 0;  // Number of index key parts in the group prefix.
13619   uint used_key_parts= 0;   /* Number of index key parts used for access. */
13620   uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
13621   uint key_infix_len= 0;          /* Length of key_infix. */
13622   TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
13623   uint key_part_nr;
13624   uint elements_in_group;
13625   ORDER *tmp_group;
13626   Item *item;
13627   Item_field *item_field;
13628   bool is_agg_distinct;
13629   List<Item_field> agg_distinct_flds;
13630   DBUG_ENTER("get_best_group_min_max");
13631 
13632   Json_writer_object trace_group(thd, "group_index_range");
13633   const char* cause= NULL;
13634 
13635   /* Perform few 'cheap' tests whether this access method is applicable. */
13636   if (!join) /* This is not a select statement. */
13637     cause= "no join";
13638   else if (join->table_count != 1)  /* The query must reference one table. */
13639     cause= "not single_table";
13640   else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
13641     cause= "rollup";
13642   else if (table->s->keys == 0) /* There are no indexes to use. */
13643     cause= "no index";
13644   else if (join->conds && join->conds->used_tables()
13645           & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */
13646     cause= "correlated conditions";
13647 
13648   if (cause)
13649   {
13650     trace_group.add("chosen", false).add("cause", cause);
13651     DBUG_RETURN(NULL);
13652   }
13653 
13654   is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
13655 
13656   if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
13657       (!join->select_distinct) &&
13658       !is_agg_distinct)
13659   {
13660     trace_group.add("chosen", false).add("cause","no group by or distinct");
13661     DBUG_RETURN(NULL);
13662   }
13663   /* Analyze the query in more detail. */
13664 
13665   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
13666   List_iterator<Item> select_items_it(join->fields_list);
13667 
13668   if (join->sum_funcs[0])
13669   {
13670     Item_sum *min_max_item;
13671     Item_sum **func_ptr= join->sum_funcs;
13672     while ((min_max_item= *(func_ptr++)))
13673     {
13674       if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
13675         have_min= TRUE;
13676       else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
13677         have_max= TRUE;
13678       else if (is_agg_distinct &&
13679                (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
13680                 min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
13681                 min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
13682         continue;
13683       else
13684       {
13685         trace_group.add("chosen", false)
13686                    .add("cause", "not applicable aggregate function");
13687         DBUG_RETURN(NULL);
13688       }
13689 
13690       /* The argument of MIN/MAX. */
13691       Item *expr= min_max_item->get_arg(0)->real_item();
13692       if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
13693       {
13694         if (! min_max_arg_item)
13695           min_max_arg_item= (Item_field*) expr;
13696         else if (! min_max_arg_item->eq(expr, 1))
13697         {
13698           trace_group.add("chosen", false)
13699                      .add("cause", "arguments different in min max function");
13700           DBUG_RETURN(NULL);
13701         }
13702       }
13703       else
13704       {
13705         trace_group.add("chosen", false)
13706                    .add("cause", "no field item in min max function");
13707         DBUG_RETURN(NULL);
13708       }
13709     }
13710   }
13711 
13712   /* Check (SA7). */
13713   if (is_agg_distinct && (have_max || have_min))
13714   {
13715     trace_group.add("chosen", false)
13716                .add("cause", "have both agg distinct and min max");
13717     DBUG_RETURN(NULL);
13718   }
13719 
13720   /* Check (SA5). */
13721   if (join->select_distinct)
13722   {
13723     trace_group.add("distinct_query", true);
13724     while ((item= select_items_it++))
13725     {
13726       if (item->real_item()->type() != Item::FIELD_ITEM)
13727       {
13728         trace_group.add("chosen", false)
13729                    .add("cause", "distinct field is expression");
13730         DBUG_RETURN(NULL);
13731       }
13732     }
13733   }
13734 
13735   /* Check (GA4) - that there are no expressions among the group attributes. */
13736   elements_in_group= 0;
13737   for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
13738   {
13739     if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
13740     {
13741       trace_group.add("chosen", false)
13742                  .add("cause", "group field is expression");
13743       DBUG_RETURN(NULL);
13744     }
13745     elements_in_group++;
13746   }
13747 
13748   /*
13749     Check that table has at least one compound index such that the conditions
13750     (GA1,GA2) are all TRUE. If there is more than one such index, select the
13751     first one. Here we set the variables: group_prefix_len and index_info.
13752   */
13753   /* Cost-related variables for the best index so far. */
13754   double best_read_cost= DBL_MAX;
13755   ha_rows best_records= 0;
13756   SEL_ARG *best_index_tree= NULL;
13757   ha_rows best_quick_prefix_records= 0;
13758   uint best_param_idx= 0;
13759 
13760   const uint pk= param->table->s->primary_key;
13761   uint max_key_part;
13762   SEL_ARG *cur_index_tree= NULL;
13763   ha_rows cur_quick_prefix_records= 0;
13764 
13765   // We go through allowed indexes
13766   Json_writer_array trace_indexes(thd, "potential_group_range_indexes");
13767 
13768   for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx)
13769   {
13770     const uint cur_index= param->real_keynr[cur_param_idx];
13771     KEY *const cur_index_info= &table->key_info[cur_index];
13772 
13773     Json_writer_object trace_idx(thd);
13774     trace_idx.add("index", cur_index_info->name);
13775 
13776     KEY_PART_INFO *cur_part;
13777     KEY_PART_INFO *end_part; /* Last part for loops. */
13778     /* Last index part. */
13779     KEY_PART_INFO *last_part;
13780     KEY_PART_INFO *first_non_group_part;
13781     KEY_PART_INFO *first_non_infix_part;
13782     uint key_parts;
13783     uint key_infix_parts;
13784     uint cur_group_key_parts= 0;
13785     uint cur_group_prefix_len= 0;
13786     double cur_read_cost;
13787     ha_rows cur_records;
13788     key_map used_key_parts_map;
13789     uint cur_key_infix_len= 0;
13790     uchar cur_key_infix[MAX_KEY_LENGTH];
13791     uint cur_used_key_parts;
13792 
13793     /*
13794       Check (B1) - if current index is covering.
13795       (was also: "Exclude UNIQUE indexes ..." but this was removed because
13796       there are cases Loose Scan over a multi-part index is useful).
13797     */
13798     if (!table->covering_keys.is_set(cur_index) ||
13799         !table->keys_in_use_for_group_by.is_set(cur_index))
13800     {
13801       cause= "not covering";
13802       goto next_index;
13803     }
13804 
13805     /*
13806       This function is called on the precondition that the index is covering.
13807       Therefore if the GROUP BY list contains more elements than the index,
13808       these are duplicates. The GROUP BY list cannot be a prefix of the index.
13809     */
13810     if (elements_in_group > table->actual_n_key_parts(cur_index_info))
13811     {
13812       cause= "group key parts greater than index key parts";
13813       goto next_index;
13814     }
13815 
13816     /*
13817       Unless extended keys can be used for cur_index:
13818       If the current storage manager is such that it appends the primary key to
13819       each index, then the above condition is insufficient to check if the
13820       index is covering. In such cases it may happen that some fields are
13821       covered by the PK index, but not by the current index. Since we can't
13822       use the concatenation of both indexes for index lookup, such an index
13823       does not qualify as covering in our case. If this is the case, below
13824       we check that all query fields are indeed covered by 'cur_index'.
13825     */
13826     if (cur_index_info->user_defined_key_parts == table->actual_n_key_parts(cur_index_info)
13827         && pk < MAX_KEY && cur_index != pk &&
13828         (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
13829     {
13830       /* For each table field */
13831       for (uint i= 0; i < table->s->fields; i++)
13832       {
13833         Field *cur_field= table->field[i];
13834         /*
13835           If the field is used in the current query ensure that it's
13836           part of 'cur_index'
13837         */
13838         if (bitmap_is_set(table->read_set, cur_field->field_index) &&
13839             !cur_field->part_of_key_not_clustered.is_set(cur_index))
13840         {
13841           cause= "not covering";
13842           goto next_index;                  // Field was not part of key
13843         }
13844       }
13845     }
13846 
13847     trace_idx.add("covering", true);
13848 
13849     max_key_part= 0;
13850     used_key_parts_map.clear_all();
13851 
13852     /*
13853       Check (GA1) for GROUP BY queries.
13854     */
13855     if (join->group_list)
13856     {
13857       cur_part= cur_index_info->key_part;
13858       end_part= cur_part + table->actual_n_key_parts(cur_index_info);
13859       /* Iterate in parallel over the GROUP list and the index parts. */
13860       for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
13861            tmp_group= tmp_group->next, cur_part++)
13862       {
13863         /*
13864           TODO:
13865           tmp_group::item is an array of Item, is it OK to consider only the
13866           first Item? If so, then why? What is the array for?
13867         */
13868         /* Above we already checked that all group items are fields. */
13869         DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
13870         Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
13871         if (group_field->field->eq(cur_part->field))
13872         {
13873           cur_group_prefix_len+= cur_part->store_length;
13874           ++cur_group_key_parts;
13875           max_key_part= (uint)(cur_part - cur_index_info->key_part) + 1;
13876           used_key_parts_map.set_bit(max_key_part);
13877         }
13878         else
13879         {
13880           cause= "group attribute not prefix in index";
13881           goto next_index;
13882         }
13883       }
13884     }
13885     /*
13886       Check (GA2) if this is a DISTINCT query.
13887       If GA2, then Store a new ORDER object in group_fields_array at the
13888       position of the key part of item_field->field. Thus we get the ORDER
13889       objects for each field ordered as the corresponding key parts.
13890       Later group_fields_array of ORDER objects is used to convert the query
13891       to a GROUP query.
13892     */
13893     if ((!join->group && join->select_distinct) ||
13894         is_agg_distinct)
13895     {
13896       if (!is_agg_distinct)
13897       {
13898         select_items_it.rewind();
13899       }
13900 
13901       List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
13902       while (NULL != (item = (is_agg_distinct ?
13903              (Item *) agg_distinct_flds_it++ : select_items_it++)))
13904       {
13905         /* (SA5) already checked above. */
13906         item_field= (Item_field*) item->real_item();
13907         DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
13908 
13909         /* not doing loose index scan for derived tables */
13910         if (!item_field->field)
13911         {
13912           cause= "derived table";
13913           goto next_index;
13914         }
13915 
13916         /* Find the order of the key part in the index. */
13917         key_part_nr= get_field_keypart(cur_index_info, item_field->field);
13918         /*
13919           Check if this attribute was already present in the select list.
13920           If it was present, then its corresponding key part was alredy used.
13921         */
13922         if (used_key_parts_map.is_set(key_part_nr))
13923           continue;
13924         if (key_part_nr < 1 ||
13925             (!is_agg_distinct && key_part_nr > join->fields_list.elements))
13926         {
13927           cause= "select attribute not prefix in index";
13928           goto next_index;
13929         }
13930         cur_part= cur_index_info->key_part + key_part_nr - 1;
13931         cur_group_prefix_len+= cur_part->store_length;
13932         used_key_parts_map.set_bit(key_part_nr);
13933         ++cur_group_key_parts;
13934         max_key_part= MY_MAX(max_key_part,key_part_nr);
13935       }
13936       /*
13937         Check that used key parts forms a prefix of the index.
13938         To check this we compare bits in all_parts and cur_parts.
13939         all_parts have all bits set from 0 to (max_key_part-1).
13940         cur_parts have bits set for only used keyparts.
13941       */
13942       ulonglong all_parts, cur_parts;
13943       all_parts= (1ULL << max_key_part) - 1;
13944       cur_parts= used_key_parts_map.to_ulonglong() >> 1;
13945       if (all_parts != cur_parts)
13946         goto next_index;
13947     }
13948 
13949     /* Check (SA2). */
13950     if (min_max_arg_item)
13951     {
13952       key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
13953       if (key_part_nr <= cur_group_key_parts)
13954       {
13955         cause= "aggregate column not suffix in idx";
13956         goto next_index;
13957       }
13958       min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
13959     }
13960 
13961     /*
      Apply a heuristic: there is no point to use loose index scan when we're
13963       using the whole unique index.
13964     */
13965     if (cur_index_info->flags & HA_NOSAME &&
13966         cur_group_key_parts == cur_index_info->user_defined_key_parts)
13967     {
13968       cause= "using unique index";
13969       goto next_index;
13970     }
13971 
13972     /*
13973       Check (NGA1, NGA2) and extract a sequence of constants to be used as part
13974       of all search keys.
13975     */
13976 
13977     /*
13978       If there is MIN/MAX, each keypart between the last group part and the
13979       MIN/MAX part must participate in one equality with constants, and all
13980       keyparts after the MIN/MAX part must not be referenced in the query.
13981 
13982       If there is no MIN/MAX, the keyparts after the last group part can be
13983       referenced only in equalities with constants, and the referenced keyparts
13984       must form a sequence without any gaps that starts immediately after the
13985       last group keypart.
13986     */
13987     key_parts= table->actual_n_key_parts(cur_index_info);
13988     last_part= cur_index_info->key_part + key_parts;
13989     first_non_group_part= (cur_group_key_parts < key_parts) ?
13990                           cur_index_info->key_part + cur_group_key_parts :
13991                           NULL;
13992     first_non_infix_part= min_max_arg_part ?
13993                           (min_max_arg_part < last_part) ?
13994                              min_max_arg_part :
13995                              NULL :
13996                            NULL;
13997     if (first_non_group_part &&
13998         (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
13999     {
14000       if (tree)
14001       {
14002         SEL_ARG *index_range_tree= tree->keys[cur_param_idx];
14003         if (!get_constant_key_infix(cur_index_info, index_range_tree,
14004                                     first_non_group_part, min_max_arg_part,
14005                                     last_part, thd, cur_key_infix,
14006                                     &cur_key_infix_len,
14007                                     &first_non_infix_part))
14008         {
14009           cause= "nonconst equality gap attribute";
14010           goto next_index;
14011         }
14012       }
14013       else if (min_max_arg_part &&
14014                (min_max_arg_part - first_non_group_part > 0))
14015       {
14016         /*
14017           There is a gap but no range tree, thus no predicates at all for the
14018           non-group keyparts.
14019         */
14020         cause= "no nongroup keypart predicate";
14021         goto next_index;
14022       }
14023       else if (first_non_group_part && join->conds)
14024       {
14025         /*
14026           If there is no MIN/MAX function in the query, but some index
14027           key part is referenced in the WHERE clause, then this index
14028           cannot be used because the WHERE condition over the keypart's
14029           field cannot be 'pushed' to the index (because there is no
14030           range 'tree'), and the WHERE clause must be evaluated before
14031           GROUP BY/DISTINCT.
14032         */
14033         /*
14034           Store the first and last keyparts that need to be analyzed
14035           into one array that can be passed as parameter.
14036         */
14037         KEY_PART_INFO *key_part_range[2];
14038         key_part_range[0]= first_non_group_part;
14039         key_part_range[1]= last_part;
14040 
14041         /* Check if cur_part is referenced in the WHERE clause. */
14042         if (join->conds->walk(&Item::find_item_in_field_list_processor, true,
14043                               key_part_range))
14044         {
14045           cause= "keypart reference from where clause";
14046           goto next_index;
14047         }
14048       }
14049     }
14050 
14051     /*
14052       Test (WA1) partially - that no other keypart after the last infix part is
14053       referenced in the query.
14054     */
14055     if (first_non_infix_part)
14056     {
14057       cur_part= first_non_infix_part +
14058                 (min_max_arg_part && (min_max_arg_part < last_part));
14059       for (; cur_part != last_part; cur_part++)
14060       {
14061         if (bitmap_is_set(table->read_set, cur_part->field->field_index))
14062         {
14063           cause= "keypart after infix in query";
14064           goto next_index;
14065         }
14066       }
14067     }
14068 
14069     /**
14070       Test WA2:If there are conditions on a column C participating in
14071       MIN/MAX, those conditions must be conjunctions to all earlier
14072       keyparts. Otherwise, Loose Index Scan cannot be used.
14073     */
14074     if (tree && min_max_arg_item)
14075     {
14076       SEL_ARG *index_range_tree= tree->keys[cur_param_idx];
14077       SEL_ARG *cur_range= NULL;
14078       if (get_sel_arg_for_keypart(min_max_arg_part->field,
14079                                   index_range_tree, &cur_range) ||
14080           (cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
14081       {
14082         cause= "minmax keypart in disjunctive query";
14083         goto next_index;
14084       }
14085     }
14086 
14087     /* If we got to this point, cur_index_info passes the test. */
14088     key_infix_parts= cur_key_infix_len ? (uint)
14089                      (first_non_infix_part - first_non_group_part) : 0;
14090     cur_used_key_parts= cur_group_key_parts + key_infix_parts;
14091 
14092     /* Compute the cost of using this index. */
14093     if (tree)
14094     {
14095       if ((cur_index_tree= tree->keys[cur_param_idx]))
14096       {
14097         cur_quick_prefix_records= param->quick_rows[cur_index];
14098         if (unlikely(cur_index_tree && thd->trace_started()))
14099         {
14100           Json_writer_array trace_range(thd, "ranges");
14101           trace_ranges(&trace_range, param, cur_param_idx,
14102                        cur_index_tree, cur_index_info->key_part);
14103         }
14104       }
14105       else
14106         cur_quick_prefix_records= HA_POS_ERROR;
14107     }
14108     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
14109                        cur_group_key_parts, tree, cur_index_tree,
14110                        cur_quick_prefix_records, have_min, have_max,
14111                        &cur_read_cost, &cur_records);
14112     /*
14113       If cur_read_cost is lower than best_read_cost use cur_index.
14114       Do not compare doubles directly because they may have different
14115       representations (64 vs. 80 bits).
14116     */
14117     trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
14118 
14119     if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
14120     {
14121       index_info= cur_index_info;
14122       index= cur_index;
14123       best_read_cost= cur_read_cost;
14124       best_records= cur_records;
14125       best_index_tree= cur_index_tree;
14126       best_quick_prefix_records= cur_quick_prefix_records;
14127       best_param_idx= cur_param_idx;
14128       group_key_parts= cur_group_key_parts;
14129       group_prefix_len= cur_group_prefix_len;
14130       key_infix_len= cur_key_infix_len;
14131       if (key_infix_len)
14132         memcpy (key_infix, cur_key_infix, sizeof (key_infix));
14133       used_key_parts= cur_used_key_parts;
14134     }
14135 
14136   next_index:
14137     if (cause)
14138     {
14139       trace_idx.add("usable", false).add("cause", cause);
14140       cause= NULL;
14141     }
14142   }
14143 
14144   trace_indexes.end();
14145 
14146   if (!index_info) /* No usable index found. */
14147     DBUG_RETURN(NULL);
14148 
14149   /* Check (SA3) for the where clause. */
14150   bool has_min_max_fld= false, has_other_fld= false;
14151   if (join->conds && min_max_arg_item &&
14152       !check_group_min_max_predicates(join->conds, min_max_arg_item,
14153                                       (index_info->flags & HA_SPATIAL) ?
14154                                       Field::itMBR : Field::itRAW,
14155                                       &has_min_max_fld, &has_other_fld))
14156   {
14157     trace_group.add("usable", false)
14158                .add("cause", "unsupported predicate on agg attribute");
14159     DBUG_RETURN(NULL);
14160   }
14161 
14162   /*
14163     Check (SA6) if clustered key is used
14164   */
14165   if (is_agg_distinct && table->file->is_clustering_key(index))
14166   {
14167     trace_group.add("usable", false)
14168                .add("cause", "index is clustered");
14169     DBUG_RETURN(NULL);
14170   }
14171 
14172   /* The query passes all tests, so construct a new TRP object. */
14173   read_plan= new (param->mem_root)
14174                  TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
14175                                    min_max_arg_part,
14176                                    group_prefix_len, used_key_parts,
14177                                    group_key_parts, index_info, index,
14178                                    key_infix_len,
14179                                    (key_infix_len > 0) ? key_infix : NULL,
14180                                    tree, best_index_tree, best_param_idx,
14181                                    best_quick_prefix_records);
14182   if (read_plan)
14183   {
14184     if (tree && read_plan->quick_prefix_records == 0)
14185       DBUG_RETURN(NULL);
14186 
14187     read_plan->read_cost= best_read_cost;
14188     read_plan->records=   best_records;
14189     if (read_time < best_read_cost && is_agg_distinct)
14190     {
14191       trace_group.add("index_scan", true);
14192       read_plan->read_cost= 0;
14193       read_plan->use_index_scan();
14194     }
14195 
14196     DBUG_PRINT("info",
14197                ("Returning group min/max plan: cost: %g, records: %lu",
14198                 read_plan->read_cost, (ulong) read_plan->records));
14199   }
14200 
14201   DBUG_RETURN(read_plan);
14202 }
14203 
14204 
14205 /*
14206   Check that the MIN/MAX attribute participates only in range predicates
14207   with constants.
14208 
14209   SYNOPSIS
14210     check_group_min_max_predicates()
14211     cond            [in]  the expression tree being analyzed
14212     min_max_arg     [in]  the field referenced by the MIN/MAX function(s)
14213     image_type      [in]
14214     has_min_max_arg [out] true if the subtree being analyzed references
14215                           min_max_arg
14216     has_other_arg   [out] true if the subtree being analyzed references a
14217                           column other min_max_arg
14218 
14219   DESCRIPTION
14220     The function walks recursively over the cond tree representing a WHERE
14221     clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
14222     aggregate function, it is referenced only by one of the following
14223     predicates $FUNC$:
14224     {=, !=, <, <=, >, >=, between, is [not] null, multiple equal}.
14225     In addition the function checks that the WHERE condition is equivalent to
14226     "cond1 AND cond2" where :
14227     cond1 - does not use min_max_column at all.
14228     cond2 - is an AND/OR tree with leaves in form
14229     "$FUNC$(min_max_column[, const])".
14230 
14231   RETURN
14232     TRUE  if cond passes the test
14233     FALSE o/w
14234 */
14235 
14236 static bool
check_group_min_max_predicates(Item * cond,Item_field * min_max_arg_item,Field::imagetype image_type,bool * has_min_max_arg,bool * has_other_arg)14237 check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
14238                                Field::imagetype image_type,
14239                                bool *has_min_max_arg, bool *has_other_arg)
14240 {
14241   DBUG_ENTER("check_group_min_max_predicates");
14242   DBUG_ASSERT(cond && min_max_arg_item);
14243 
14244   cond= cond->real_item();
14245   Item::Type cond_type= cond->real_type();
14246   if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
14247   {
14248     DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
14249     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
14250     Item *and_or_arg;
14251     Item_func::Functype func_type= ((Item_cond*) cond)->functype();
14252     bool has_min_max= false, has_other= false;
14253     while ((and_or_arg= li++))
14254     {
14255       /*
14256         The WHERE clause doesn't pass the condition if:
14257         (1) any subtree doesn't pass the condition or
14258         (2) the subtree passes the test, but it is an OR and it references both
14259             the min/max argument and other columns.
14260       */
14261       if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,     //1
14262                                           image_type,
14263                                           &has_min_max, &has_other) ||
14264           (func_type == Item_func::COND_OR_FUNC && has_min_max && has_other))//2
14265         DBUG_RETURN(FALSE);
14266     }
14267     *has_min_max_arg= has_min_max || *has_min_max_arg;
14268     *has_other_arg= has_other || *has_other_arg;
14269     DBUG_RETURN(TRUE);
14270   }
14271 
14272   /*
14273     Disallow loose index scan if the MIN/MAX argument field is referenced by
14274     a subquery in the WHERE clause.
14275   */
14276 
14277   if (unlikely(cond_type == Item::SUBSELECT_ITEM))
14278   {
14279     Item_subselect *subs_cond= (Item_subselect*) cond;
14280     if (subs_cond->is_correlated)
14281     {
14282       DBUG_ASSERT(subs_cond->upper_refs.elements > 0);
14283       List_iterator_fast<Item_subselect::Ref_to_outside>
14284         li(subs_cond->upper_refs);
14285       Item_subselect::Ref_to_outside *dep;
14286       while ((dep= li++))
14287       {
14288         if (dep->item->eq(min_max_arg_item, FALSE))
14289           DBUG_RETURN(FALSE);
14290       }
14291     }
14292     DBUG_RETURN(TRUE);
14293   }
14294   /*
14295     Subquery with IS [NOT] NULL
14296     TODO: Look into the cache_item and optimize it like we do for
14297     subselect's above
14298    */
14299   if (unlikely(cond_type == Item::CACHE_ITEM))
14300     DBUG_RETURN(cond->const_item());
14301 
14302   /*
14303     Condition of the form 'field' is equivalent to 'field <> 0' and thus
14304     satisfies the SA3 condition.
14305   */
14306   if (cond_type == Item::FIELD_ITEM)
14307   {
14308     DBUG_PRINT("info", ("Analyzing: %s", cond->full_name()));
14309     if (min_max_arg_item->eq((Item_field*)cond, 1))
14310       *has_min_max_arg= true;
14311     else
14312       *has_other_arg= true;
14313     DBUG_RETURN(TRUE);
14314   }
14315 
14316   /* We presume that at this point there are no other Items than functions. */
14317   DBUG_ASSERT(cond_type == Item::FUNC_ITEM);
14318   if (unlikely(cond_type != Item::FUNC_ITEM))   /* Safety */
14319     DBUG_RETURN(FALSE);
14320 
14321   /* Test if cond references only group-by or non-group fields. */
14322   Item_func *pred= (Item_func*) cond;
14323   Item_func::Functype pred_type= pred->functype();
14324   DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
14325   if (pred_type == Item_func::MULT_EQUAL_FUNC)
14326   {
14327     /*
14328       Check that each field in a multiple equality is either a constant or
14329       it is a reference to the min/max argument, or it doesn't contain the
14330       min/max argument at all.
14331     */
14332     Item_equal_fields_iterator eq_it(*((Item_equal*)pred));
14333     Item *eq_item;
14334     bool has_min_max= false, has_other= false;
14335     while ((eq_item= eq_it++))
14336     {
14337       if (min_max_arg_item->eq(eq_item->real_item(), 1))
14338         has_min_max= true;
14339       else
14340         has_other= true;
14341     }
14342     *has_min_max_arg= has_min_max || *has_min_max_arg;
14343     *has_other_arg= has_other || *has_other_arg;
14344     DBUG_RETURN(!(has_min_max && has_other));
14345   }
14346 
14347   Item **arguments= pred->arguments();
14348   Item *cur_arg;
14349   bool has_min_max= false, has_other= false;
14350   for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
14351   {
14352     cur_arg= arguments[arg_idx]->real_item();
14353     DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
14354     if (cur_arg->type() == Item::FIELD_ITEM)
14355     {
14356       if (min_max_arg_item->eq(cur_arg, 1))
14357       {
14358         has_min_max= true;
14359         /*
14360           If pred references the MIN/MAX argument, check whether pred is a range
14361           condition that compares the MIN/MAX argument with a constant.
14362         */
14363         if (pred_type != Item_func::EQUAL_FUNC     &&
14364             pred_type != Item_func::LT_FUNC        &&
14365             pred_type != Item_func::LE_FUNC        &&
14366             pred_type != Item_func::GT_FUNC        &&
14367             pred_type != Item_func::GE_FUNC        &&
14368             pred_type != Item_func::BETWEEN        &&
14369             pred_type != Item_func::ISNULL_FUNC    &&
14370             pred_type != Item_func::ISNOTNULL_FUNC &&
14371             pred_type != Item_func::EQ_FUNC        &&
14372             pred_type != Item_func::NE_FUNC)
14373           DBUG_RETURN(FALSE);
14374 
14375         /* Check that pred compares min_max_arg_item with a constant. */
14376         Item *args[3];
14377         bzero(args, 3 * sizeof(Item*));
14378         bool inv;
14379         /* Test if this is a comparison of a field and a constant. */
14380         if (!simple_pred(pred, args, &inv))
14381           DBUG_RETURN(FALSE);
14382 
14383         if (args[0] && args[1]) // this is a binary function or BETWEEN
14384         {
14385           DBUG_ASSERT(pred->fixed_type_handler());
14386           DBUG_ASSERT(pred->fixed_type_handler()->is_bool_type());
14387           Item_bool_func *bool_func= (Item_bool_func*) pred;
14388           Field *field= min_max_arg_item->field;
14389           if (!args[2]) // this is a binary function
14390           {
14391             if (!field->can_optimize_group_min_max(bool_func, args[1]))
14392               DBUG_RETURN(FALSE);
14393           }
14394           else // this is BETWEEN
14395           {
14396             if (!field->can_optimize_group_min_max(bool_func, args[1]) ||
14397                 !field->can_optimize_group_min_max(bool_func, args[2]))
14398               DBUG_RETURN(FALSE);
14399           }
14400         }
14401       }
14402       else
14403         has_other= true;
14404     }
14405     else if (cur_arg->type() == Item::FUNC_ITEM)
14406     {
14407       if (!check_group_min_max_predicates(cur_arg, min_max_arg_item, image_type,
14408                                           &has_min_max, &has_other))
14409         DBUG_RETURN(FALSE);
14410     }
14411     else if (cur_arg->const_item() && !cur_arg->is_expensive())
14412     {
14413       /*
14414         For predicates of the form "const OP expr" we also have to check 'expr'
14415         to make a decision.
14416       */
14417       continue;
14418     }
14419     else
14420       DBUG_RETURN(FALSE);
14421     if(has_min_max && has_other)
14422       DBUG_RETURN(FALSE);
14423   }
14424   *has_min_max_arg= has_min_max || *has_min_max_arg;
14425   *has_other_arg= has_other || *has_other_arg;
14426 
14427   DBUG_RETURN(TRUE);
14428 }
14429 
14430 
14431 /*
14432   Get the SEL_ARG tree 'tree' for the keypart covering 'field', if
14433   any. 'tree' must be a unique conjunction to ALL predicates in earlier
14434   keyparts of 'keypart_tree'.
14435 
14436   E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2
14437   covers 'field', all these conditions satisfies the requirement:
14438 
14439    1. "(kp1=2 OR kp1=3) AND kp2=10"    => returns "kp2=10"
14440    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)"  => returns "kp2=10"
14441    3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))"
14442                                        => returns "kp2=10  OR kp2=11"
14443 
14444    whereas these do not
14445    1. "(kp1=2 AND kp2=10) OR kp1=3"
14446    2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)"
14447    3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))"
14448 
14449    This function effectively tests requirement WA2. In combination with
14450    a test that the returned tree has no more than one range it is also
14451    a test of NGA3.
14452 
14453   @param[in]   field          The field we want the SEL_ARG tree for
14454   @param[in]   keypart_tree   Root node of the SEL_ARG* tree for the index
14455   @param[out]  cur_range      The SEL_ARG tree, if any, for the keypart
14456                               covering field 'keypart_field'
14457   @retval true   'keypart_tree' contained a predicate for 'field' that
14458                   is not conjunction to all predicates on earlier keyparts
14459   @retval false  otherwise
14460 */
14461 
14462 static bool
get_sel_arg_for_keypart(Field * field,SEL_ARG * keypart_tree,SEL_ARG ** cur_range)14463 get_sel_arg_for_keypart(Field *field,
14464                         SEL_ARG *keypart_tree,
14465                         SEL_ARG **cur_range)
14466 {
14467   if (keypart_tree == NULL)
14468     return false;
14469   if (keypart_tree->field->eq(field))
14470   {
14471     *cur_range= keypart_tree;
14472     return false;
14473   }
14474 
14475   SEL_ARG *tree_first_range= NULL;
14476   SEL_ARG *first_kp=  keypart_tree->first();
14477 
14478   for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next)
14479   {
14480     SEL_ARG *curr_tree= NULL;
14481     if (cur_kp->next_key_part)
14482     {
14483       if (get_sel_arg_for_keypart(field,
14484                                   cur_kp->next_key_part,
14485                                   &curr_tree))
14486         return true;
14487     }
14488     /*
14489       Check if the SEL_ARG tree for 'field' is identical for all ranges in
14490       'keypart_tree
14491      */
14492     if (cur_kp == first_kp)
14493       tree_first_range= curr_tree;
14494     else if (!all_same(tree_first_range, curr_tree))
14495       return true;
14496   }
14497   *cur_range= tree_first_range;
14498   return false;
14499 }
14500 
14501 /*
14502   Extract a sequence of constants from a conjunction of equality predicates.
14503 
14504   SYNOPSIS
14505     get_constant_key_infix()
14506     index_info             [in]  Descriptor of the chosen index.
14507     index_range_tree       [in]  Range tree for the chosen index
14508     first_non_group_part   [in]  First index part after group attribute parts
14509     min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
14510     last_part              [in]  Last keypart of the index
14511     thd                    [in]  Current thread
14512     key_infix              [out] Infix of constants to be used for index lookup
14513     key_infix_len          [out] Length of the infix
14514     first_non_infix_part   [out] The first keypart after the infix (if any)
14515 
14516   DESCRIPTION
14517     Test conditions (NGA1, NGA2, NGA3) from get_best_group_min_max(). Namely,
14518     for each keypart field NG_i not in GROUP-BY, check that there is exactly one
14519     constant equality predicate among conds with the form (NG_i = const_ci) or
    (const_ci = NG_i). In addition, there can only be one range when there is
14521     such a gap.
14522     Thus all the NGF_i attributes must fill the 'gap' between the last group-by
14523     attribute and the MIN/MAX attribute in the index (if present).  Also ensure
14524     that there is only a single range on NGF_i (NGA3). If these
14525     conditions hold, copy each constant from its corresponding predicate into
14526     key_infix, in the order its NG_i attribute appears in the index, and update
14527     key_infix_len with the total length of the key parts in key_infix.
14528 
14529   RETURN
14530     TRUE  if the index passes the test
14531     FALSE o/w
14532 */
14533 static bool
get_constant_key_infix(KEY * index_info,SEL_ARG * index_range_tree,KEY_PART_INFO * first_non_group_part,KEY_PART_INFO * min_max_arg_part,KEY_PART_INFO * last_part,THD * thd,uchar * key_infix,uint * key_infix_len,KEY_PART_INFO ** first_non_infix_part)14534 get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
14535                        KEY_PART_INFO *first_non_group_part,
14536                        KEY_PART_INFO *min_max_arg_part,
14537                        KEY_PART_INFO *last_part, THD *thd,
14538                        uchar *key_infix, uint *key_infix_len,
14539                        KEY_PART_INFO **first_non_infix_part)
14540 {
14541   KEY_PART_INFO *cur_part;
14542   /* End part for the first loop below. */
14543   KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
14544 
14545   *key_infix_len= 0;
14546   uchar *key_ptr= key_infix;
14547   for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
14548   {
14549     SEL_ARG *cur_range= NULL;
14550     /*
14551       Check NGA3:
14552       1. get_sel_arg_for_keypart gets the range tree for the 'field' and also
14553          checks for a unique conjunction of this tree with all the predicates
14554          on the earlier keyparts in the index.
14555       2. Check for multiple ranges on the found keypart tree.
14556 
14557       We assume that index_range_tree points to the leftmost keypart in
14558       the index.
14559     */
14560     if (get_sel_arg_for_keypart(cur_part->field, index_range_tree,
14561                                 &cur_range))
14562       return false;
14563 
14564     if (cur_range && cur_range->elements > 1)
14565       return false;
14566 
14567     if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
14568     {
14569       if (min_max_arg_part)
14570         return false; /* The current keypart has no range predicates at all. */
14571       else
14572       {
14573         *first_non_infix_part= cur_part;
14574         return true;
14575       }
14576     }
14577 
14578     if ((cur_range->min_flag & NO_MIN_RANGE) ||
14579         (cur_range->max_flag & NO_MAX_RANGE) ||
14580         (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
14581       return false;
14582 
14583     uint field_length= cur_part->store_length;
14584     if (cur_range->maybe_null &&
14585          cur_range->min_value[0] && cur_range->max_value[0])
14586     {
14587       /*
14588         cur_range specifies 'IS NULL'. In this case the argument points
14589         to a "null value" (is_null_string) that may not always be long
14590         enough for a direct memcpy to a field.
14591       */
14592       DBUG_ASSERT (field_length > 0);
14593       *key_ptr= 1;
14594       bzero(key_ptr+1,field_length-1);
14595       key_ptr+= field_length;
14596       *key_infix_len+= field_length;
14597     }
14598     else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0)
14599     { /* cur_range specifies an equality condition. */
14600       memcpy(key_ptr, cur_range->min_value, field_length);
14601       key_ptr+= field_length;
14602       *key_infix_len+= field_length;
14603     }
14604     else
14605       return false;
14606   }
14607 
14608   if (!min_max_arg_part && (cur_part == last_part))
14609     *first_non_infix_part= last_part;
14610 
14611   return TRUE;
14612 }
14613 
14614 
14615 /*
14616   Find the key part referenced by a field.
14617 
14618   SYNOPSIS
14619     get_field_keypart()
14620     index  descriptor of an index
14621     field  field that possibly references some key part in index
14622 
14623   NOTES
14624     The return value can be used to get a KEY_PART_INFO pointer by
14625     part= index->key_part + get_field_keypart(...) - 1;
14626 
14627   RETURN
14628     Positive number which is the consecutive number of the key part, or
14629     0 if field does not reference any index field.
14630 */
14631 
14632 static inline uint
get_field_keypart(KEY * index,Field * field)14633 get_field_keypart(KEY *index, Field *field)
14634 {
14635   KEY_PART_INFO *part, *end;
14636 
14637   for (part= index->key_part,
14638          end= part + field->table->actual_n_key_parts(index);
14639        part < end; part++)
14640   {
14641     if (field->eq(part->field))
14642       return (uint)(part - index->key_part + 1);
14643   }
14644   return 0;
14645 }
14646 
14647 
14648 /*
14649   Compute the cost of a quick_group_min_max_select for a particular index.
14650 
14651   SYNOPSIS
14652     cost_group_min_max()
14653     table                [in] The table being accessed
14654     index_info           [in] The index used to access the table
14655     used_key_parts       [in] Number of key parts used to access the index
14656     group_key_parts      [in] Number of index key parts in the group prefix
14657     range_tree           [in] Tree of ranges for all indexes
14658     index_tree           [in] The range tree for the current index
14659     quick_prefix_records [in] Number of records retrieved by the internally
14660 			      used quick range select if any
14661     have_min             [in] True if there is a MIN function
14662     have_max             [in] True if there is a MAX function
14663     read_cost           [out] The cost to retrieve rows via this quick select
14664     records             [out] The number of rows retrieved
14665 
14666   DESCRIPTION
14667     This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
14668     the number of rows returned.
14669 
14670   NOTES
14671     The cost computation distinguishes several cases:
14672     1) No equality predicates over non-group attributes (thus no key_infix).
14673        If groups are bigger than blocks on the average, then we assume that it
14674        is very unlikely that block ends are aligned with group ends, thus even
14675        if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
14676        keys, except for the first MIN and the last MAX keys, will be in the
14677        same block.  If groups are smaller than blocks, then we are going to
14678        read all blocks.
14679     2) There are equality predicates over non-group attributes.
14680        In this case the group prefix is extended by additional constants, and
14681        as a result the min/max values are inside sub-groups of the original
14682        groups. The number of blocks that will be read depends on whether the
14683        ends of these sub-groups will be contained in the same or in different
14684        blocks. We compute the probability for the two ends of a subgroup to be
14685        in two different blocks as the ratio of:
14686        - the number of positions of the left-end of a subgroup inside a group,
14687          such that the right end of the subgroup is past the end of the buffer
14688          containing the left-end, and
14689        - the total number of possible positions for the left-end of the
14690          subgroup, which is the number of keys in the containing group.
14691        We assume it is very unlikely that two ends of subsequent subgroups are
14692        in the same block.
14693     3) The are range predicates over the group attributes.
14694        Then some groups may be filtered by the range predicates. We use the
14695        selectivity of the range predicates to decide how many groups will be
14696        filtered.
14697 
14698   TODO
14699      - Take into account the optional range predicates over the MIN/MAX
14700        argument.
14701      - Check if we have a PK index and we use all cols - then each key is a
14702        group, and it will be better to use an index scan.
14703 
14704   RETURN
14705     None
14706 */
14707 
void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                        uint group_key_parts, SEL_TREE *range_tree,
                        SEL_ARG *index_tree, ha_rows quick_prefix_records,
                        bool have_min, bool have_max,
                        double *read_cost, ha_rows *records)
{
  ha_rows table_records;
  ha_rows num_groups;
  ha_rows num_blocks;
  uint    keys_per_block;
  ha_rows keys_per_group;
  ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */
                          /* formed by a key infix. */
  double p_overlap; /* Probability that a sub-group overlaps two blocks. */
  double quick_prefix_selectivity;
  double io_cost;
  DBUG_ENTER("cost_group_min_max");

  table_records= table->stat_records();
  /* Assume block is 75 % full */
  keys_per_block= (uint) (table->file->stats.block_size * 3 / 4 /
                          (index_info->key_length + table->file->ref_length)
                          + 1);
  /* "+ 1" guarantees a non-zero block count even for an empty table. */
  num_blocks= (ha_rows)(table_records / keys_per_block) + 1;

  /* Compute the number of keys in a group. */
  if (!group_key_parts)
  {
    /* Summary over the whole table */
    keys_per_group= table_records;
  }
  else
  {
    /*
      From index statistics: average number of records sharing the same
      value in the first group_key_parts key parts.
    */
    keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts -
                                                             1);
  }

  if (keys_per_group == 0) /* If there is no statistics try to guess */
    /* each group contains 10% of all records */
    keys_per_group= (table_records / 10) + 1;
  num_groups= (table_records / keys_per_group) + 1;

  /* Apply the selectivity of the quick select for group prefixes. */
  if (range_tree && (quick_prefix_records != HA_POS_ERROR))
  {
    quick_prefix_selectivity= (double) quick_prefix_records /
                              (double) table_records;
    num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
    /* Rounding must not drop the estimate below one group. */
    set_if_bigger(num_groups, 1);
  }

  if (used_key_parts > group_key_parts)
  { /*
      Compute the probability that two ends of a subgroup are inside
      different blocks.
    */
    keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts - 1);
    if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
      p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
    else
    {
      double blocks_per_group= (double) num_blocks / (double) num_groups;
      p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
      p_overlap= MY_MIN(p_overlap, 1.0);
    }
    /* Expected block reads, never more than the total number of blocks. */
    io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
  }
  else
    /*
      No key infix (case 1 in the NOTES above): with groups larger than a
      block we read about one block per group, plus one extra when both
      MIN and MAX are fetched; with small groups we read every block.
    */
    io_cost= (keys_per_group > keys_per_block) ?
             (have_min && have_max) ? (double) (num_groups + 1) :
                                      (double) num_groups :
             (double) num_blocks;

  /*
    CPU cost must be comparable to that of an index scan as computed
    in SQL_SELECT::test_quick_select(). When the groups are small,
    e.g. for a unique index, using index scan will be cheaper since it
    reads the next record without having to re-position to it on every
    group. To make the CPU cost reflect this, we estimate the CPU cost
    as the sum of:
    1. Cost for evaluating the condition (similarly as for index scan).
    2. Cost for navigating the index structure (assuming a b-tree).
       Note: We only add the cost for one comparison per block. For a
             b-tree the number of comparisons will be larger.
       TODO: This cost should be provided by the storage engine.
  */
  const double tree_traversal_cost=
    ceil(log(static_cast<double>(table_records))/
         log(static_cast<double>(keys_per_block))) *
    1/(2*TIME_FOR_COMPARE);

  const double cpu_cost= num_groups *
                         (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX);

  *read_cost= io_cost + cpu_cost;
  *records= num_groups;

  DBUG_PRINT("info",
             ("table rows: %lu  keys/block: %u  keys/group: %lu  "
              "result rows: %lu  blocks: %lu",
              (ulong) table_records, keys_per_block, (ulong) keys_per_group,
              (ulong) *records, (ulong) num_blocks));
  DBUG_VOID_RETURN;
}
14812 
14813 
14814 /*
14815   Construct a new quick select object for queries with group by with min/max.
14816 
14817   SYNOPSIS
14818     TRP_GROUP_MIN_MAX::make_quick()
14819     param              Parameter from test_quick_select
14820     retrieve_full_rows ignored
14821     parent_alloc       Memory pool to use, if any.
14822 
14823   NOTES
14824     Make_quick ignores the retrieve_full_rows parameter because
14825     QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
14826     The other parameter are ignored as well because all necessary
14827     data to create the QUICK object is computed at this TRP creation
14828     time.
14829 
14830   RETURN
14831     New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
14832     NULL otherwise.
14833 */
14834 
QUICK_SELECT_I *
TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
                              MEM_ROOT *parent_alloc)
{
  QUICK_GROUP_MIN_MAX_SELECT *quick;
  DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");

  /*
    retrieve_full_rows is ignored: this quick select always does
    index-only scans. Everything else was computed at TRP creation time.
  */
  quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
                                        param->thd->lex->current_select->join,
                                        have_min, have_max,
                                        have_agg_distinct, min_max_arg_part,
                                        group_prefix_len, group_key_parts,
                                        used_key_parts, index_info, index,
                                        read_cost, records, key_infix_len,
                                        key_infix, parent_alloc, is_index_scan);
  if (!quick)
    DBUG_RETURN(NULL);

  /* init() does the allocations that may fail; on failure give up. */
  if (quick->init())
  {
    delete quick;
    DBUG_RETURN(NULL);
  }

  if (range_tree)
  {
    DBUG_ASSERT(quick_prefix_records > 0);
    if (quick_prefix_records == HA_POS_ERROR)
      quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
    else
      /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
      quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                   index_tree,
                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
                                                   &quick->alloc);

    /*
      Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
      attribute, and create an array of QUICK_RANGES to be used by the
      new quick select.
    */
    if (min_max_arg_part)
    {
      SEL_ARG *min_max_range= index_tree;
      while (min_max_range) /* Find the tree for the MIN/MAX key part. */
      {
        if (min_max_range->field->eq(min_max_arg_part->field))
          break;
        min_max_range= min_max_range->next_key_part;
      }
      /* Scroll to the leftmost interval for the MIN/MAX argument. */
      while (min_max_range && min_max_range->prev)
        min_max_range= min_max_range->prev;
      /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
      while (min_max_range)
      {
        if (quick->add_range(min_max_range))
        {
          delete quick;
          quick= NULL;
          DBUG_RETURN(NULL);
        }
        min_max_range= min_max_range->next;
      }
    }
  }
  else
    quick->quick_prefix_select= NULL;

  quick->update_key_stat();
  quick->adjust_prefix_ranges();

  DBUG_RETURN(quick);
}
14909 
14910 
14911 /*
14912   Construct new quick select for group queries with min/max.
14913 
14914   SYNOPSIS
14915     QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
14916     table             The table being accessed
14917     join              Descriptor of the current query
14918     have_min          TRUE if the query selects a MIN function
14919     have_max          TRUE if the query selects a MAX function
14920     min_max_arg_part  The only argument field of all MIN/MAX functions
14921     group_prefix_len  Length of all key parts in the group prefix
14922     prefix_key_parts  All key parts in the group prefix
14923     index_info        The index chosen for data access
14924     use_index         The id of index_info
14925     read_cost         Cost of this access method
14926     records           Number of records returned
14927     key_infix_len     Length of the key infix appended to the group prefix
14928     key_infix         Infix of constants from equality predicates
14929     parent_alloc      Memory pool for this and quick_prefix_select data
14930     is_index_scan     get the next different key not by jumping on it via
14931                       index read, but by scanning until the end of the
14932                       rows with equal key value.
14933 
14934   RETURN
14935     None
14936 */
14937 
QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
                           bool have_max_arg, bool have_agg_distinct_arg,
                           KEY_PART_INFO *min_max_arg_part_arg,
                           uint group_prefix_len_arg, uint group_key_parts_arg,
                           uint used_key_parts_arg, KEY *index_info_arg,
                           uint use_index, double read_cost_arg,
                           ha_rows records_arg, uint key_infix_len_arg,
                           uchar *key_infix_arg, MEM_ROOT *parent_alloc,
                           bool is_index_scan_arg)
  :file(table->file), join(join_arg), index_info(index_info_arg),
   group_prefix_len(group_prefix_len_arg),
   group_key_parts(group_key_parts_arg), have_min(have_min_arg),
   have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
   seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
   key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
   min_functions_it(NULL), max_functions_it(NULL),
   is_index_scan(is_index_scan_arg)
{
  head=       table;
  index=      use_index;
  record=     head->record[0];
  tmp_record= head->record[1];
  read_time= read_cost_arg;
  records= records_arg;
  used_key_parts= used_key_parts_arg;
  real_key_parts= used_key_parts_arg;
  /* The "real" prefix is the group prefix extended by the key infix. */
  real_prefix_len= group_prefix_len + key_infix_len;
  group_prefix= NULL;  /* Allocated later, in init(). */
  min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;

  /*
    We can't have parent_alloc set as the init function can't handle this case
    yet.
  */
  DBUG_ASSERT(!parent_alloc);
  if (!parent_alloc)
  {
    THD *thd= join->thd;
    init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                   thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
    /*
      NOTE(review): this points the THD's mem_root at our private arena;
      presumably the caller restores thd->mem_root afterwards -- confirm.
    */
    thd->mem_root= &alloc;
  }
  else
    bzero(&alloc, sizeof(MEM_ROOT));            // ensure that it's not used
}
14984 
14985 
14986 /*
14987   Do post-constructor initialization.
14988 
14989   SYNOPSIS
14990     QUICK_GROUP_MIN_MAX_SELECT::init()
14991 
14992   DESCRIPTION
14993     The method performs initialization that cannot be done in the constructor
14994     such as memory allocations that may fail. It allocates memory for the
    group prefix and infix buffers, and for the lists of MIN/MAX items to be
14996     updated during execution.
14997 
14998   RETURN
14999     0      OK
15000     other  Error code
15001 */
15002 
int QUICK_GROUP_MIN_MAX_SELECT::init()
{
  if (group_prefix) /* Already initialized. */
    return 0;

  /*
    We allocate one byte more to serve the case when the last field in
    the buffer is compared using uint3korr (e.g. a Field_newdate field)
  */
  if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len+1)))
      return 1;
  /*
    We may use group_prefix to store keys with all select fields, so allocate
    enough space for it.
    We allocate one byte more to serve the case when the last field in
    the buffer is compared using uint3korr (e.g. a Field_newdate field)
  */
  if (!(group_prefix= (uchar*) alloc_root(&alloc,
                                          real_prefix_len+min_max_arg_len+1)))
    return 1;

  if (key_infix_len > 0)
  {
    /*
      The memory location pointed to by key_infix will be deleted soon, so
      allocate a new buffer and copy the key_infix into it.
    */
    uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len);
    if (!tmp_key_infix)
      return 1;
    memcpy(tmp_key_infix, this->key_infix, key_infix_len);
    this->key_infix= tmp_key_infix;
  }

  if (min_max_arg_part)
  {
    /* Array of QUICK_RANGE* restricting the MIN/MAX argument key part. */
    if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &min_max_ranges,
                              sizeof(QUICK_RANGE*), 16, 16,
                              MYF(MY_THREAD_SPECIFIC)))
      return 1;

    if (have_min)
    {
      if (!(min_functions= new List<Item_sum>))
        return 1;
    }
    else
      min_functions= NULL;
    if (have_max)
    {
      if (!(max_functions= new List<Item_sum>))
        return 1;
    }
    else
      max_functions= NULL;

    /*
      Distribute the query's MIN/MAX items into the two lists that are
      updated after each group is found (see update_min_result() /
      update_max_result()).
    */
    Item_sum *min_max_item;
    Item_sum **func_ptr= join->sum_funcs;
    while ((min_max_item= *(func_ptr++)))
    {
      if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
        min_functions->push_back(min_max_item);
      else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
        max_functions->push_back(min_max_item);
    }

    if (have_min)
    {
      if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
        return 1;
    }

    if (have_max)
    {
      if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
        return 1;
    }
  }
  else
    /* No MIN/MAX argument: mark the range array as empty/unused. */
    min_max_ranges.elements= 0;

  return 0;
}
15086 
15087 
QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
{
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
  if (file->inited != handler::NONE)
  {
    DBUG_ASSERT(file == head->file);
    head->file->ha_end_keyread();
    /*
      There may be a code path when the same table was first accessed by index,
      then the index is closed, and the table is scanned (order by + loose scan).
    */
    file->ha_index_or_rnd_end();
  }
  /* min_max_ranges is only initialized when there is a MIN/MAX argument. */
  if (min_max_arg_part)
    delete_dynamic(&min_max_ranges);
  free_root(&alloc,MYF(0));
  delete min_functions_it;
  delete max_functions_it;
  delete quick_prefix_select;
  DBUG_VOID_RETURN;
}
15109 
15110 
15111 /*
15112   Eventually create and add a new quick range object.
15113 
15114   SYNOPSIS
15115     QUICK_GROUP_MIN_MAX_SELECT::add_range()
    sel_range  Range object from which a new QUICK_RANGE object is created
15117 
15118   NOTES
15119     Construct a new QUICK_RANGE object from a SEL_ARG object, and
15120     add it to the array min_max_ranges. If sel_arg is an infinite
15121     range, e.g. (x < 5 or x > 4), then skip it and do not construct
15122     a quick range.
15123 
15124   RETURN
15125     FALSE on success
15126     TRUE  otherwise
15127 */
15128 
add_range(SEL_ARG * sel_range)15129 bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
15130 {
15131   QUICK_RANGE *range;
15132   uint range_flag= sel_range->min_flag | sel_range->max_flag;
15133 
15134   /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
15135   if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE))
15136     return FALSE;
15137 
15138   if (!(sel_range->min_flag & NO_MIN_RANGE) &&
15139       !(sel_range->max_flag & NO_MAX_RANGE))
15140   {
15141     if (sel_range->maybe_null &&
15142         sel_range->min_value[0] && sel_range->max_value[0])
15143       range_flag|= NULL_RANGE; /* IS NULL condition */
15144     else if (memcmp(sel_range->min_value, sel_range->max_value,
15145                     min_max_arg_len) == 0)
15146       range_flag|= EQ_RANGE;  /* equality condition */
15147   }
15148   range= new QUICK_RANGE(join->thd, sel_range->min_value, min_max_arg_len,
15149                          make_keypart_map(sel_range->part),
15150                          sel_range->max_value, min_max_arg_len,
15151                          make_keypart_map(sel_range->part),
15152                          range_flag);
15153   if (!range)
15154     return TRUE;
15155   if (insert_dynamic(&min_max_ranges, (uchar*)&range))
15156     return TRUE;
15157   return FALSE;
15158 }
15159 
15160 
15161 /*
15162   Opens the ranges if there are more conditions in quick_prefix_select than
15163   the ones used for jumping through the prefixes.
15164 
15165   SYNOPSIS
15166     QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges()
15167 
15168   NOTES
15169     quick_prefix_select is made over the conditions on the whole key.
15170     It defines a number of ranges of length x.
    However, when jumping through the prefixes we use only the first
    few most significant keyparts in the range key. However, if there
    are more keyparts to follow the ones we are using we must make the
    condition on the key inclusive (because x < "ab" means
    x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b')).
    To achieve the above we must turn off the NEAR_MIN/NEAR_MAX flags.
15177 */
adjust_prefix_ranges()15178 void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
15179 {
15180   if (quick_prefix_select &&
15181       group_prefix_len < quick_prefix_select->max_used_key_length)
15182   {
15183     DYNAMIC_ARRAY *arr;
15184     uint inx;
15185 
15186     for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
15187     {
15188       QUICK_RANGE *range;
15189 
15190       get_dynamic(arr, (uchar*)&range, inx);
15191       range->flag &= ~(NEAR_MIN | NEAR_MAX);
15192     }
15193   }
15194 }
15195 
15196 
15197 /*
15198   Determine the total number and length of the keys that will be used for
15199   index lookup.
15200 
15201   SYNOPSIS
15202     QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
15203 
15204   DESCRIPTION
15205     The total length of the keys used for index lookup depends on whether
15206     there are any predicates referencing the min/max argument, and/or if
15207     the min/max argument field can be NULL.
15208     This function does an optimistic analysis whether the search key might
15209     be extended by a constant for the min/max keypart. It is 'optimistic'
15210     because during actual execution it may happen that a particular range
15211     is skipped, and then a shorter key will be used. However this is data
15212     dependent and can't be easily estimated here.
15213 
15214   RETURN
15215     None
15216 */
15217 
update_key_stat()15218 void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
15219 {
15220   max_used_key_length= real_prefix_len;
15221   if (min_max_ranges.elements > 0)
15222   {
15223     QUICK_RANGE *cur_range;
15224     if (have_min)
15225     { /* Check if the right-most range has a lower boundary. */
15226       get_dynamic(&min_max_ranges, (uchar*)&cur_range,
15227                   min_max_ranges.elements - 1);
15228       if (!(cur_range->flag & NO_MIN_RANGE))
15229       {
15230         max_used_key_length+= min_max_arg_len;
15231         used_key_parts++;
15232         return;
15233       }
15234     }
15235     if (have_max)
15236     { /* Check if the left-most range has an upper boundary. */
15237       get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
15238       if (!(cur_range->flag & NO_MAX_RANGE))
15239       {
15240         max_used_key_length+= min_max_arg_len;
15241         used_key_parts++;
15242         return;
15243       }
15244     }
15245   }
15246   else if (have_min && min_max_arg_part &&
15247            min_max_arg_part->field->real_maybe_null())
15248   {
15249     /*
15250       If a MIN/MAX argument value is NULL, we can quickly determine
15251       that we're in the beginning of the next group, because NULLs
15252       are always < any other value. This allows us to quickly
15253       determine the end of the current group and jump to the next
15254       group (see next_min()) and thus effectively increases the
15255       usable key length.
15256     */
15257     max_used_key_length+= min_max_arg_len;
15258     used_key_parts++;
15259   }
15260 }
15261 
15262 
15263 /*
15264   Initialize a quick group min/max select for key retrieval.
15265 
15266   SYNOPSIS
15267     QUICK_GROUP_MIN_MAX_SELECT::reset()
15268 
15269   DESCRIPTION
15270     Initialize the index chosen for access and find and store the prefix
15271     of the last group. The method is expensive since it performs disk access.
15272 
15273   RETURN
15274     0      OK
15275     other  Error code
15276 */
15277 
reset(void)15278 int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
15279 {
15280   int result;
15281   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
15282 
15283   seen_first_key= FALSE;
15284   head->file->ha_start_keyread(index); /* We need only the key attributes */
15285 
15286   if ((result= file->ha_index_init(index,1)))
15287   {
15288     head->file->print_error(result, MYF(0));
15289     DBUG_RETURN(result);
15290   }
15291   if (quick_prefix_select && quick_prefix_select->reset())
15292     DBUG_RETURN(1);
15293   result= file->ha_index_last(record);
15294   if (result == HA_ERR_END_OF_FILE)
15295     DBUG_RETURN(0);
15296   /* Save the prefix of the last group. */
15297   key_copy(last_prefix, record, index_info, group_prefix_len);
15298 
15299   DBUG_RETURN(0);
15300 }
15301 
15302 
15303 
15304 /*
15305   Get the next key containing the MIN and/or MAX key for the next group.
15306 
15307   SYNOPSIS
15308     QUICK_GROUP_MIN_MAX_SELECT::get_next()
15309 
15310   DESCRIPTION
15311     The method finds the next subsequent group of records that satisfies the
15312     query conditions and finds the keys that contain the MIN/MAX values for
15313     the key part referenced by the MIN/MAX function(s). Once a group and its
15314     MIN/MAX values are found, store these values in the Item_sum objects for
15315     the MIN/MAX functions. The rest of the values in the result row are stored
15316     in the Item_field::result_field of each select field. If the query does
15317     not contain MIN and/or MAX functions, then the function only finds the
15318     group prefix, which is a query answer itself.
15319 
15320   NOTES
15321     If both MIN and MAX are computed, then we use the fact that if there is
15322     no MIN key, there can't be a MAX key as well, so we can skip looking
15323     for a MAX key in this case.
15324 
15325   RETURN
15326     0                  on success
15327     HA_ERR_END_OF_FILE if returned all keys
15328     other              if some error occurred
15329 */
15330 
int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
  int result;
#endif
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      /* A missing key for this prefix means: try the next prefix. */
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue;
      break;
    }

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= file->ha_index_read_map(record, group_prefix,
                                      make_prev_keypart_map(real_key_parts),
                                      HA_READ_KEY_EXACT);

    /* The overall outcome is the MIN lookup's, then MAX's, then the read's. */
    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  /* Map "no key in the last group" to the conventional end-of-scan code. */
  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}
15408 
15409 
15410 /*
15411   Retrieve the minimal key in the next group.
15412 
15413   SYNOPSIS
15414     QUICK_GROUP_MIN_MAX_SELECT::next_min()
15415 
15416   DESCRIPTION
15417     Find the minimal key within this group such that the key satisfies the query
15418     conditions and NULL semantics. The found key is loaded into this->record.
15419 
15420   IMPLEMENTATION
15421     Depending on the values of min_max_ranges.elements, key_infix_len, and
15422     whether there is a  NULL in the MIN field, this function may directly
15423     return without any data access. In this case we use the key loaded into
15424     this->record by the call to this->next_prefix() just before this call.
15425 
15426   RETURN
15427     0                    on success
15428     HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
15429     HA_ERR_END_OF_FILE   - "" -
15430     other                if some error occurred
15431 */
15432 
int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result=
           file->ha_index_read_map(record, group_prefix,
                                   make_prev_keypart_map(real_key_parts),
                                   HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar *tmp_key_buff= (uchar*)my_alloca(max_used_key_length);
      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(tmp_key_buff, record, index_info, max_used_key_length);
      result= file->ha_index_read_map(record, tmp_key_buff,
                                      make_keypart_map(real_key_parts),
                                      HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, tmp_key_buff, index_info, 0);
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
      my_afree(tmp_key_buff);
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}
15498 
15499 
15500 /*
15501   Retrieve the maximal key in the next group.
15502 
15503   SYNOPSIS
15504     QUICK_GROUP_MIN_MAX_SELECT::next_max()
15505 
15506   DESCRIPTION
15507     Lookup the maximal key of the group, and store it into this->record.
15508 
15509   RETURN
15510     0                    on success
15511     HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
15512     HA_ERR_END_OF_FILE	 - "" -
15513     other                if some error occurred
15514 */
15515 
next_max()15516 int QUICK_GROUP_MIN_MAX_SELECT::next_max()
15517 {
15518   int result;
15519 
15520   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");
15521 
15522   /* Get the last key in the (possibly extended) group. */
15523   if (min_max_ranges.elements > 0)
15524     result= next_max_in_range();
15525   else
15526     result= file->ha_index_read_map(record, group_prefix,
15527                                     make_prev_keypart_map(real_key_parts),
15528                                     HA_READ_PREFIX_LAST);
15529   DBUG_RETURN(result);
15530 }
15531 
15532 
15533 /**
15534   Find the next different key value by skiping all the rows with the same key
15535   value.
15536 
15537   Implements a specialized loose index access method for queries
15538   containing aggregate functions with distinct of the form:
15539     SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
15540   This method comes to replace the index scan + Unique class
15541   (distinct selection) for loose index scan that visits all the rows of a
15542   covering index instead of jumping in the beginning of each group.
15543   TODO: Placeholder function. To be replaced by a handler API call
15544 
15545   @param is_index_scan     hint to use index scan instead of random index read
15546                            to find the next different value.
15547   @param file              table handler
15548   @param key_part          group key to compare
15549   @param record            row data
15550   @param group_prefix      current key prefix data
15551   @param group_prefix_len  length of the current key prefix data
15552   @param group_key_parts   number of the current key prefix columns
15553   @return status
15554     @retval  0  success
15555     @retval !0  failure
15556 */
15557 
index_next_different(bool is_index_scan,handler * file,KEY_PART_INFO * key_part,uchar * record,const uchar * group_prefix,uint group_prefix_len,uint group_key_parts)15558 static int index_next_different (bool is_index_scan, handler *file,
15559                                 KEY_PART_INFO *key_part, uchar * record,
15560                                 const uchar * group_prefix,
15561                                 uint group_prefix_len,
15562                                 uint group_key_parts)
15563 {
15564   if (is_index_scan)
15565   {
15566     int result= 0;
15567 
15568     while (!key_cmp (key_part, group_prefix, group_prefix_len))
15569     {
15570       result= file->ha_index_next(record);
15571       if (result)
15572         return(result);
15573     }
15574     return result;
15575   }
15576   else
15577     return file->ha_index_read_map(record, group_prefix,
15578                                 make_prev_keypart_map(group_key_parts),
15579                                 HA_READ_AFTER_KEY);
15580 }
15581 
15582 
15583 /*
15584   Determine the prefix of the next group.
15585 
15586   SYNOPSIS
15587     QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
15588 
15589   DESCRIPTION
15590     Determine the prefix of the next group that satisfies the query conditions.
15591     If there is a range condition referencing the group attributes, use a
15592     QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
15593     condition. If there is a key infix of constants, append this infix
15594     immediately after the group attributes. The possibly extended prefix is
15595     stored in this->group_prefix. The first key of the found group is stored in
15596     this->record, on which relies this->next_min().
15597 
15598   RETURN
15599     0                    on success
15600     HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
15601     HA_ERR_END_OF_FILE   if there are no more keys
15602     other                if some error occurred
15603 */
next_prefix()15604 int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
15605 {
15606   int result;
15607   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");
15608 
15609   if (quick_prefix_select)
15610   {
15611     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
15612     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
15613                                                       group_key_parts,
15614                                                       cur_prefix)))
15615       DBUG_RETURN(result);
15616     seen_first_key= TRUE;
15617   }
15618   else
15619   {
15620     if (!seen_first_key)
15621     {
15622       result= file->ha_index_first(record);
15623       if (result)
15624         DBUG_RETURN(result);
15625       seen_first_key= TRUE;
15626     }
15627     else
15628     {
15629       /* Load the first key in this group into record. */
15630       result= index_next_different (is_index_scan, file, index_info->key_part,
15631                             record, group_prefix, group_prefix_len,
15632                             group_key_parts);
15633       if (result)
15634         DBUG_RETURN(result);
15635     }
15636   }
15637 
15638   /* Save the prefix of this group for subsequent calls. */
15639   key_copy(group_prefix, record, index_info, group_prefix_len);
15640   /* Append key_infix to group_prefix. */
15641   if (key_infix_len > 0)
15642     memcpy(group_prefix + group_prefix_len,
15643            key_infix, key_infix_len);
15644 
15645   DBUG_RETURN(0);
15646 }
15647 
15648 
15649 /**
15650   Allocate a temporary buffer, populate the buffer using the group prefix key
15651   and the min/max field key, and compare the result to the current key pointed
15652   by index_info.
15653 
15654   @param key    - the min or max field key
15655   @param length - length of "key"
15656 */
15657 int
cmp_min_max_key(const uchar * key,uint16 length)15658 QUICK_GROUP_MIN_MAX_SELECT::cmp_min_max_key(const uchar *key, uint16 length)
15659 {
15660   /*
15661     Allocate a buffer.
15662     Note, we allocate one extra byte, because some of Field_xxx::cmp(),
15663     e.g. Field_newdate::cmp(), use uint3korr() which actually read four bytes
15664     and then bit-and the read value with 0xFFFFFF.
15665     See "MDEV-7920 main.group_min_max fails ... with valgrind" for details.
15666   */
15667   uchar *buffer= (uchar*) my_alloca(real_prefix_len + min_max_arg_len + 1);
15668   /* Concatenate the group prefix key and the min/max field key */
15669   memcpy(buffer, group_prefix, real_prefix_len);
15670   memcpy(buffer + real_prefix_len, key, length);
15671   /* Compare the key pointed by key_info to the created key */
15672   int cmp_res= key_cmp(index_info->key_part, buffer,
15673                        real_prefix_len + min_max_arg_len);
15674   my_afree(buffer);
15675   return cmp_res;
15676 }
15677 
15678 
15679 /*
15680   Find the minimal key in a group that satisfies some range conditions for the
15681   min/max argument field.
15682 
15683   SYNOPSIS
15684     QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
15685 
15686   DESCRIPTION
15687     Given the sequence of ranges min_max_ranges, find the minimal key that is
15688     in the left-most possible range. If there is no such key, then the current
15689     group does not have a MIN key that satisfies the WHERE clause. If a key is
15690     found, its value is stored in this->record.
15691 
15692   RETURN
15693     0                    on success
15694     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
15695                          the ranges
15696     HA_ERR_END_OF_FILE   - "" -
15697     other                if some error
15698 */
15699 
int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  /* TRUE <=> a key with NULL in the MIN/MAX field was saved in tmp_record. */
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
                 min_max_arg_len) == 1))
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      /* No lower bound: look up by the group prefix alone. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_KEY_EXACT;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);
    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
      break;
    }

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
      result= HA_ERR_KEY_NOT_FOUND;
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      int cmp_res= cmp_min_max_key(cur_range->max_key, cur_range->max_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the maximum boundary
        or
        the key is greater than the maximum
      */
      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
          cmp_res > 0)
      {
        result= HA_ERR_KEY_NOT_FOUND;
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
    memcpy(record, tmp_record, head->s->rec_buff_length);
    result= 0;
  }
  return result;
}
15810 
15811 
15812 /*
15813   Find the maximal key in a group that satisfies some range conditions for the
15814   min/max argument field.
15815 
15816   SYNOPSIS
15817     QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
15818 
15819   DESCRIPTION
15820     Given the sequence of ranges min_max_ranges, find the maximal key that is
15821     in the right-most possible range. If there is no such key, then the current
15822     group does not have a MAX key that satisfies the WHERE clause. If a key is
15823     found, its value is stored in this->record.
15824 
15825   RETURN
15826     0                    on success
15827     HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
15828                          the ranges
15829     HA_ERR_END_OF_FILE   - "" -
15830     other                if some error
15831 */
15832 
int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
                 min_max_arg_len) == -1))
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      /* No upper bound: take the last key with the group prefix. */
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_PREFIX_LAST;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);

    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

      /*
        If no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
    }
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      return 0; /* No need to perform the checks below for equal keys. */

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
      continue;                                 // Row not found

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      int cmp_res= cmp_min_max_key(cur_range->min_key, cur_range->min_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the minimum boundary
        or
        the key is less than the minimum
      */
      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
          cmp_res < 0)
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  /* No range produced a qualifying key. */
  return HA_ERR_KEY_NOT_FOUND;
}
15914 
15915 
15916 /*
15917   Update all MIN function results with the newly found value.
15918 
15919   SYNOPSIS
15920     QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
15921 
15922   DESCRIPTION
15923     The method iterates through all MIN functions and updates the result value
15924     of each function by calling Item_sum::reset(), which in turn picks the new
15925     result value from this->head->record[0], previously updated by
15926     next_min(). The updated value is stored in a member variable of each of the
15927     Item_sum objects, depending on the value type.
15928 
15929   IMPLEMENTATION
15930     The update must be done separately for MIN and MAX, immediately after
15931     next_min() was called and before next_max() is called, because both MIN and
15932     MAX take their result value from the same buffer this->head->record[0]
15933     (i.e.  this->record).
15934 
15935   RETURN
15936     None
15937 */
15938 
update_min_result()15939 void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
15940 {
15941   Item_sum *min_func;
15942 
15943   min_functions_it->rewind();
15944   while ((min_func= (*min_functions_it)++))
15945     min_func->reset_and_add();
15946 }
15947 
15948 
15949 /*
15950   Update all MAX function results with the newly found value.
15951 
15952   SYNOPSIS
15953     QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
15954 
15955   DESCRIPTION
15956     The method iterates through all MAX functions and updates the result value
15957     of each function by calling Item_sum::reset(), which in turn picks the new
15958     result value from this->head->record[0], previously updated by
15959     next_max(). The updated value is stored in a member variable of each of the
15960     Item_sum objects, depending on the value type.
15961 
15962   IMPLEMENTATION
15963     The update must be done separately for MIN and MAX, immediately after
15964     next_max() was called, because both MIN and MAX take their result value
15965     from the same buffer this->head->record[0] (i.e.  this->record).
15966 
15967   RETURN
15968     None
15969 */
15970 
update_max_result()15971 void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
15972 {
15973   Item_sum *max_func;
15974 
15975   max_functions_it->rewind();
15976   while ((max_func= (*max_functions_it)++))
15977     max_func->reset_and_add();
15978 }
15979 
15980 
15981 /*
15982   Append comma-separated list of keys this quick select uses to key_names;
15983   append comma-separated list of corresponding used lengths to used_lengths.
15984 
15985   SYNOPSIS
15986     QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
15987     key_names    [out] Names of used indexes
15988     used_lengths [out] Corresponding lengths of the index names
15989 
15990   DESCRIPTION
15991     This method is used by select_describe to extract the names of the
15992     indexes used by a quick select.
15993 
15994 */
15995 
void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  bool first= TRUE;

  /* A group-min-max select uses a single index, so one entry is appended. */
  add_key_and_length(key_names, used_lengths, &first);
}
16003 
16004 
16005 /* Check whether the number for equality ranges exceeds the set threshold */
16006 
eq_ranges_exceeds_limit(RANGE_SEQ_IF * seq,void * seq_init_param,uint limit)16007 bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param,
16008                              uint limit)
16009 {
16010   KEY_MULTI_RANGE range;
16011   range_seq_t seq_it;
16012   uint count = 0;
16013 
16014   if (limit == 0)
16015   {
16016     /* 'Statistics instead of index dives' feature is turned off */
16017    return false;
16018   }
16019   seq_it= seq->init(seq_init_param, 0, 0);
16020   while (!seq->next(seq_it, &range))
16021   {
16022     if ((range.range_flag & EQ_RANGE) && !(range.range_flag & NULL_RANGE))
16023     {
16024       if (++count >= limit)
16025         return true;
16026     }
16027   }
16028   return false;
16029 }
16030 
16031 #ifndef DBUG_OFF
16032 
print_sel_tree(PARAM * param,SEL_TREE * tree,key_map * tree_map,const char * msg)16033 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
16034                            const char *msg)
16035 {
16036   char buff[1024];
16037   DBUG_ENTER("print_sel_tree");
16038 
16039   String tmp(buff,sizeof(buff),&my_charset_bin);
16040   tmp.length(0);
16041   for (uint idx= 0; idx < param->keys; idx++)
16042   {
16043     if (tree_map->is_set(idx))
16044     {
16045       uint keynr= param->real_keynr[idx];
16046       if (tmp.length())
16047         tmp.append(',');
16048       tmp.append(&param->table->key_info[keynr].name);
16049     }
16050   }
16051   if (!tmp.length())
16052     tmp.append(STRING_WITH_LEN("(empty)"));
16053 
16054   DBUG_PRINT("info", ("SEL_TREE: %p (%s)  scans: %s", tree, msg,
16055                       tmp.c_ptr_safe()));
16056 
16057   DBUG_VOID_RETURN;
16058 }
16059 
16060 
print_ror_scans_arr(TABLE * table,const char * msg,struct st_ror_scan_info ** start,struct st_ror_scan_info ** end)16061 static void print_ror_scans_arr(TABLE *table, const char *msg,
16062                                 struct st_ror_scan_info **start,
16063                                 struct st_ror_scan_info **end)
16064 {
16065   DBUG_ENTER("print_ror_scans_arr");
16066 
16067   char buff[1024];
16068   String tmp(buff,sizeof(buff),&my_charset_bin);
16069   tmp.length(0);
16070   for (;start != end; start++)
16071   {
16072     if (tmp.length())
16073       tmp.append(',');
16074     tmp.append(&table->key_info[(*start)->keynr].name);
16075   }
16076   if (!tmp.length())
16077     tmp.append(STRING_WITH_LEN("(empty)"));
16078   DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.c_ptr()));
16079   DBUG_VOID_RETURN;
16080 }
16081 
16082 static String dbug_print_sel_arg_buf;
16083 
16084 static void
print_sel_arg_key(Field * field,const uchar * key,String * out)16085 print_sel_arg_key(Field *field, const uchar *key, String *out)
16086 {
16087   TABLE *table= field->table;
16088   MY_BITMAP *old_sets[2];
16089   dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
16090 
16091   if (field->real_maybe_null())
16092   {
16093     if (*key)
16094     {
16095       out->append("NULL");
16096       goto end;
16097     }
16098     key++;					// Skip null byte
16099   }
16100 
16101   field->set_key_image(key, field->pack_length());
16102 
16103   if (field->type() == MYSQL_TYPE_BIT)
16104     (void) field->val_int_as_str(out, 1);
16105   else
16106     field->val_str(out);
16107 
16108 end:
16109   dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
16110 }
16111 
16112 
16113 /*
16114   @brief
16115     Produce a string representation of an individual SEL_ARG and return pointer
16116     to it
16117 
16118   @detail
16119     Intended usage:
16120 
16121      (gdb) p dbug_print_sel_arg(ptr)
16122 */
16123 
const char *dbug_print_sel_arg(SEL_ARG *sel_arg)
{
  StringBuffer<64> buf;
  /* Result accumulates in a static buffer so the pointer survives return. */
  String &out= dbug_print_sel_arg_buf;
  out.length(0);

  if (!sel_arg)
  {
    out.append("NULL");
    goto end;
  }

  out.append("SEL_ARG(");

  const char *stype;
  switch(sel_arg->type) {
  case SEL_ARG::IMPOSSIBLE:
    stype="IMPOSSIBLE";
    break;
  case SEL_ARG::MAYBE:
    stype="MAYBE";
    break;
  case SEL_ARG::MAYBE_KEY:
    stype="MAYBE_KEY";
    break;
  case SEL_ARG::KEY_RANGE:
  default:
    stype= NULL;
  }

  /* Non-KEY_RANGE types have no min/max bounds; print just the type name. */
  if (stype)
  {
    out.append("type=");
    out.append(stype);
    goto end;
  }

  /* KEY_RANGE: print "min < field <= max" style interval. */
  if (sel_arg->min_flag & NO_MIN_RANGE)
    out.append("-inf");
  else
  {
    print_sel_arg_key(sel_arg->field, sel_arg->min_value, &buf);
    out.append(buf);
  }

  out.append((sel_arg->min_flag & NEAR_MIN)? "<" : "<=");

  out.append(sel_arg->field->field_name);

  out.append((sel_arg->max_flag & NEAR_MAX)? "<" : "<=");

  if (sel_arg->max_flag & NO_MAX_RANGE)
    out.append("+inf");
  else
  {
    print_sel_arg_key(sel_arg->field, sel_arg->max_value, &buf);
    out.append(buf);
  }

  out.append(")");

end:
  return dbug_print_sel_arg_buf.c_ptr_safe();
}
16188 
16189 
16190 /*****************************************************************************
16191 ** Print a quick range for debugging
16192 ** TODO:
16193 ** This should be changed to use a String to store each row instead
16194 ** of locking the DEBUG stream !
16195 *****************************************************************************/
16196 
static void
print_key(KEY_PART * key_part, const uchar *key, uint used_length)
{
  char buff[1024];
  const uchar *key_end= key+used_length;
  uint store_length;
  TABLE *table= key_part->field->table;
  MY_BITMAP *old_sets[2];

  /* Temporarily allow reading/writing all columns of the table. */
  dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);

  /* Walk the packed key, one key part per iteration. */
  for (; key < key_end; key+=store_length, key_part++)
  {
    String tmp(buff,sizeof(buff),&my_charset_bin);
    Field *field=      key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      /* First byte of a nullable key part is the null indicator. */
      if (*key)
      {
	fwrite("NULL",sizeof(char),4,DBUG_FILE);
	continue;
      }
      key++;					// Skip null byte
      store_length--;
    }
    field->set_key_image(key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    /* Separate key part values with '/'. */
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
}
16235 
16236 
print_quick(QUICK_SELECT_I * quick,const key_map * needed_reg)16237 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
16238 {
16239   char buf[MAX_KEY/8+1];
16240   TABLE *table;
16241   MY_BITMAP *old_sets[2];
16242   DBUG_ENTER("print_quick");
16243   if (!quick)
16244     DBUG_VOID_RETURN;
16245   DBUG_LOCK_FILE;
16246 
16247   table= quick->head;
16248   dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
16249   quick->dbug_dump(0, TRUE);
16250   dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
16251 
16252   fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
16253 
16254   DBUG_UNLOCK_FILE;
16255   DBUG_VOID_RETURN;
16256 }
16257 
dbug_dump(int indent,bool verbose)16258 void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
16259 {
16260   /* purecov: begin inspected */
16261   fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
16262 	  indent, "", head->key_info[index].name.str, max_used_key_length);
16263 
16264   if (verbose)
16265   {
16266     QUICK_RANGE *range;
16267     QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
16268     QUICK_RANGE **end_range= pr + ranges.elements;
16269     for (; pr != end_range; ++pr)
16270     {
16271       fprintf(DBUG_FILE, "%*s", indent + 2, "");
16272       range= *pr;
16273       if (!(range->flag & NO_MIN_RANGE))
16274       {
16275         print_key(key_parts, range->min_key, range->min_length);
16276         if (range->flag & NEAR_MIN)
16277 	  fputs(" < ",DBUG_FILE);
16278         else
16279 	  fputs(" <= ",DBUG_FILE);
16280       }
16281       fputs("X",DBUG_FILE);
16282 
16283       if (!(range->flag & NO_MAX_RANGE))
16284       {
16285         if (range->flag & NEAR_MAX)
16286 	  fputs(" < ",DBUG_FILE);
16287         else
16288 	  fputs(" <= ",DBUG_FILE);
16289         print_key(key_parts, range->max_key, range->max_length);
16290       }
16291       fputs("\n",DBUG_FILE);
16292     }
16293   }
16294   /* purecov: end */
16295 }
16296 
dbug_dump(int indent,bool verbose)16297 void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose)
16298 {
16299   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
16300   QUICK_RANGE_SELECT *quick;
16301   fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
16302   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
16303   while ((quick= it++))
16304     quick->dbug_dump(indent+2, verbose);
16305   if (pk_quick_select)
16306   {
16307     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
16308     pk_quick_select->dbug_dump(indent+2, verbose);
16309   }
16310   fprintf(DBUG_FILE, "%*s}\n", indent, "");
16311 }
16312 
dbug_dump(int indent,bool verbose)16313 void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
16314 {
16315   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
16316   QUICK_SELECT_WITH_RECORD *qr;
16317   fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
16318           indent, "", need_to_fetch_row? "":"non-");
16319   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
16320   while ((qr= it++))
16321     qr->quick->dbug_dump(indent+2, verbose);
16322   if (cpk_quick)
16323   {
16324     fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
16325     cpk_quick->dbug_dump(indent+2, verbose);
16326   }
16327   fprintf(DBUG_FILE, "%*s}\n", indent, "");
16328 }
16329 
dbug_dump(int indent,bool verbose)16330 void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
16331 {
16332   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
16333   QUICK_SELECT_I *quick;
16334   fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
16335   fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
16336   while ((quick= it++))
16337     quick->dbug_dump(indent+2, verbose);
16338   fprintf(DBUG_FILE, "%*s}\n", indent, "");
16339 }
16340 
16341 
16342 /*
16343   Print quick select information to DBUG_FILE.
16344 
16345   SYNOPSIS
16346     QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
16347     indent  Indentation offset
16348     verbose If TRUE show more detailed output.
16349 
16350   DESCRIPTION
16351     Print the contents of this quick select to DBUG_FILE. The method also
16352     calls dbug_dump() for the used quick select if any.
16353 
16354   IMPLEMENTATION
16355     Caller is responsible for locking DBUG_FILE before this call and unlocking
16356     it afterwards.
16357 
16358   RETURN
16359     None
16360 */
16361 
dbug_dump(int indent,bool verbose)16362 void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
16363 {
16364   fprintf(DBUG_FILE,
16365           "%*squick_group_min_max_select: index %s (%d), length: %d\n",
16366 	  indent, "", index_info->name.str, index, max_used_key_length);
16367   if (key_infix_len > 0)
16368   {
16369     fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
16370             indent, "", key_infix_len);
16371   }
16372   if (quick_prefix_select)
16373   {
16374     fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
16375     quick_prefix_select->dbug_dump(indent + 2, verbose);
16376   }
16377   if (min_max_ranges.elements > 0)
16378   {
16379     fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
16380             indent, "", min_max_ranges.elements);
16381   }
16382 }
16383 
16384 #endif /* !DBUG_OFF */
16385 
16386 
16387 /*
16388   @brief Print the comparison operator for the min range
16389 */
16390 
print_min_range_operator(String * out,const ha_rkey_function flag)16391 static void print_min_range_operator(String *out, const ha_rkey_function flag)
16392 {
16393     if (flag == HA_READ_AFTER_KEY)
16394       out->append(STRING_WITH_LEN(" < "));
16395     else if (flag == HA_READ_KEY_EXACT || flag == HA_READ_KEY_OR_NEXT)
16396       out->append(STRING_WITH_LEN(" <= "));
16397     else
16398       out->append(STRING_WITH_LEN(" ? "));
16399 }
16400 
16401 
16402 /*
16403   @brief Print the comparison operator for the max range
16404 */
16405 
print_max_range_operator(String * out,const ha_rkey_function flag)16406 static void print_max_range_operator(String *out, const ha_rkey_function flag)
16407 {
16408   if (flag == HA_READ_BEFORE_KEY)
16409     out->append(STRING_WITH_LEN(" < "));
16410   else if (flag == HA_READ_AFTER_KEY)
16411     out->append(STRING_WITH_LEN(" <= "));
16412   else
16413     out->append(STRING_WITH_LEN(" ? "));
16414 }
16415 
16416 
16417 static
print_range(String * out,const KEY_PART_INFO * key_part,KEY_MULTI_RANGE * range,uint n_key_parts)16418 void print_range(String *out, const KEY_PART_INFO *key_part,
16419                  KEY_MULTI_RANGE *range, uint n_key_parts)
16420 {
16421   uint flag= range->range_flag;
16422   String key_name;
16423   key_name.set_charset(system_charset_info);
16424   key_part_map keypart_map= range->start_key.keypart_map |
16425                             range->end_key.keypart_map;
16426 
16427   if (flag & GEOM_FLAG)
16428   {
16429     /*
16430       The flags of GEOM ranges do not work the same way as for other
16431       range types, so printing "col < some_geom" doesn't make sense.
16432       Just print the column name, not operator.
16433     */
16434     print_keyparts_name(out, key_part, n_key_parts, keypart_map);
16435     out->append(STRING_WITH_LEN(" "));
16436     print_key_value(out, key_part, range->start_key.key,
16437                     range->start_key.length);
16438     return;
16439   }
16440 
16441   if (range->start_key.length)
16442   {
16443     print_key_value(out, key_part, range->start_key.key,
16444                     range->start_key.length);
16445     print_min_range_operator(out, range->start_key.flag);
16446   }
16447 
16448   print_keyparts_name(out, key_part, n_key_parts, keypart_map);
16449 
16450   if (range->end_key.length)
16451   {
16452     print_max_range_operator(out, range->end_key.flag);
16453     print_key_value(out, key_part, range->end_key.key,
16454                     range->end_key.length);
16455   }
16456 }
16457 
16458 
16459 /*
16460   @brief Print range created for non-indexed columns
16461 
16462   @param
16463     out                   output string
16464     field                 field for which the range is printed
16465     range                 range for the field
16466 */
16467 
16468 static
print_range_for_non_indexed_field(String * out,Field * field,KEY_MULTI_RANGE * range)16469 void print_range_for_non_indexed_field(String *out, Field *field,
16470                                        KEY_MULTI_RANGE *range)
16471 {
16472   TABLE *table= field->table;
16473   MY_BITMAP *old_sets[2];
16474   dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
16475 
16476   if (range->start_key.length)
16477   {
16478     field->print_key_part_value(out, range->start_key.key, field->key_length());
16479     print_min_range_operator(out, range->start_key.flag);
16480   }
16481 
16482   out->append(field->field_name);
16483 
16484   if (range->end_key.length)
16485   {
16486     print_max_range_operator(out, range->end_key.flag);
16487     field->print_key_part_value(out, range->end_key.key, field->key_length());
16488   }
16489   dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
16490 }
16491 
16492 
16493 
16494 /*
16495 
16496   Add ranges to the trace
16497   For ex:
16498     lets say we have an index a_b(a,b)
16499     query: select * from t1 where a=2 and b=4 ;
16500     so we create a range:
16501       (2,4) <= (a,b) <= (2,4)
16502     this is added to the trace
16503 */
16504 
trace_ranges(Json_writer_array * range_trace,PARAM * param,uint idx,SEL_ARG * keypart,const KEY_PART_INFO * key_parts)16505 static void trace_ranges(Json_writer_array *range_trace,
16506                          PARAM *param, uint idx,
16507                          SEL_ARG *keypart,
16508                          const KEY_PART_INFO *key_parts)
16509 {
16510   SEL_ARG_RANGE_SEQ seq;
16511   KEY_MULTI_RANGE range;
16512   range_seq_t seq_it;
16513   uint flags= 0;
16514   RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
16515                          sel_arg_range_seq_next, 0, 0};
16516   KEY *keyinfo= param->table->key_info + param->real_keynr[idx];
16517   uint n_key_parts= param->table->actual_n_key_parts(keyinfo);
16518   DBUG_ASSERT(range_trace->trace_started());
16519   seq.keyno= idx;
16520   seq.real_keyno= param->real_keynr[idx];
16521   seq.param= param;
16522   seq.start= keypart;
16523   /*
16524     is_ror_scan is set to FALSE here, because we are only interested
16525     in iterating over all the ranges and printing them.
16526   */
16527   seq.is_ror_scan= FALSE;
16528   const KEY_PART_INFO *cur_key_part= key_parts + keypart->part;
16529   seq_it= seq_if.init((void *) &seq, 0, flags);
16530 
16531   while (!seq_if.next(seq_it, &range))
16532   {
16533     StringBuffer<128> range_info(system_charset_info);
16534     print_range(&range_info, cur_key_part, &range, n_key_parts);
16535     range_trace->add(range_info.c_ptr_safe(), range_info.length());
16536   }
16537 }
16538 
16539 /**
16540   Print a key to a string
16541 
16542   @param[out] out          String the key is appended to
16543   @param[in]  key_part     Index components description
16544   @param[in]  key          Key tuple
16545   @param[in]  used_length  length of the key tuple
16546 */
16547 
print_key_value(String * out,const KEY_PART_INFO * key_part,const uchar * key,uint used_length)16548 static void print_key_value(String *out, const KEY_PART_INFO *key_part,
16549                             const uchar* key, uint used_length)
16550 {
16551   out->append(STRING_WITH_LEN("("));
16552   Field *field= key_part->field;
16553   StringBuffer<128> tmp(system_charset_info);
16554   TABLE *table= field->table;
16555   uint store_length;
16556   MY_BITMAP *old_sets[2];
16557   dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
16558   const uchar *key_end= key+used_length;
16559 
16560   for (; key < key_end; key+=store_length, key_part++)
16561   {
16562     field= key_part->field;
16563     store_length= key_part->store_length;
16564 
16565     field->print_key_part_value(out, key, key_part->length);
16566 
16567     if (key + store_length < key_end)
16568       out->append(STRING_WITH_LEN(","));
16569   }
16570   dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
16571   out->append(STRING_WITH_LEN(")"));
16572 }
16573 
16574 /**
  Print key parts involved in a range
16576   @param[out] out          String the key is appended to
16577   @param[in]  key_part     Index components description
16578   @param[in]  n_keypart    Number of keyparts in index
16579   @param[in]  keypart_map  map for keyparts involved in the range
16580 */
16581 
print_keyparts_name(String * out,const KEY_PART_INFO * key_part,uint n_keypart,key_part_map keypart_map)16582 void print_keyparts_name(String *out, const KEY_PART_INFO *key_part,
16583                          uint n_keypart, key_part_map keypart_map)
16584 {
16585   uint i;
16586   out->append(STRING_WITH_LEN("("));
16587   bool first_keypart= TRUE;
16588   for (i=0; i < n_keypart; key_part++, i++)
16589   {
16590     if (keypart_map & (1 << i))
16591     {
16592       if (first_keypart)
16593         first_keypart= FALSE;
16594       else
16595         out->append(STRING_WITH_LEN(","));
16596       out->append(key_part->field->field_name);
16597     }
16598     else
16599       break;
16600   }
16601   out->append(STRING_WITH_LEN(")"));
16602 }
16603