1 /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 
24 /**
25   @file
26 
27   Optimising of MIN(), MAX() and COUNT(*) queries without 'group by' clause
28   by replacing the aggregate expression with a constant.
29 
30   Given a table with a compound key on columns (a,b,c), the following
31   types of queries are optimised (assuming the table handler supports
32   the required methods)
33 
34   @verbatim
35   SELECT COUNT(*) FROM t1[,t2,t3,...]
36   SELECT MIN(b) FROM t1 WHERE a=const
37   SELECT MAX(c) FROM t1 WHERE a=const AND b=const
38   SELECT MAX(b) FROM t1 WHERE a=const AND b<const
39   SELECT MIN(b) FROM t1 WHERE a=const AND b>const
40   SELECT MIN(b) FROM t1 WHERE a=const AND b BETWEEN const AND const
41   SELECT MAX(b) FROM t1 WHERE a=const AND b BETWEEN const AND const
42   @endverbatim
43 
44   Instead of '<' one can use '<=', '>', '>=' and '=' as well.
45   Instead of 'a=const' the condition 'a IS NULL' can be used.
46 
47   If all selected fields are replaced then we will also remove all
48   involved tables and return the answer without any join. Thus, the
49   following query will be replaced with a row of two constants:
50   @verbatim
51   SELECT MAX(b), MIN(d) FROM t1,t2
52     WHERE a=const AND b<const AND d>const
53   @endverbatim
54   (assuming a index for column d of table t2 is defined)
55 */
56 
57 #include "key.h"                                // key_cmp_if_same
58 #include "sql_select.h"
59 #include "item_sum.h"                           // Item_sum
60 
61 static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
62                                 Item_field *item_field, Item *cond,
63                                 uint *range_fl, uint *key_prefix_length);
64 static int reckey_in_range(bool max_fl, TABLE_REF *ref, Item_field *item_field,
65                             Item *cond, uint range_fl, uint prefix_len);
66 static int maxmin_in_range(bool max_fl, Item_field *item_field, Item *cond);
67 
68 
69 /*
70   Get exact count of rows in all tables
71 
72   SYNOPSIS
73     get_exact_record_count()
74     @param tables  List of tables
75 
76   NOTES
77     When this is called, we know all table handlers supports HA_HAS_RECORDS
78     or HA_STATS_RECORDS_IS_EXACT
79 
80   RETURN
81     ULLONG_MAX          Error: Could not calculate number of rows
82     #			Multiplication of number of rows in all tables
83 */
84 
get_exact_record_count(TABLE_LIST * tables)85 static ulonglong get_exact_record_count(TABLE_LIST *tables)
86 {
87   ulonglong count= 1;
88   for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
89   {
90     ha_rows tmp= 0;
91     int error= tl->table->file->ha_records(&tmp);
92     if (error != 0)
93       return ULLONG_MAX;
94     count*= tmp;
95   }
96   return count;
97 }
98 
99 
100 /**
101   Use index to read MIN(field) value.
102 
103   @param table      Table object
104   @param ref        Reference to the structure where we store the key value
105   @item_field       Field used in MIN()
106   @range_fl         Whether range endpoint is strict less than
107   @prefix_len       Length of common key part for the range
108 
109   @retval
110     0               No errors
111     HA_ERR_...      Otherwise
112 */
113 
get_index_min_value(TABLE * table,TABLE_REF * ref,Item_field * item_field,uint range_fl,uint prefix_len)114 static int get_index_min_value(TABLE *table, TABLE_REF *ref,
115                                Item_field *item_field, uint range_fl,
116                                uint prefix_len)
117 {
118   int error;
119 
120   if (!ref->key_length)
121     error= table->file->ha_index_first(table->record[0]);
122   else
123   {
124     /*
125       Use index to replace MIN/MAX functions with their values
126       according to the following rules:
127 
128       1) Insert the minimum non-null values where the WHERE clause still
129          matches, or
130       2) a NULL value if there are only NULL values for key_part_k.
131       3) Fail, producing a row of nulls
132 
133       Implementation: Read the smallest value using the search key. If
134       the interval is open, read the next value after the search
135       key. If read fails, and we're looking for a MIN() value for a
136       nullable column, test if there is an exact match for the key.
137     */
138     if (!(range_fl & NEAR_MIN))
139       /*
140          Closed interval: Either The MIN argument is non-nullable, or
141          we have a >= predicate for the MIN argument.
142       */
143       error= table->file->ha_index_read_map(table->record[0],
144                                            ref->key_buff,
145                                            make_prev_keypart_map(ref->key_parts),
146                                            HA_READ_KEY_OR_NEXT);
147     else
148     {
149       /*
150         Open interval: There are two cases:
151         1) We have only MIN() and the argument column is nullable, or
152         2) there is a > predicate on it, nullability is irrelevant.
153         We need to scan the next bigger record first.
154         Open interval is not used if the search key involves the last keypart,
155         and it would not work.
156       */
157       DBUG_ASSERT(prefix_len < ref->key_length);
158       error= table->file->ha_index_read_map(table->record[0],
159                                             ref->key_buff,
160                                             make_prev_keypart_map(ref->key_parts),
161                                             HA_READ_AFTER_KEY);
162       /*
163          If the found record is outside the group formed by the search
164          prefix, or there is no such record at all, check if all
165          records in that group have NULL in the MIN argument
166          column. If that is the case return that NULL.
167 
168          Check if case 1 from above holds. If it does, we should read
169          the skipped tuple.
170       */
171       if (item_field->field->real_maybe_null() &&
172           ref->key_buff[prefix_len] == 1 &&
173           /*
174             Last keypart (i.e. the argument to MIN) is set to NULL by
175             find_key_for_maxmin only if all other keyparts are bound
176             to constants in a conjunction of equalities. Hence, we
177             can detect this by checking only if the last keypart is
178             NULL.
179           */
180           (error == HA_ERR_KEY_NOT_FOUND ||
181            key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len)))
182       {
183         DBUG_ASSERT(item_field->field->real_maybe_null());
184         error= table->file->ha_index_read_map(table->record[0],
185                                              ref->key_buff,
186                                              make_prev_keypart_map(ref->key_parts),
187                                              HA_READ_KEY_EXACT);
188       }
189     }
190   }
191   return error;
192 }
193 
194 
195 /**
196   Use index to read MAX(field) value.
197 
198   @param table      Table object
199   @param ref        Reference to the structure where we store the key value
200   @range_fl         Whether range endpoint is strict greater than
201 
202   @retval
203     0               No errors
204     HA_ERR_...      Otherwise
205 */
206 
get_index_max_value(TABLE * table,TABLE_REF * ref,uint range_fl)207 static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
208 {
209   return (ref->key_length ?
210           table->file->ha_index_read_map(table->record[0], ref->key_buff,
211                                         make_prev_keypart_map(ref->key_parts),
212                                         range_fl & NEAR_MAX ?
213                                         HA_READ_BEFORE_KEY :
214                                         HA_READ_PREFIX_LAST_OR_PREV) :
215           table->file->ha_index_last(table->record[0]));
216 }
217 
218 
219 
220 /**
221   Substitutes constants for some COUNT(), MIN() and MAX() functions.
222 
223   @param thd                   thread handler
224   @param tables                list of leaves of join table tree
225   @param all_fields            All fields to be returned
226   @param conds                 WHERE clause
227 
228   @note
229     This function is only called for queries with aggregate functions and no
230     GROUP BY part. This means that the result set shall contain a single
231     row only
232 
233   @retval
234     0                    no errors
235   @retval
236     1                    if all items were resolved
237   @retval
238     HA_ERR_KEY_NOT_FOUND on impossible conditions
239   @retval
240     HA_ERR_... if a deadlock or a lock wait timeout happens, for example
241   @retval
242     ER_...     e.g. ER_SUBQUERY_NO_1_ROW
243 */
244 
opt_sum_query(THD * thd,TABLE_LIST * tables,List<Item> & all_fields,Item * conds)245 int opt_sum_query(THD *thd,
246                   TABLE_LIST *tables, List<Item> &all_fields, Item *conds)
247 {
248   List_iterator_fast<Item> it(all_fields);
249   int const_result= 1;
250   bool recalc_const_item= false;
251   ulonglong count= 1;
252   bool is_exact_count= TRUE, maybe_exact_count= TRUE;
253   table_map removed_tables= 0, outer_tables= 0, used_tables= 0;
254   Item *item;
255   int error;
256 
257   DBUG_ENTER("opt_sum_query");
258 
259   const table_map where_tables= conds ? conds->used_tables() : 0;
260   /*
261     opt_sum_query() happens at optimization. A subquery is optimized once but
262     executed possibly multiple times.
263     If the value of the set function depends on the join's emptiness (like
264     MIN() does), and the join's emptiness depends on the outer row, we cannot
265     mark the set function as constant:
266    */
267   if (where_tables & OUTER_REF_TABLE_BIT)
268     DBUG_RETURN(0);
269 
270   bool force_index= false;
271   /*
272     Analyze outer join dependencies, and, if possible, compute the number
273     of returned rows.
274   */
275   for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
276   {
277     if (tl->join_cond_optim() || tl->outer_join_nest())
278     /* Don't replace expression on a table that is part of an outer join */
279     {
280       outer_tables|= tl->map();
281 
282       /*
283         We can't optimise LEFT JOIN in cases where the WHERE condition
284         restricts the table that is used, like in:
285           SELECT MAX(t1.a) FROM t1 LEFT JOIN t2 join-condition
286           WHERE t2.field IS NULL;
287       */
288       if (tl->map() & where_tables)
289         DBUG_RETURN(0);
290     }
291     else
292       used_tables|= tl->map();
293 
294     /*
295       If the storage manager of 'tl' gives exact row count as part of
296       statistics (cheap), compute the total number of rows. If there are
297       no outer table dependencies, this count may be used as the real count.
298       Schema tables are filled after this function is invoked, so we can't
299       get row count.
300       Derived tables aren't filled yet, their number of rows are estimates.
301     */
302     bool table_filled= !(tl->schema_table || tl->uses_materialization());
303     if ((tl->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
304         table_filled)
305     {
306       error= tl->fetch_number_of_rows();
307       if(error)
308       {
309         tl->table->file->print_error(error, MYF(ME_FATALERROR));
310         DBUG_RETURN(error);
311       }
312       count*= tl->table->file->stats.records;
313     }
314     else
315     {
316       maybe_exact_count&= MY_TEST(table_filled &&
317                                   (tl->table->file->ha_table_flags() &
318                                    HA_HAS_RECORDS));
319       is_exact_count= FALSE;
320       count= 1;                                 // ensure count != 0
321       force_index|= tl->table->force_index;
322     }
323   }
324 
325   /*
326     Iterate through all items in the SELECT clause and replace
327     COUNT(), MIN() and MAX() with constants (if possible).
328   */
329 
330   while ((item= it++))
331   {
332     if (item->type() == Item::SUM_FUNC_ITEM)
333     {
334       Item_sum *item_sum= (((Item_sum*) item));
335       switch (item_sum->sum_func()) {
336       case Item_sum::COUNT_FUNC:
337         /*
338           If the expr in COUNT(expr) can never be null we can change this
339           to the number of rows in the tables if this number is exact and
340           there are no outer joins.
341           Don't apply this optimization when there is a FORCE INDEX on any of
342           the tables.
343         */
344         if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null &&
345             !outer_tables && maybe_exact_count && !force_index)
346         {
347           if (!is_exact_count)
348           {
349             /*
350               We will skip calling record count for explain query,
351 	      since it might take long time to compute.
352             */
353             if (!thd->lex->describe &&
354                 (count= get_exact_record_count(tables)) == ULLONG_MAX)
355             {
356               /* Error from handler in counting rows. Don't optimize count() */
357               const_result= 0;
358               continue;
359             }
360             is_exact_count= 1;                  // count is now exact
361           }
362         }
363         /* For result count of full-text search: If
364            1. it is a single table query,
365            2. the WHERE condition is a single MATCH expresssion,
366            3. the table engine can provide the row count from FTS result, and
367            4. the expr in COUNT(expr) can not be NULL,
368            we do the full-text search now, and replace with the actual count.
369 
370            Note: Item_func_match::init_search() will be called again
371                  later in the optimization phase by init_fts_funcs(),
372                  but search will still only be done once.
373         */
374         else if (tables->next_leaf == NULL &&                             // 1
375                  conds && conds->type() == Item::FUNC_ITEM &&
376                  ((Item_func*)conds)->functype() == Item_func::FT_FUNC && // 2
377                  (tables->table->file->ha_table_flags() &
378                   HA_CAN_FULLTEXT_EXT) &&                                 // 3
379                  !((Item_sum_count*) item)->get_arg(0)->maybe_null)       // 4
380         {
381           Item_func_match* fts_item= static_cast<Item_func_match*>(conds);
382           fts_item->get_master()->set_hints(NULL, FT_NO_RANKING,
383                                             HA_POS_ERROR, false);
384           if (fts_item->init_search(thd))
385             break;
386           count= fts_item->get_count();
387         }
388         else
389           const_result= 0;
390 
391         // See comment above for get_exact_record_count()
392         if (!thd->lex->describe && const_result == 1) {
393           ((Item_sum_count*) item)->make_const((longlong) count);
394           recalc_const_item= true;
395         }
396 
397         break;
398       case Item_sum::MIN_FUNC:
399       case Item_sum::MAX_FUNC:
400       {
401         int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC);
402         /*
403           If MIN/MAX(expr) is the first part of a key or if all previous
404           parts of the key is found in the COND, then we can use
405           indexes to find the key.
406         */
407         Item *expr=item_sum->get_arg(0);
408         if (expr->real_item()->type() == Item::FIELD_ITEM)
409         {
410           uchar key_buff[MAX_KEY_LENGTH];
411           TABLE_REF ref;
412           uint range_fl, prefix_len;
413 
414           ref.key_buff= key_buff;
415           Item_field *item_field= (Item_field*) (expr->real_item());
416           TABLE *table= item_field->field->table;
417 
418           /*
419             Look for a partial key that can be used for optimization.
420             If we succeed, ref.key_length will contain the length of
421             this key, while prefix_len will contain the length of
422             the beginning of this key without field used in MIN/MAX().
423             Type of range for the key part for this field will be
424             returned in range_fl.
425           */
426           if (table->file->inited ||
427               (outer_tables & item_field->table_ref->map()) ||
428               !find_key_for_maxmin(is_max, &ref, item_field, conds,
429                                    &range_fl, &prefix_len))
430           {
431             const_result= 0;
432             break;
433           }
434           if ((error= table->file->ha_index_init((uint) ref.key, 1)))
435           {
436             table->file->print_error(error, MYF(0));
437             table->set_keyread(FALSE);
438             DBUG_RETURN(error);
439           }
440 
441           /*
442             Necessary columns to read from the index have been determined by
443             find_key_for_maxmin(); they are the columns involved in
444             'WHERE col=const' and the aggregated one.
445             We may not need all columns of read_set, neither all columns of
446             the index.
447           */
448           DBUG_ASSERT(table->read_set == &table->def_read_set);
449           DBUG_ASSERT(bitmap_is_clear_all(&table->tmp_set));
450           table->read_set= &table->tmp_set;
451           table->mark_columns_used_by_index_no_reset(ref.key, table->read_set,
452                                                      ref.key_parts);
453           // The aggregated column may or not be included in ref.key_parts.
454           bitmap_set_bit(table->read_set, item_field->field->field_index);
455           error= is_max ?
456                  get_index_max_value(table, &ref, range_fl) :
457                  get_index_min_value(table, &ref, item_field, range_fl,
458                                      prefix_len);
459 
460           /*
461             Set TABLE::status to STATUS_GARBAGE since original and
462             real read_set are different, i.e. some field values
463             from original read set could be unread.
464           */
465           if (!bitmap_is_subset(&table->def_read_set, &table->tmp_set))
466             table->status|= STATUS_GARBAGE;
467 
468           table->read_set= &table->def_read_set;
469           bitmap_clear_all(&table->tmp_set);
470           /* Verify that the read tuple indeed matches the search key */
471 	  if (!error && reckey_in_range(is_max, &ref, item_field,
472 			                conds, range_fl, prefix_len))
473 	    error= HA_ERR_KEY_NOT_FOUND;
474           table->set_keyread(FALSE);
475           table->file->ha_index_end();
476           if (error)
477 	  {
478 	    if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
479 	      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); // No rows matching WHERE
480 	    /* HA_ERR_LOCK_DEADLOCK or some other error */
481  	    table->file->print_error(error, MYF(0));
482             DBUG_RETURN(error);
483 	  }
484           removed_tables|= item_field->table_ref->map();
485         }
486         else if (!expr->const_item() || conds || !is_exact_count)
487         {
488           /*
489             We get here if the aggregate function is not based on a field.
490             Example: "SELECT MAX(1) FROM table ..."
491 
492             This constant optimization is not applicable if
493             1. the expression is not constant, or
494             2. it is unknown if the query returns any rows. MIN/MAX must return
495                NULL if the query doesn't return any rows. We can't determine
496                this if:
497                - the query has a condition, because, in contrast to the
498                  "MAX(field)" case above, the condition will not be evaluated
499                  against an index for this case, or
500                - the storage engine does not provide exact count, which means
501                  that it doesn't know whether there are any rows.
502           */
503           const_result= 0;
504           break;
505         }
506         item_sum->set_aggregator(item_sum->has_with_distinct() ?
507                                  Aggregator::DISTINCT_AGGREGATOR :
508                                  Aggregator::SIMPLE_AGGREGATOR);
509         /*
510           If count == 0 (so is_exact_count == TRUE) and
511           there're no outer joins, set to NULL,
512           otherwise set to the constant value.
513         */
514         if (!count && !outer_tables)
515         {
516           item_sum->aggregator_clear();
517           // Mark the aggregated value as based on no rows
518           item->no_rows_in_result();
519         }
520         else
521           item_sum->reset_and_add();
522         item_sum->make_const();
523         recalc_const_item= 1;
524         break;
525       }
526       default:
527         const_result= 0;
528         break;
529       }
530     }
531     else if (const_result)
532     {
533       if (recalc_const_item)
534         item->update_used_tables();
535       if (!item->const_item())
536         const_result= 0;
537     }
538   }
539 
540   if (thd->is_error())
541     DBUG_RETURN(thd->get_stmt_da()->mysql_errno());
542 
543   /*
544     If we have a where clause, we can only ignore searching in the
545     tables if MIN/MAX optimisation replaced all used tables
546     We do not use replaced values in case of:
547     SELECT MIN(key) FROM table_1, empty_table
548     removed_tables is != 0 if we have used MIN() or MAX().
549   */
550   if (removed_tables && used_tables != removed_tables)
551     const_result= 0;                            // We didn't remove all tables
552   DBUG_RETURN(const_result);
553 }
554 
555 
556 /**
557   Test if the predicate compares a field with constants.
558 
559   @param func_item        Predicate item
560   @param[out] args        Here we store the field followed by constants
561   @param[out] inv_order   Is set to 1 if the predicate is of the form
562                           'const op field'
563 
564   @retval
565     0        func_item is a simple predicate: a field is compared with
566     constants
567   @retval
568     1        Otherwise
569 */
570 
simple_pred(Item_func * func_item,Item ** args,bool * inv_order)571 bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
572 {
573   Item *item;
574   *inv_order= 0;
575   switch (func_item->argument_count()) {
576   case 0:
577     /* MULT_EQUAL_FUNC */
578     {
579       Item_equal *item_equal= (Item_equal *) func_item;
580       Item_equal_iterator it(*item_equal);
581       args[0]= it++;
582       if (it++)
583         return 0;
584       if (!(args[1]= item_equal->get_const()))
585         return 0;
586     }
587     break;
588   case 1:
589     /* field IS NULL */
590     item= func_item->arguments()[0];
591     if (item->type() != Item::FIELD_ITEM)
592       return 0;
593     args[0]= item;
594     break;
595   case 2:
596     /* 'field op const' or 'const op field' */
597     item= func_item->arguments()[0];
598     if (item->type() == Item::FIELD_ITEM)
599     {
600       args[0]= item;
601       item= func_item->arguments()[1];
602       if (!item->const_item())
603         return 0;
604       args[1]= item;
605     }
606     else if (item->const_item())
607     {
608       args[1]= item;
609       item= func_item->arguments()[1];
610       if (item->type() != Item::FIELD_ITEM)
611         return 0;
612       args[0]= item;
613       *inv_order= 1;
614     }
615     else
616       return 0;
617     break;
618   case 3:
619     /* field BETWEEN const AND const */
620     item= func_item->arguments()[0];
621     if (item->type() == Item::FIELD_ITEM)
622     {
623       args[0]= item;
624       for (int i= 1 ; i <= 2; i++)
625       {
626         item= func_item->arguments()[i];
627         if (!item->const_item())
628           return 0;
629         args[i]= item;
630       }
631     }
632     else
633       return 0;
634   }
635   return 1;
636 }
637 
638 
639 /**
640   Check whether a condition matches a key to get {MAX|MIN}(field):.
641 
642    For the index specified by the keyinfo parameter and an index that
643    contains the field as its component (field_part), the function
644    checks whether
645 
646    - the condition cond is a conjunction,
647    - all of its conjuncts refer to columns of the same table, and
648    - each conjunct is on one of the following forms:
649      - f_i = const_i or const_i = f_i or f_i IS NULL,
650        where f_i is part of the index
651      - field {<|<=|>=|>|=} const
652      - const {<|<=|>=|>|=} field
653      - field BETWEEN const_1 AND const_2
654 
655    As a side-effect, the key value to be used for looking up the MIN/MAX value
656    is actually stored inside the Field object. An interesting feature is that
657    the function will find the most restrictive endpoint by over-eager
658    evaluation of the @c WHERE condition. It continually stores the current
659    endpoint inside the Field object. For a query such as
660 
661    @code
662    SELECT MIN(a) FROM t1 WHERE a > 3 AND a > 5;
663    @endcode
664 
665    the algorithm will recurse over the conjuction, storing first a 3 in the
666    field. In the next recursive invocation the expression a > 5 is evaluated
667    as 3 > 5 (Due to the dual nature of Field objects as value carriers and
668    field identifiers), which will obviously fail, leading to 5 being stored in
669    the Field object.
670 
671    @param[in]     max_fl         Set to true if we are optimizing MAX(),
672                                  false means we are optimizing %MIN()
673    @param[in, out] ref           Reference to the structure where the function
674                                  stores the key value
675    @param[in]     keyinfo        Reference to the key info
676    @param[in]     field_part     Pointer to the key part for the field
677    @param[in]     cond           WHERE condition
678    @param[in]     map            Table map for the key
679    @param[in,out] key_part_used  Map of matchings parts. The function will output
680                                  the set of key parts actually being matched in
681                                  this set, yet it relies on the caller to
682                                  initialize the value to zero. This is due
683                                  to the fact that this value is passed
684                                  recursively.
685    @param[in,out] range_fl       Says whether endpoints use strict greater/less
686                                  than.
687    @param[out]    prefix_len     Length of common key part for the range
688                                  where MAX/MIN is searched for
689 
690   @retval
691     false    Index can't be used.
692   @retval
693     true     We can use the index to get MIN/MAX value
694 */
695 
matching_cond(bool max_fl,TABLE_REF * ref,KEY * keyinfo,KEY_PART_INFO * field_part,Item * cond,table_map map,key_part_map * key_part_used,uint * range_fl,uint * prefix_len)696 static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
697                           KEY_PART_INFO *field_part, Item *cond,
698                           table_map map, key_part_map *key_part_used,
699                           uint *range_fl, uint *prefix_len)
700 {
701   DBUG_ENTER("matching_cond");
702   if (!cond)
703     DBUG_RETURN(TRUE);
704 
705   if (!(cond->used_tables() & map))
706   {
707     /* Condition doesn't restrict the used table */
708     DBUG_RETURN(TRUE);
709   }
710   if (cond->type() == Item::COND_ITEM)
711   {
712     if (((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC)
713       DBUG_RETURN(FALSE);
714 
715     /* AND */
716     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
717     Item *item;
718     while ((item= li++))
719     {
720       if (!matching_cond(max_fl, ref, keyinfo, field_part, item, map,
721                          key_part_used, range_fl, prefix_len))
722         DBUG_RETURN(FALSE);
723     }
724     DBUG_RETURN(TRUE);
725   }
726 
727   if (cond->type() != Item::FUNC_ITEM)
728     DBUG_RETURN(FALSE);                                 // Not operator, can't optimize
729 
730   bool eq_type= 0;                            // =, <=> or IS NULL
731   bool is_null_safe_eq= FALSE;                // The operator is NULL safe, e.g. <=>
732   bool noeq_type= 0;                          // < or >
733   bool less_fl= 0;                            // < or <=
734   bool is_null= 0;                            // IS NULL
735   bool between= 0;                            // BETWEEN ... AND ...
736 
737   switch (((Item_func*) cond)->functype()) {
738   case Item_func::ISNULL_FUNC:
739     is_null= 1;     /* fall through */
740   case Item_func::EQ_FUNC:
741     eq_type= TRUE;
742     break;
743   case Item_func::EQUAL_FUNC:
744     eq_type= is_null_safe_eq= TRUE;
745     break;
746   case Item_func::LT_FUNC:
747     noeq_type= 1;   /* fall through */
748   case Item_func::LE_FUNC:
749     less_fl= 1;
750     break;
751   case Item_func::GT_FUNC:
752     noeq_type= 1;   /* fall through */
753   case Item_func::GE_FUNC:
754     break;
755   case Item_func::BETWEEN:
756     between= 1;
757 
758     // NOT BETWEEN is equivalent to OR and is therefore not a conjunction
759     if (((Item_func_between*)cond)->negated)
760       DBUG_RETURN(false);
761 
762     break;
763   case Item_func::MULT_EQUAL_FUNC:
764     eq_type= 1;
765     break;
766   default:
767     DBUG_RETURN(FALSE);                                        // Can't optimize function
768   }
769 
770   Item *args[3];
771   bool inv;
772 
773   /* Test if this is a comparison of a field and constant */
774   if (!simple_pred((Item_func*) cond, args, &inv))
775     DBUG_RETURN(FALSE);
776 
777   if (!is_null_safe_eq && !is_null &&
778       (args[1]->is_null() || (between && args[2]->is_null())))
779     DBUG_RETURN(FALSE);
780 
781   if (inv && !eq_type)
782     less_fl= 1-less_fl;                         // Convert '<' -> '>' (etc)
783 
784   /* Check if field is part of the tested partial key */
785   uchar *key_ptr= ref->key_buff;
786   KEY_PART_INFO *part;
787   for (part= keyinfo->key_part; ; key_ptr+= part++->store_length)
788 
789   {
790     if (part > field_part)
791       DBUG_RETURN(FALSE);                     // Field is beyond the tested parts
792     if (part->field->eq(((Item_field*) args[0])->field))
793       break;                        // Found a part of the key for the field
794   }
795 
796   bool is_field_part= part == field_part;
797   if (!(is_field_part || eq_type))
798     DBUG_RETURN(FALSE);
799 
800   key_part_map org_key_part_used= *key_part_used;
801   if (eq_type || between || max_fl == less_fl)
802   {
803     size_t length= (key_ptr-ref->key_buff)+part->store_length;
804     if (ref->key_length < length)
805     {
806     /* Ultimately ref->key_length will contain the length of the search key */
807       ref->key_length= length;
808       ref->key_parts= (part - keyinfo->key_part) + 1;
809     }
810     if (!*prefix_len && part+1 == field_part)
811       *prefix_len= length;
812     if (is_field_part && eq_type)
813       *prefix_len= ref->key_length;
814 
815     *key_part_used|= (key_part_map) 1 << (part - keyinfo->key_part);
816   }
817 
818   if (org_key_part_used == *key_part_used &&
819     /*
820       The current search key is not being extended with a new key part.  This
821       means that the a condition is added a key part for which there was a
822       previous condition. We can only overwrite such key parts in some special
823       cases, e.g. a > 2 AND a > 1 (here range_fl must be set to something). In
824       all other cases the WHERE condition is always false anyway.
825     */
826       (eq_type || *range_fl == 0))
827       DBUG_RETURN(FALSE);
828 
829   if (org_key_part_used != *key_part_used ||
830       (is_field_part &&
831        (between || eq_type || max_fl == less_fl) && !cond->val_int()))
832   {
833     /*
834       It's the first predicate for this part or a predicate of the
835       following form  that moves upper/lower bounds for max/min values:
836       - field BETWEEN const AND const
837       - field = const
838       - field {<|<=} const, when searching for MAX
839       - field {>|>=} const, when searching for MIN
840     */
841 
842     if (is_null || (is_null_safe_eq && args[1]->is_null()))
843     {
844       /*
845         If we have a non-nullable index, we cannot use it,
846         since set_null will be ignored, and we will compare uninitialized data.
847       */
848       if (!part->field->real_maybe_null())
849         DBUG_RETURN(false);
850       part->field->set_null();
851       *key_ptr= (uchar) 1;
852     }
853     else
854     {
855       /* Update endpoints for MAX/MIN, see function comment. */
856       Item *value= args[between && max_fl ? 2 : 1];
857 
858       /*
859         A perfect save is neccessary. Truncated / incorrect value can result
860         in an incorrect index lookup. Truncation of trailing space is ignored
861         since that is expected for strings even in other cases.
862       */
863       type_conversion_status retval=
864         value->save_in_field_no_warnings(part->field, true);
865       if (!(retval == TYPE_OK || retval == TYPE_NOTE_TRUNCATED))
866         DBUG_RETURN(false);
867 
868       if (part->null_bit)
869         *key_ptr++= (uchar) MY_TEST(part->field->is_null());
870       part->field->get_key_image(key_ptr, part->length, Field::itRAW);
871     }
872     if (is_field_part)
873     {
874       if (between || eq_type)
875         *range_fl&= ~(NO_MAX_RANGE | NO_MIN_RANGE);
876       else
877       {
878         *range_fl&= ~(max_fl ? NO_MAX_RANGE : NO_MIN_RANGE);
879         if (noeq_type)
880           *range_fl|=  (max_fl ? NEAR_MAX : NEAR_MIN);
881         else
882           *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN);
883       }
884     }
885   }
886   else if (eq_type)
887   {
888     if ((!is_null && !cond->val_int()) ||
889         (is_null && !MY_TEST(part->field->is_null())))
890      DBUG_RETURN(FALSE);                       // Impossible test
891   }
892   else if (is_field_part)
893     *range_fl&= ~(max_fl ? NO_MIN_RANGE : NO_MAX_RANGE);
894   DBUG_RETURN(TRUE);
895 }
896 
897 
898 /**
899   Check whether we can get value for {max|min}(field) by using a key.
900 
901      If where-condition is not a conjunction of 0 or more conjuct the
902      function returns false, otherwise it checks whether there is an
903      index including field as its k-th component/part such that:
904 
905      -# for each previous component f_i there is one and only one conjunct
906         of the form: f_i= const_i or const_i= f_i or f_i is null
907      -# references to field occur only in conjucts of the form:
908         field {<|<=|>=|>|=} const or const {<|<=|>=|>|=} field or
909         field BETWEEN const1 AND const2
910      -# all references to the columns from the same table as column field
911         occur only in conjucts mentioned above.
912      -# each of k first components the index is not partial, i.e. is not
913         defined on a fixed length proper prefix of the field.
914 
915      If such an index exists the function through the ref parameter
916      returns the key value to find max/min for the field using the index,
917      the length of first (k-1) components of the key and flags saying
918      how to apply the key for the search max/min value.
919      (if we have a condition field = const, prefix_len contains the length
920      of the whole search key)
921 
922   @param[in]     max_fl      0 for MIN(field) / 1 for MAX(field)
923   @param[in,out] ref         Reference to the structure we store the key value
924   @param[in]     item_field  Field used inside MIN() / MAX()
925   @param[in]     cond        WHERE condition
926   @param[out]    range_fl    Bit flags for how to search if key is ok
927   @param[out]    prefix_len  Length of prefix for the search range
928 
929   @note
930     This function may set field->table->key_read to true,
931     which must be reset after index is used!
932     (This can only happen when function returns 1)
933 
934   @retval
935     0   Index can not be used to optimize MIN(field)/MAX(field)
936   @retval
937     1   Can use key to optimize MIN()/MAX().
938     In this case ref, range_fl and prefix_len are updated
939 */
940 
941 
find_key_for_maxmin(bool max_fl,TABLE_REF * ref,Item_field * item_field,Item * cond,uint * range_fl,uint * prefix_len)942 static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
943                                 Item_field *item_field, Item *cond,
944                                 uint *range_fl, uint *prefix_len)
945 {
946   Field *const field= item_field->field;
947 
948   if (!(field->flags & PART_KEY_FLAG))
949     return false;                               // Not key field
950 
951   DBUG_ENTER("find_key_for_maxmin");
952 
953   TABLE *const table= field->table;
954   uint idx= 0;
955 
956   KEY *keyinfo,*keyinfo_end;
957   for (keyinfo= table->key_info, keyinfo_end= keyinfo+table->s->keys ;
958        keyinfo != keyinfo_end;
959        keyinfo++,idx++)
960   {
961     KEY_PART_INFO *part,*part_end;
962     key_part_map key_part_to_use= 0;
963     /*
964       Perform a check if index is not disabled by ALTER TABLE
965       or IGNORE INDEX.
966     */
967     if (!table->keys_in_use_for_query.is_set(idx))
968       continue;
969     uint jdx= 0;
970     *prefix_len= 0;
971     for (part= keyinfo->key_part, part_end= part + actual_key_parts(keyinfo) ;
972          part != part_end ;
973          part++, jdx++, key_part_to_use= (key_part_to_use << 1) | 1)
974     {
975       if (!(table->file->index_flags(idx, jdx, 0) & HA_READ_ORDER))
976         DBUG_RETURN(false);
977 
978       /* Check whether the index component is partial */
979       Field *part_field= table->field[part->fieldnr-1];
980       if ((part_field->flags & BLOB_FLAG) ||
981           part->length < part_field->key_length())
982         break;
983 
984       if (field->eq(part->field))
985       {
986         ref->key= idx;
987         ref->key_length= 0;
988         ref->key_parts= 0;
989         key_part_map key_part_used= 0;
990         *range_fl= NO_MIN_RANGE | NO_MAX_RANGE;
991         if (matching_cond(max_fl, ref, keyinfo, part, cond,
992                           item_field->table_ref->map(),
993                           &key_part_used, range_fl, prefix_len) &&
994             !(key_part_to_use & ~key_part_used))
995         {
996           if (!max_fl && key_part_used == key_part_to_use && part->null_bit)
997           {
998             /*
999               The query is on this form:
1000 
1001               SELECT MIN(key_part_k)
1002               FROM t1
1003               WHERE key_part_1 = const and ... and key_part_k-1 = const
1004 
1005               If key_part_k is nullable, we want to find the first matching row
1006               where key_part_k is not null. The key buffer is now {const, ...,
1007               NULL}. This will be passed to the handler along with a flag
1008               indicating open interval. If a tuple is read that does not match
1009               these search criteria, an attempt will be made to read an exact
1010               match for the key buffer.
1011             */
1012             /* Set the first byte of key_part_k to 1, that means NULL */
1013             ref->key_buff[ref->key_length]= 1;
1014             ref->key_length+= part->store_length;
1015             ref->key_parts++;
1016             DBUG_ASSERT(ref->key_parts == jdx+1);
1017             *range_fl&= ~NO_MIN_RANGE;
1018             *range_fl|= NEAR_MIN; // Open interval
1019           }
1020           /*
1021             The following test is false when the key in the key tree is
1022             converted (for example to upper case)
1023           */
1024           if (field->part_of_key.is_set(idx))
1025             table->set_keyread(TRUE);
1026           DBUG_RETURN(true);
1027         }
1028       }
1029     }
1030   }
1031   DBUG_RETURN(false);
1032 }
1033 
1034 
1035 /**
1036   Check whether found key is in range specified by conditions.
1037 
1038   @param[in] max_fl         0 for MIN(field) / 1 for MAX(field)
1039   @param[in] ref            Reference to the key value and info
1040   @param[in] item_field     Item representing field used in MIN/MAX expression
1041   @param[in] cond           WHERE condition
1042   @param[in] range_fl       Says whether there is a condition to to be checked
1043   @param[in] prefix_len     Length of the constant part of the key
1044 
1045   @retval
1046     0        ok
1047   @retval
1048     1        WHERE was not true for the found row
1049 */
1050 
reckey_in_range(bool max_fl,TABLE_REF * ref,Item_field * item_field,Item * cond,uint range_fl,uint prefix_len)1051 static int reckey_in_range(bool max_fl, TABLE_REF *ref, Item_field *item_field,
1052                             Item *cond, uint range_fl, uint prefix_len)
1053 {
1054   if (key_cmp_if_same(item_field->field->table, ref->key_buff, ref->key,
1055                       prefix_len))
1056     return 1;
1057   if (!cond || (range_fl & (max_fl ? NO_MIN_RANGE : NO_MAX_RANGE)))
1058     return 0;
1059   return maxmin_in_range(max_fl, item_field, cond);
1060 }
1061 
1062 
1063 /**
1064   Check whether {MAX|MIN}(field) is in range specified by conditions.
1065 
1066   @param[in] max_fl          0 for MIN(field) / 1 for MAX(field)
1067   @param[in] item_field      Item representing field used in MIN/MAX expression
1068   @param[in] cond            WHERE condition
1069 
1070   @retval
1071     0        ok
1072   @retval
1073     1        WHERE was not true for the found row
1074 */
1075 
maxmin_in_range(bool max_fl,Item_field * item_field,Item * cond)1076 static int maxmin_in_range(bool max_fl, Item_field *item_field, Item *cond)
1077 {
1078   /* If AND/OR condition */
1079   if (cond->type() == Item::COND_ITEM)
1080   {
1081     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
1082     Item *item;
1083     while ((item= li++))
1084     {
1085       if (maxmin_in_range(max_fl, item_field, item))
1086         return 1;
1087     }
1088     return 0;
1089   }
1090 
1091   if (cond->used_tables() != item_field->table_ref->map())
1092     return 0;
1093   bool less_fl= false;
1094   switch (((Item_func*) cond)->functype()) {
1095   case Item_func::BETWEEN:
1096     return cond->val_int() == 0;                // Return 1 if WHERE is false
1097   case Item_func::LT_FUNC:
1098   case Item_func::LE_FUNC:
1099     less_fl= 1;
1100     // Fall through.
1101   case Item_func::GT_FUNC:
1102   case Item_func::GE_FUNC:
1103   {
1104     Item *item= ((Item_func*) cond)->arguments()[1];
1105     /* In case of 'const op item' we have to swap the operator */
1106     if (!item->const_item())
1107       less_fl= 1-less_fl;
1108     /*
1109       We only have to check the expression if we are using an expression like
1110       SELECT MAX(b) FROM t1 WHERE a=const AND b>const
1111       not for
1112       SELECT MAX(b) FROM t1 WHERE a=const AND b<const
1113     */
1114     if (max_fl != less_fl)
1115       return cond->val_int() == 0;                // Return 1 if WHERE is false
1116     return 0;
1117   }
1118   case Item_func::EQ_FUNC:
1119   case Item_func::EQUAL_FUNC:
1120     break;
1121   default:                                        // Keep compiler happy
1122     DBUG_ASSERT(1);                               // Impossible
1123     break;
1124   }
1125   return 0;
1126 }
1127 
1128