1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief
27   mysql_select and join optimization
28 
29 
30   @defgroup Query_Optimizer  Query Optimizer
31   @{
32 */
33 
34 #include "sql_priv.h"
35 #include "sql_select.h"
36 #include "sql_table.h"                          // primary_key_name
37 #include "sql_derived.h"
38 #include "probes_mysql.h"
39 #include "opt_trace.h"
40 #include "key.h"                 // key_copy, key_cmp, key_cmp_if_same
41 #include "lock.h"                // mysql_unlock_some_tables,
42                                  // mysql_unlock_read_tables
43 #include "sql_show.h"            // append_identifier
44 #include "sql_base.h"            // setup_wild, setup_fields, fill_record
45 #include "sql_acl.h"             // *_ACL
46 #include "sql_test.h"            // misc. debug printing utilities
47 #include "records.h"             // init_read_record, end_read_record
48 #include "filesort.h"            // filesort_free_buffers
49 #include "sql_union.h"           // mysql_union
50 #include "opt_explain.h"
51 #include "sql_join_buffer.h"     // JOIN_CACHE
52 #include "sql_optimizer.h"       // JOIN
53 #include "sql_tmp_table.h"       // tmp tables
54 
55 #include <algorithm>
56 using std::max;
57 using std::min;
58 
59 static store_key *get_store_key(THD *thd,
60 				Key_use *keyuse, table_map used_tables,
61 				KEY_PART_INFO *key_part, uchar *key_buff,
62 				uint maybe_null);
63 bool const_expression_in_where(Item *conds,Item *item, Item **comp_item);
64 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
65 static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
66                                      ORDER *order, TABLE *table,
67                                      key_map usable_keys, int key,
68                                      ha_rows select_limit,
69                                      int *new_key, int *new_key_direction,
70                                      ha_rows *new_select_limit,
71                                      uint *new_used_key_parts= NULL,
72                                      uint *saved_best_key_parts= NULL);
73 static uint join_buffer_alg(const THD *thd);
74 static void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
75                             Opt_trace_object *trace_obj);
76 
77 /**
78   This handles SELECT with and without UNION
79 */
80 
handle_select(THD * thd,select_result * result,ulong setup_tables_done_option)81 bool handle_select(THD *thd, select_result *result,
82                    ulong setup_tables_done_option)
83 {
84   bool res;
85   LEX *lex= thd->lex;
86   SELECT_LEX *select_lex = &lex->select_lex;
87   DBUG_ENTER("handle_select");
88   MYSQL_SELECT_START(thd->query());
89 
90   if (lex->proc_analyse && lex->sql_command != SQLCOM_SELECT)
91   {
92     my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
93     DBUG_RETURN(true);
94   }
95 
96   if (select_lex->master_unit()->is_union() ||
97       select_lex->master_unit()->fake_select_lex)
98     res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option);
99   else
100   {
101     SELECT_LEX_UNIT *unit= &lex->unit;
102     unit->set_limit(unit->global_parameters);
103     /*
104       'options' of mysql_select will be set in JOIN, as far as JOIN for
105       every PS/SP execution new, we will not need reset this flag if
106       setup_tables_done_option changed for next rexecution
107     */
108     res= mysql_select(thd,
109 		      select_lex->table_list.first,
110 		      select_lex->with_wild, select_lex->item_list,
111 		      select_lex->where,
112 		      &select_lex->order_list,
113 		      &select_lex->group_list,
114 		      select_lex->having,
115 		      select_lex->options | thd->variables.option_bits |
116                       setup_tables_done_option,
117 		      result, unit, select_lex);
118   }
119   DBUG_PRINT("info",("res: %d  report_error: %d", res,
120 		     thd->is_error()));
121   res|= thd->is_error();
122   if (unlikely(res))
123     result->abort_result_set();
124 
125   MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
126   DBUG_RETURN(res);
127 }
128 
129 
130 /*****************************************************************************
131   Check fields, find best join, do the select and output fields.
132   All tables must be opened.
133 *****************************************************************************/
134 
135 /**
136   @brief Check if two items are compatible wrt. materialization.
137 
138   @param outer Expression from outer query
139   @param inner Expression from inner query
140 
141   @retval TRUE   If subquery types allow materialization.
142   @retval FALSE  Otherwise.
143 */
144 
types_allow_materialization(Item * outer,Item * inner)145 bool types_allow_materialization(Item *outer, Item *inner)
146 
147 {
148   if (outer->result_type() != inner->result_type())
149     return FALSE;
150   switch (outer->result_type()) {
151   case STRING_RESULT:
152     if (outer->is_temporal_with_date() != inner->is_temporal_with_date())
153       return FALSE;
154     if (!(outer->collation.collation == inner->collation.collation
155         /*&& outer->max_length <= inner->max_length */))
156       return FALSE;
157   /*case INT_RESULT:
158     if (!(outer->unsigned_flag ^ inner->unsigned_flag))
159       return FALSE; */
160   default:
161     ;                 /* suitable for materialization */
162   }
163   return TRUE;
164 }
165 
166 
167 /*
168   Check if the table's rowid is included in the temptable
169 
170   SYNOPSIS
171     sj_table_is_included()
172       join      The join
173       join_tab  The table to be checked
174 
175   DESCRIPTION
176     SemiJoinDuplicateElimination: check the table's rowid should be included
177     in the temptable. This is so if
178 
179     1. The table is not embedded within some semi-join nest
180     2. The has been pulled out of a semi-join nest, or
181 
182     3. The table is functionally dependent on some previous table
183 
184     [4. This is also true for constant tables that can't be
185         NULL-complemented but this function is not called for such tables]
186 
187   RETURN
188     TRUE  - Include table's rowid
189     FALSE - Don't
190 */
191 
sj_table_is_included(JOIN * join,JOIN_TAB * join_tab)192 static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
193 {
194   if (join_tab->emb_sj_nest)
195     return FALSE;
196 
197   /* Check if this table is functionally dependent on the tables that
198      are within the same outer join nest
199   */
200   TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding;
201   if (join_tab->type == JT_EQ_REF)
202   {
203     table_map depends_on= 0;
204     uint idx;
205 
206     for (uint kp= 0; kp < join_tab->ref.key_parts; kp++)
207       depends_on |= join_tab->ref.items[kp]->used_tables();
208 
209     Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
210     while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
211     {
212       JOIN_TAB *ref_tab= join->map2table[idx];
213       if (embedding != ref_tab->table->pos_in_table_list->embedding)
214         return TRUE;
215     }
216     /* Ok, functionally dependent */
217     return FALSE;
218   }
219   /* Not functionally dependent => need to include*/
220   return TRUE;
221 }
222 
223 /**
224    Check if the optimizer might choose to use join buffering for this
225    join. If that is the case, and if duplicate weedout semijoin
226    strategy is used, the duplicate generating range must be extended
227    to the first non-const table.
228 
229    This function is called from setup_semijoin_dups_elimination()
230    before the final decision is made on whether or not buffering is
231    used. It is therefore only a rough test that covers all cases where
232    join buffering might be used, but potentially also some cases where
233    join buffering will not be used.
234 
235    @param join_buffer_alg      Bitmap with possible join buffer algorithms
236    @param sj_tab               Table that might be joined by BNL/BKA
237 
238    @return
239       true if join buffering might be used, false otherwise
240 
241  */
might_do_join_buffering(uint join_buffer_alg,const JOIN_TAB * sj_tab)242 static bool might_do_join_buffering(uint join_buffer_alg,
243                                     const JOIN_TAB *sj_tab)
244 {
245   /*
246      (1) sj_tab is not a const table
247   */
248   int sj_tabno= sj_tab - sj_tab->join->join_tab;
249   return (sj_tabno >= (int)sj_tab->join->const_tables && // (1)
250           sj_tab->use_quick != QS_DYNAMIC_RANGE &&
251           (((join_buffer_alg & JOIN_CACHE::ALG_BNL) &&
252             sj_tab->type == JT_ALL) ||
253            ((join_buffer_alg &
254              (JOIN_CACHE::ALG_BKA | JOIN_CACHE::ALG_BKA_UNIQUE)) &&
255             (sj_tab->type == JT_REF ||
256              sj_tab->type == JT_EQ_REF ||
257              sj_tab->type == JT_CONST))));
258 }
259 
260 /**
261   Setup the strategies to eliminate semi-join duplicates.
262 
263   @param join           Join to process
264   @param options        Join options (needed to see if join buffering will be
265                         used or not)
266   @param no_jbuf_after  Do not use join buffering after the table with this
267                         number
268 
269   @retval FALSE  OK
270   @retval TRUE   Out of memory error
271 
272   @details
273     Setup the strategies to eliminate semi-join duplicates.
274     At the moment there are 5 strategies:
275 
276     1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
277                          of row combinations)
278     2. FirstMatch (pick only the 1st matching row combination of inner tables)
279     3. LooseScan (scanning the sj-inner table in a way that groups duplicates
280                   together and picking the 1st one)
281     4. MaterializeLookup (Materialize inner tables, then setup a scan over
282                           outer correlated tables, lookup in materialized table)
283     5. MaterializeScan (Materialize inner tables, then setup a scan over
284                         materialized tables, perform lookup in outer tables)
285 
286     The join order has "duplicate-generating ranges", and every range is
287     served by one strategy or a combination of FirstMatch with with some
288     other strategy.
289 
290     "Duplicate-generating range" is defined as a range within the join order
291     that contains all of the inner tables of a semi-join. All ranges must be
292     disjoint, if tables of several semi-joins are interleaved, then the ranges
293     are joined together, which is equivalent to converting
294       SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
295     to
296       SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
297     .
298 
299     Applicability conditions are as follows:
300 
301     DuplicateWeedout strategy
302     ~~~~~~~~~~~~~~~~~~~~~~~~~
303 
304       (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
305       +------+  +=========================+  +---+
306         (1)                 (2)               (3)
307 
308        (1) - Prefix of OuterTables (those that participate in
309              IN-equality and/or are correlated with subquery) and outer
310              Non-correlated tables.
311        (2) - The handled range. The range starts with the first sj-inner
312              table, and covers all sj-inner and outer tables
313              Within the range,  Inner, Outer, outer non-correlated tables
314              may follow in any order.
315        (3) - The suffix of outer non-correlated tables.
316 
317     FirstMatch strategy
318     ~~~~~~~~~~~~~~~~~~~
319 
320       (ot|nt)*  [ it ((it|nt)* it) ]  (nt)*
321       +------+  +==================+  +---+
322         (1)             (2)          (3)
323 
324       (1) - Prefix of outer correlated and non-correlated tables
325       (2) - The handled range, which may contain only inner and
326             non-correlated tables.
327       (3) - The suffix of outer non-correlated tables.
328 
329     LooseScan strategy
330     ~~~~~~~~~~~~~~~~~~
331 
332      (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
333      +--------+   +===========+ +=============+   +------+
334         (1)           (2)          (3)              (4)
335 
336       (1) - Prefix that may contain any outer tables. The prefix must contain
337             all the non-trivially correlated outer tables. (non-trivially means
338             that the correlation is not just through the IN-equality).
339 
340       (2) - Inner table for which the LooseScan scan is performed.
341             Notice that special requirements for existence of certain indexes
342             apply to this table, @see class Loose_scan_opt.
343 
344       (3) - The remainder of the duplicate-generating range. It is served by
345             application of FirstMatch strategy. Outer IN-correlated tables
346             must be correlated to the LooseScan table but not to the inner
347             tables in this range. (Currently, there can be no outer tables
348             in this range because of implementation restrictions,
349             @see Optimize_table_order::advance_sj_state()).
350 
351       (4) - The suffix of outer correlated and non-correlated tables.
352 
353     MaterializeLookup strategy
354     ~~~~~~~~~~~~~~~~~~~~~~~~~~
355 
356      (ot|nt)*  [ it (it)* ]  (nt)*
357      +------+  +==========+  +---+
358         (1)         (2)        (3)
359 
360       (1) - Prefix of outer correlated and non-correlated tables.
361 
362       (2) - The handled range, which may contain only inner tables.
363             The inner tables are materialized in a temporary table that is
364             later used as a lookup structure for the outer correlated tables.
365 
366       (3) - The suffix of outer non-correlated tables.
367 
368     MaterializeScan strategy
369     ~~~~~~~~~~~~~~~~~~~~~~~~~~
370 
371      (ot|nt)*  [ it (it)* ]  (ot|nt)*
372      +------+  +==========+  +-----+
373         (1)         (2)         (3)
374 
375       (1) - Prefix of outer correlated and non-correlated tables.
376 
377       (2) - The handled range, which may contain only inner tables.
378             The inner tables are materialized in a temporary table which is
379             later used to setup a scan.
380 
381       (3) - The suffix of outer correlated and non-correlated tables.
382 
383   Note that MaterializeLookup and MaterializeScan has overlap in their patterns.
384   It may be possible to consolidate the materialization strategies into one.
385 
386   The choice between the strategies is made by the join optimizer (see
387   advance_sj_state() and fix_semijoin_strategies()).
388   This function sets up all fields/structures/etc needed for execution except
389   for setup/initialization of semi-join materialization which is done in
390   setup_materialized_table().
391 */
392 
setup_semijoin_dups_elimination(JOIN * join,ulonglong options,uint no_jbuf_after)393 static bool setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
394                                             uint no_jbuf_after)
395 {
396   uint tableno;
397   THD *thd= join->thd;
398   DBUG_ENTER("setup_semijoin_dups_elimination");
399 
400   if (join->select_lex->sj_nests.is_empty())
401     DBUG_RETURN(FALSE);
402 
403   for (tableno= join->const_tables; tableno < join->primary_tables; )
404   {
405     JOIN_TAB *const tab= join->join_tab + tableno;
406     POSITION *const pos= tab->position;
407     uint keylen, keyno;
408     if (pos->sj_strategy == SJ_OPT_NONE)
409     {
410       tableno++;  // nothing to do
411       continue;
412     }
413     JOIN_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
414     switch (pos->sj_strategy) {
415       case SJ_OPT_MATERIALIZE_LOOKUP:
416       case SJ_OPT_MATERIALIZE_SCAN:
417         DBUG_ASSERT(false); // Should not occur among "primary" tables
418         // Do nothing
419         tableno+= pos->n_sj_tables;
420         break;
421       case SJ_OPT_LOOSE_SCAN:
422       {
423         DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
424         /* We jump from the last table to the first one */
425         tab->match_tab= last_sj_tab;
426 
427         /* For LooseScan, duplicate elimination is based on rows being sorted
428            on key. We need to make sure that range select keeps the sorted index
429            order. (When using MRR it may not.)
430 
431            Note: need_sorted_output() implementations for range select classes
432            that do not support sorted output, will trigger an assert. This
433            should not happen since LooseScan strategy is only picked if sorted
434            output is supported.
435         */
436         if (tab->select && tab->select->quick)
437         {
438           if (tab->select->quick->index == pos->loosescan_key)
439             tab->select->quick->need_sorted_output();
440           else
441             tab->select->set_quick(NULL);
442         }
443         /* Calculate key length */
444         keylen= 0;
445         keyno= pos->loosescan_key;
446         for (uint kp=0; kp < pos->loosescan_parts; kp++)
447           keylen += tab->table->key_info[keyno].key_part[kp].store_length;
448 
449         tab->loosescan_key_len= keylen;
450         if (pos->n_sj_tables > 1)
451         {
452           last_sj_tab->firstmatch_return= tab;
453           last_sj_tab->match_tab= last_sj_tab;
454         }
455         tableno+= pos->n_sj_tables;
456         break;
457       }
458       case SJ_OPT_DUPS_WEEDOUT:
459       {
460         DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
461         /*
462           Consider a semijoin of one outer and one inner table, both
463           with two rows. The inner table is assumed to be confluent
464           (See sj_opt_materialize_lookup)
465 
466           If normal nested loop execution is used, we do not need to
467           include semi-join outer table rowids in the duplicate
468           weedout temp table since NL guarantees that outer table rows
469           are encountered only consecutively and because all rows in
470           the temp table are deleted for every new outer table
471           combination (example is with a confluent inner table):
472 
473             ot1.row1|it1.row1
474                  '-> temp table's have_confluent_row == FALSE
475                    |-> output ot1.row1
476                    '-> set have_confluent_row= TRUE
477             ot1.row1|it1.row2
478                  |-> temp table's have_confluent_row == TRUE
479                  | '-> do not output ot1.row1
480                  '-> no more join matches - set have_confluent_row= FALSE
481             ot1.row2|it1.row1
482                  '-> temp table's have_confluent_row == FALSE
483                    |-> output ot1.row2
484                    '-> set have_confluent_row= TRUE
485               ...
486 
487           Note: not having outer table rowids in the temp table and
488           then emptying the temp table when a new outer table row
489           combinition is encountered is an optimization. Including
490           outer table rowids in the temp table is not harmful but
491           wastes memory.
492 
493           Now consider the join buffering algorithms (BNL/BKA). These
494           algorithms join each inner row with outer rows in "reverse"
495           order compared to NL. Effectively, this means that outer
496           table rows may be encountered multiple times in a
497           non-consecutive manner:
498 
499             NL:                 BNL/BKA:
500             ot1.row1|it1.row1   ot1.row1|it1.row1
501             ot1.row1|it1.row2   ot1.row2|it1.row1
502             ot1.row2|it1.row1   ot1.row1|it1.row2
503             ot1.row2|it1.row2   ot1.row2|it1.row2
504 
505           It is clear from the above that there is no place we can
506           empty the temp table like we do in NL to avoid storing outer
507           table rowids.
508 
509           Below we check if join buffering might be used. If so, set
510           first_table to the first non-constant table so that outer
511           table rowids are included in the temp table. Do not destroy
512           other duplicate elimination methods.
513         */
514         uint first_table= tableno;
515         for (uint sj_tableno= tableno;
516              sj_tableno < tableno + pos->n_sj_tables;
517              sj_tableno++)
518         {
519           /*
520             The final decision on whether or not join buffering will
521             be used is taken in setup_join_buffering(), which is
522             called from make_join_readinfo()'s main loop.
523             setup_join_buffering() needs to know if duplicate weedout is used,
524             so moving setup_semijoin_dups_elimination() from before the main
525             loop to after it is not possible. I.e.,
526             join->join_tab[sj_tableno]->position->use_join_buffer is not
527             trustworthy at this point.
528           */
529           /**
530             @todo: merge make_join_readinfo() and
531             setup_semijoin_dups_elimination() loops and change the
532             following 'if' to
533 
534             "if (join->join_tab[sj_tableno]->position->use_join_buffer &&
535                  sj_tableno <= no_jbuf_after)".
536 
537             For now, use a rough criteria:
538           */
539 
540           if (sj_tableno <= no_jbuf_after &&
541               might_do_join_buffering(join_buffer_alg(thd),
542                                       join->join_tab + sj_tableno))
543 
544           {
545             /* Join buffering will probably be used */
546             first_table= join->const_tables;
547             break;
548           }
549         }
550 
551         JOIN_TAB *const first_sj_tab= join->join_tab + first_table;
552         if (last_sj_tab->first_inner != NULL &&
553             first_sj_tab->first_inner != last_sj_tab->first_inner)
554         {
555           /*
556             The first duplicate weedout table is an outer table of an outer join
557             and the last duplicate weedout table is one of the inner tables of
558             the outer join.
559             In this case, we must assure that all the inner tables of the
560             outer join are part of the duplicate weedout operation.
561             This is to assure that NULL-extension for inner tables of an
562             outer join is performed before duplicate elimination is performed,
563             otherwise we will have extra NULL-extended rows being output, which
564             should have been eliminated as duplicates.
565           */
566           JOIN_TAB *tab= last_sj_tab->first_inner;
567           /*
568             First, locate the table that is the first inner table of the
569             outer join operation that first_sj_tab is outer for.
570           */
571           while (tab->first_upper != NULL &&
572                  tab->first_upper != first_sj_tab->first_inner)
573             tab= tab->first_upper;
574           // Then, extend the range with all inner tables of the join nest:
575           if (tab->first_inner->last_inner > last_sj_tab)
576             last_sj_tab= tab->first_inner->last_inner;
577         }
578 
579         SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
580         SJ_TMP_TABLE::TAB *last_tab= sjtabs;
581         uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
582         uint jt_null_bits= 0;    // # null bits in tuple bytes
583         /*
584           Walk through the range and remember
585            - tables that need their rowids to be put into temptable
586            - the last outer table
587         */
588         for (JOIN_TAB *tab_in_range= join->join_tab + first_table;
589              tab_in_range <= last_sj_tab;
590              tab_in_range++)
591         {
592           if (sj_table_is_included(join, tab_in_range))
593           {
594             last_tab->join_tab= tab_in_range;
595             last_tab->rowid_offset= jt_rowid_offset;
596             jt_rowid_offset += tab_in_range->table->file->ref_length;
597             if (tab_in_range->table->maybe_null)
598             {
599               last_tab->null_byte= jt_null_bits / 8;
600               last_tab->null_bit= jt_null_bits++;
601             }
602             last_tab++;
603             tab_in_range->table->prepare_for_position();
604             tab_in_range->keep_current_rowid= TRUE;
605           }
606         }
607 
608         SJ_TMP_TABLE *sjtbl;
609         if (jt_rowid_offset) /* Temptable has at least one rowid */
610         {
611           uint tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
612           if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE) ||
613               !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
614             DBUG_RETURN(TRUE); /* purecov: inspected */
615           memcpy(sjtbl->tabs, sjtabs, tabs_size);
616           sjtbl->is_confluent= FALSE;
617           sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
618           sjtbl->rowid_len= jt_rowid_offset;
619           sjtbl->null_bits= jt_null_bits;
620           sjtbl->null_bytes= (jt_null_bits + 7)/8;
621           sjtbl->tmp_table=
622             create_duplicate_weedout_tmp_table(thd,
623                                                sjtbl->rowid_len +
624                                                sjtbl->null_bytes,
625                                                sjtbl);
626           join->sj_tmp_tables.push_back(sjtbl->tmp_table);
627         }
628         else
629         {
630           /*
631             This is confluent case where the entire subquery predicate does
632             not depend on anything at all, ie this is
633               WHERE const IN (uncorrelated select)
634           */
635           if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE))
636             DBUG_RETURN(TRUE); /* purecov: inspected */
637           sjtbl->tmp_table= NULL;
638           sjtbl->is_confluent= TRUE;
639           sjtbl->have_confluent_row= FALSE;
640         }
641         join->join_tab[first_table].flush_weedout_table= sjtbl;
642         last_sj_tab->check_weed_out_table= sjtbl;
643 
644         tableno+= pos->n_sj_tables;
645         break;
646       }
647       case SJ_OPT_FIRST_MATCH:
648       {
649         /*
650           Setup a "jump" from the last table in the range of inner tables
651           to the last outer table before the inner tables.
652           If there are outer tables inbetween the inner tables, we have to
653           setup a "split jump": Jump from the last inner table to the last
654           outer table within the range, then from the last inner table
655           before the outer table(s), jump to the last outer table before
656           this range of inner tables, etc.
657         */
658         JOIN_TAB *jump_to= tab - 1;
659         DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
660         for (JOIN_TAB *tab_in_range= tab;
661              tab_in_range <= last_sj_tab;
662              tab_in_range++)
663         {
664           if (!tab_in_range->emb_sj_nest)
665           {
666             /*
667               Let last non-correlated table be jump target for
668               subsequent inner tables.
669             */
670             jump_to= tab_in_range;
671           }
672           else
673           {
674             /*
675               Assign jump target for last table in a consecutive range of
676               inner tables.
677             */
678             if (tab_in_range == last_sj_tab || !(tab_in_range+1)->emb_sj_nest)
679             {
680               tab_in_range->firstmatch_return= jump_to;
681               tab_in_range->match_tab= last_sj_tab;
682             }
683           }
684         }
685         tableno+= pos->n_sj_tables;
686         break;
687       }
688     }
689   }
690   DBUG_RETURN(FALSE);
691 }
692 
693 
694 /*
695   Destroy all temporary tables created by NL-semijoin runtime
696 */
697 
destroy_sj_tmp_tables(JOIN * join)698 static void destroy_sj_tmp_tables(JOIN *join)
699 {
700   List_iterator<TABLE> it(join->sj_tmp_tables);
701   TABLE *table;
702   while ((table= it++))
703   {
704     /*
705       SJ-Materialization tables are initialized for either sequential reading
706       or index lookup, DuplicateWeedout tables are not initialized for read
707       (we only write to them), so need to call ha_index_or_rnd_end.
708     */
709     table->file->ha_index_or_rnd_end();
710     free_tmp_table(join->thd, table);
711   }
712   join->sj_tmp_tables.empty();
713 }
714 
715 
716 /**
717   Remove all rows from all temp tables used by NL-semijoin runtime
718 
719   @param join  The join to remove tables for
720 
721   All rows must be removed from all temporary tables before every join
722   re-execution.
723 */
724 
clear_sj_tmp_tables(JOIN * join)725 static int clear_sj_tmp_tables(JOIN *join)
726 {
727   int res;
728   List_iterator<TABLE> it(join->sj_tmp_tables);
729   TABLE *table;
730   while ((table= it++))
731   {
732     if ((res= table->file->ha_delete_all_rows()))
733       return res; /* purecov: inspected */
734   }
735   Semijoin_mat_exec *sjm;
736   List_iterator<Semijoin_mat_exec> it2(join->sjm_exec_list);
737   while ((sjm= it2++))
738   {
739     JOIN_TAB *const tab= join->join_tab + sjm->mat_table_index;
740     DBUG_ASSERT(tab->materialize_table);
741     tab->materialized= false;
742     // The materialized table must be re-read on next evaluation:
743     tab->table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
744   }
745   return 0;
746 }
747 
748 
749 /**
750   Reset the state of this join object so that it is ready for a
751   new execution.
752 */
753 
reset()754 void JOIN::reset()
755 {
756   DBUG_ENTER("JOIN::reset");
757 
758   unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
759                                     select_lex->offset_limit->val_uint() :
760                                     ULL(0));
761 
762   first_record= false;
763   group_sent= false;
764 
765   if (tmp_tables)
766   {
767     for (uint tmp= primary_tables; tmp < primary_tables + tmp_tables; tmp++)
768     {
769       TABLE *tmp_table= join_tab[tmp].table;
770       if (!tmp_table->is_created())
771         continue;
772       tmp_table->file->extra(HA_EXTRA_RESET_STATE);
773       tmp_table->file->ha_delete_all_rows();
774       free_io_cache(tmp_table);
775       filesort_free_buffers(tmp_table,0);
776     }
777   }
778   clear_sj_tmp_tables(this);
779   if (current_ref_ptrs != items0)
780   {
781     set_items_ref_array(items0);
782     set_group_rpa= false;
783   }
784 
785   /* need to reset ref access state (see join_read_key) */
786   if (join_tab)
787     for (uint i= 0; i < tables; i++)
788       join_tab[i].ref.key_err= TRUE;
789 
790   /* Reset of sum functions */
791   if (sum_funcs)
792   {
793     Item_sum *func, **func_ptr= sum_funcs;
794     while ((func= *(func_ptr++)))
795       func->clear();
796   }
797 
798   if (!(select_options & SELECT_DESCRIBE) && select_lex->has_ft_funcs())
799     (void) init_ftfuncs(thd, select_lex, MY_TEST(order));
800 
801   DBUG_VOID_RETURN;
802 }
803 
804 
805 /**
806   Prepare join result.
807 
808   @details Prepare join result prior to join execution or describing.
809   Instantiate derived tables and get schema tables result if necessary.
810 
811   @return
812     TRUE  An error during derived or schema tables instantiation.
813     FALSE Ok
814 */
815 
prepare_result(List<Item> ** columns_list)816 bool JOIN::prepare_result(List<Item> **columns_list)
817 {
818   DBUG_ENTER("JOIN::prepare_result");
819 
820   error= 0;
821   /* Create result tables for materialized views. */
822   if (!zero_result_cause &&
823       select_lex->handle_derived(thd->lex, &mysql_derived_create))
824     goto err;
825 
826   if (result->prepare2())
827     goto err;
828 
829   if ((select_lex->options & OPTION_SCHEMA_TABLE) &&
830       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
831     goto err;
832 
833   DBUG_RETURN(FALSE);
834 
835 err:
836   error= 1;
837   DBUG_RETURN(TRUE);
838 }
839 
840 
841 /**
842   Explain join.
843 */
844 
845 bool
explain()846 JOIN::explain()
847 {
848   Opt_trace_context * const trace= &thd->opt_trace;
849   Opt_trace_object trace_wrapper(trace);
850   Opt_trace_object trace_exec(trace, "join_explain");
851   trace_exec.add_select_number(select_lex->select_number);
852   Opt_trace_array trace_steps(trace, "steps");
853   List<Item> *columns_list= &fields_list;
854   bool ret;
855   DBUG_ENTER("JOIN::explain");
856 
857   THD_STAGE_INFO(thd, stage_explaining);
858 
859   if (prepare_result(&columns_list))
860     DBUG_RETURN(true);
861 
862   if (!tables_list && (tables || !select_lex->with_sum_func))
863   {                                           // Only test of functions
864     ret= explain_no_table(thd, this, zero_result_cause ? zero_result_cause
865                                                   : "No tables used");
866     /* Single select (without union) always returns 0 or 1 row */
867     thd->limit_found_rows= send_records;
868     thd->set_examined_row_count(0);
869     DBUG_RETURN(ret);
870   }
871   /*
872     Don't reset the found rows count if there're no tables as
873     FOUND_ROWS() may be called. Never reset the examined row count here.
874     It must be accumulated from all join iterations of all join parts.
875   */
876   if (tables)
877     thd->limit_found_rows= 0;
878 
879   if (zero_result_cause)
880   {
881     ret= explain_no_table(thd, this, zero_result_cause);
882     DBUG_RETURN(ret);
883   }
884 
885   if (tables)
886     ret= explain_query_specification(thd, this);
887   else
888     ret= explain_no_table(thd, this, "No tables used");
889 
890   DBUG_RETURN(ret);
891 }
892 
893 
894 /**
895   Clean up and destroy join object.
896 
897   @return false if previous execution was successful, and true otherwise
898 */
899 
destroy()900 bool JOIN::destroy()
901 {
902   DBUG_ENTER("JOIN::destroy");
903   select_lex->join= 0;
904 
905   cond_equal= 0;
906 
907   cleanup(1);
908 
909   if (join_tab) // We should not have tables > 0 and join_tab != NULL
910   for (uint i= 0; i < tables; i++)
911   {
912     JOIN_TAB *const tab= join_tab + i;
913 
914     DBUG_ASSERT(!tab->table || !tab->table->sort.record_pointers);
915     if (tab->op)
916     {
917       if (tab->op->type() == QEP_operation::OT_TMP_TABLE)
918       {
919         if (tab->table) // Check tmp table is not yet freed.
920           free_tmp_table(thd, tab->table);
921         delete tab->tmp_table_param;
922         tab->tmp_table_param= NULL;
923       }
924       tab->op->free();
925       tab->op= NULL;
926     }
927 
928     tab->table= NULL;
929   }
930  /* Cleanup items referencing temporary table columns */
931   cleanup_item_list(tmp_all_fields1);
932   cleanup_item_list(tmp_all_fields3);
933   destroy_sj_tmp_tables(this);
934 
935   List_iterator<Semijoin_mat_exec> sjm_list_it(sjm_exec_list);
936   Semijoin_mat_exec *sjm;
937   while ((sjm= sjm_list_it++))
938     delete sjm;
939   sjm_exec_list.empty();
940 
941   keyuse.clear();
942   DBUG_RETURN(MY_TEST(error));
943 }
944 
945 
cleanup_item_list(List<Item> & items) const946 void JOIN::cleanup_item_list(List<Item> &items) const
947 {
948   if (!items.is_empty())
949   {
950     List_iterator_fast<Item> it(items);
951     Item *item;
952     while ((item= it++))
953       item->cleanup();
954   }
955 }
956 
957 
958 /**
959   Prepare stage of mysql_select.
960 
961   @param thd                  thread handler
962                               the top-level select_lex for this query
963   @param tables               list of all tables used in this query.
964                               The tables have been pre-opened.
965   @param wild_num             number of wildcards used in the top level
966                               select of this query.
967                               For example statement
968                               SELECT *, t1.*, catalog.t2.* FROM t0, t1, t2;
969                               has 3 wildcards.
970   @param fields               list of items in SELECT list of the top-level
971                               select
972                               e.g. SELECT a, b, c FROM t1 will have Item_field
973                               for a, b and c in this list.
974   @param conds                top level item of an expression representing
975                               WHERE clause of the top level select
976   @param og_num               total number of ORDER BY and GROUP BY clauses
977                               arguments
978   @param order                linked list of ORDER BY agruments
979   @param group                linked list of GROUP BY arguments
980   @param having               top level item of HAVING expression
981   @param select_options       select options (BIG_RESULT, etc)
982   @param result               an instance of result set handling class.
983                               This object is responsible for send result
984                               set rows to the client or inserting them
985                               into a table.
986   @param unit                 top-level UNIT of this query
987                               UNIT is an artificial object created by the
988                               parser for every SELECT clause.
989                               e.g.
990                               SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
991                               has 2 unions.
992   @param select_lex           the only SELECT_LEX of this query
993   @param[out] free_join       Will be set to false if select_lex->join does
994                               not need to be freed.
995 
996   @retval
997     false  success
998   @retval
999     true   an error
1000 
1001   @note tables must be opened before calling mysql_prepare_select.
1002 */
1003 
1004 static bool
mysql_prepare_select(THD * thd,TABLE_LIST * tables,uint wild_num,List<Item> & fields,Item * conds,uint og_num,ORDER * order,ORDER * group,Item * having,ulonglong select_options,select_result * result,SELECT_LEX_UNIT * unit,SELECT_LEX * select_lex,bool * free_join)1005 mysql_prepare_select(THD *thd,
1006                      TABLE_LIST *tables, uint wild_num, List<Item> &fields,
1007                      Item *conds, uint og_num,  ORDER *order, ORDER *group,
1008                      Item *having, ulonglong select_options,
1009                      select_result *result, SELECT_LEX_UNIT *unit,
1010                      SELECT_LEX *select_lex, bool *free_join)
1011 {
1012   bool err= false;
1013   JOIN *join;
1014 
1015   DBUG_ENTER("mysql_prepare_select");
1016   select_lex->context.resolve_in_select_list= TRUE;
1017   if (select_lex->join != 0)
1018   {
1019     join= select_lex->join;
1020     /*
1021       is it single SELECT in derived table, called in derived table
1022       creation
1023     */
1024     if (select_lex->linkage != DERIVED_TABLE_TYPE ||
1025 	(select_options & SELECT_DESCRIBE))
1026     {
1027       if (select_lex->linkage != GLOBAL_OPTIONS_TYPE)
1028       {
1029 	//here is EXPLAIN of subselect or derived table
1030 	if (join->change_result(result))
1031 	{
1032 	  DBUG_RETURN(TRUE);
1033 	}
1034         /*
1035           Original join tabs might be overwritten at first
1036           subselect execution. So we need to restore them.
1037         */
1038         Item_subselect *subselect= select_lex->master_unit()->item;
1039         if (subselect && subselect->is_uncacheable())
1040           join->reset();
1041       }
1042       else
1043       {
1044         err= join->prepare(tables, wild_num,
1045                            conds, og_num, order, group, having,
1046                            select_lex, unit);
1047         if (err)
1048           DBUG_RETURN(true);
1049       }
1050     }
1051     *free_join= false;
1052     join->select_options= select_options;
1053   }
1054   else
1055   {
1056     if (!(join= new JOIN(thd, fields, select_options, result)))
1057 	DBUG_RETURN(TRUE); /* purecov: inspected */
1058     THD_STAGE_INFO(thd, stage_init);
1059     thd->lex->used_tables=0;                         // Updated by setup_fields
1060     err= join->prepare(tables, wild_num,
1061                        conds, og_num, order, group, having,
1062                        select_lex, unit);
1063     if (err)
1064       DBUG_RETURN(true);
1065   }
1066 
1067   DBUG_RETURN(err);
1068 }
1069 
1070 
1071 /**
1072   Execute stage of mysql_select.
1073 
1074   @param thd                  thread handler
1075   @param select_lex           the only SELECT_LEX of this query
1076   @param free_join            if join should be freed
1077 
1078   @return Operation status
1079     @retval false  success
1080     @retval true   an error
1081 
1082   @note tables must be opened and locked before calling mysql_execute_select.
1083 */
1084 
1085 static bool
mysql_execute_select(THD * thd,SELECT_LEX * select_lex,bool free_join)1086 mysql_execute_select(THD *thd, SELECT_LEX *select_lex, bool free_join)
1087 {
1088   bool err;
1089   JOIN* join= select_lex->join;
1090 
1091   DBUG_ENTER("mysql_execute_select");
1092   DBUG_ASSERT(join);
1093 
1094   if ((err= join->optimize()))
1095   {
1096     goto err;					// 1
1097   }
1098 
1099   if (thd->is_error())
1100     goto err;
1101 
1102   if (join->select_options & SELECT_DESCRIBE)
1103   {
1104     join->explain();
1105     free_join= false;
1106   }
1107   else
1108     join->exec();
1109 
1110 err:
1111   if (free_join)
1112   {
1113     THD_STAGE_INFO(thd, stage_end);
1114     err|= select_lex->cleanup();
1115     DBUG_RETURN(err || thd->is_error());
1116   }
1117   DBUG_RETURN(join->error);
1118 }
1119 
1120 
1121 /**
1122   An entry point to single-unit select (a select without UNION).
1123 
1124   @param thd                  thread handler
1125   @param tables               list of all tables used in this query.
1126                               The tables have been pre-opened.
1127   @param wild_num             number of wildcards used in the top level
1128                               select of this query.
1129                               For example statement
1130                               SELECT *, t1.*, catalog.t2.* FROM t0, t1, t2;
1131                               has 3 wildcards.
1132   @param fields               list of items in SELECT list of the top-level
1133                               select
1134                               e.g. SELECT a, b, c FROM t1 will have Item_field
1135                               for a, b and c in this list.
1136   @param conds                top level item of an expression representing
1137                               WHERE clause of the top level select
1138   @param order                linked list of ORDER BY agruments
1139   @param group                linked list of GROUP BY arguments
1140   @param having               top level item of HAVING expression
1141   @param select_options       select options (BIG_RESULT, etc)
1142   @param result               an instance of result set handling class.
1143                               This object is responsible for send result
1144                               set rows to the client or inserting them
1145                               into a table.
1146   @param unit                 top-level UNIT of this query
1147                               UNIT is an artificial object created by the
1148                               parser for every SELECT clause.
1149                               e.g.
1150                               SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
1151                               has 2 unions.
1152   @param select_lex           the only SELECT_LEX of this query
1153 
1154   @retval
1155     false  success
1156   @retval
1157     true   an error
1158 */
1159 
1160 bool
mysql_select(THD * thd,TABLE_LIST * tables,uint wild_num,List<Item> & fields,Item * conds,SQL_I_List<ORDER> * order,SQL_I_List<ORDER> * group,Item * having,ulonglong select_options,select_result * result,SELECT_LEX_UNIT * unit,SELECT_LEX * select_lex)1161 mysql_select(THD *thd,
1162              TABLE_LIST *tables, uint wild_num, List<Item> &fields,
1163              Item *conds, SQL_I_List<ORDER> *order, SQL_I_List<ORDER> *group,
1164              Item *having, ulonglong select_options,
1165              select_result *result, SELECT_LEX_UNIT *unit,
1166              SELECT_LEX *select_lex)
1167 {
1168   bool free_join= true;
1169   uint og_num= 0;
1170   ORDER *first_order= NULL;
1171   ORDER *first_group= NULL;
1172   DBUG_ENTER("mysql_select");
1173 
1174   if (order)
1175   {
1176     og_num= order->elements;
1177     first_order= order->first;
1178   }
1179   if (group)
1180   {
1181     og_num+= group->elements;
1182     first_group= group->first;
1183   }
1184 
1185   if (mysql_prepare_select(thd, tables, wild_num, fields,
1186                            conds, og_num, first_order, first_group, having,
1187                            select_options, result, unit,
1188                            select_lex, &free_join))
1189   {
1190     if (free_join)
1191     {
1192       THD_STAGE_INFO(thd, stage_end);
1193       (void) select_lex->cleanup();
1194     }
1195     DBUG_RETURN(true);
1196   }
1197 
1198   if (! thd->lex->is_query_tables_locked())
1199   {
1200     /*
1201       If tables are not locked at this point, it means that we have delayed
1202       this step until after prepare stage (i.e. this moment). This allows to
1203       do better partition pruning and avoid locking unused partitions.
1204       As a consequence, in such a case, prepare stage can rely only on
1205       metadata about tables used and not data from them.
1206       We need to lock tables now in order to proceed with the remaning
1207       stages of query optimization and execution.
1208     */
1209     if (lock_tables(thd, thd->lex->query_tables, thd->lex->table_count, 0))
1210     {
1211       if (free_join)
1212       {
1213         THD_STAGE_INFO(thd, stage_end);
1214         (void) select_lex->cleanup();
1215       }
1216       DBUG_RETURN(true);
1217     }
1218 
1219     /*
1220       Only register query in cache if it tables were locked above.
1221 
1222       Tables must be locked before storing the query in the query cache.
1223       Transactional engines must been signalled that the statement started,
1224       which external_lock signals.
1225     */
1226     query_cache_store_query(thd, thd->lex->query_tables);
1227   }
1228 
1229   DBUG_RETURN(mysql_execute_select(thd, select_lex, free_join));
1230 }
1231 
1232 /*****************************************************************************
1233   Go through all combinations of not marked tables and find the one
1234   which uses least records
1235 *****************************************************************************/
1236 
1237 /**
1238    Returns which join buffer algorithms are enabled for this session.
1239 
1240    @param thd the @c THD for this session
1241 
1242    @return bitmap with available join buffer algorithms
1243 */
1244 
join_buffer_alg(const THD * thd)1245 static uint join_buffer_alg(const THD *thd)
1246 {
1247   uint alg= JOIN_CACHE::ALG_NONE;
1248 
1249   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL))
1250     alg|= JOIN_CACHE::ALG_BNL;
1251 
1252   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BKA))
1253   {
1254     bool use_bka_unique= false;
1255     DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
1256 
1257     if (use_bka_unique)
1258       alg|= JOIN_CACHE::ALG_BKA_UNIQUE;
1259     else
1260       alg|= JOIN_CACHE::ALG_BKA;
1261   }
1262 
1263   return alg;
1264 }
1265 
1266 
1267 /**
1268   Find how much space the prevous read not const tables takes in cache.
1269 */
1270 
calc_used_field_length(THD * thd,JOIN_TAB * join_tab)1271 void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
1272 {
1273   uint null_fields,blobs,fields,rec_length;
1274   Field **f_ptr,*field;
1275   uint uneven_bit_fields;
1276   MY_BITMAP *read_set= join_tab->table->read_set;
1277 
1278   uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
1279   for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
1280   {
1281     if (bitmap_is_set(read_set, field->field_index))
1282     {
1283       uint flags=field->flags;
1284       fields++;
1285       rec_length+=field->pack_length();
1286       if (flags & BLOB_FLAG)
1287 	blobs++;
1288       if (!(flags & NOT_NULL_FLAG))
1289         null_fields++;
1290       if (field->type() == MYSQL_TYPE_BIT &&
1291           ((Field_bit*)field)->bit_len)
1292         uneven_bit_fields++;
1293     }
1294   }
1295   if (null_fields || uneven_bit_fields)
1296     rec_length+=(join_tab->table->s->null_fields+7)/8;
1297   if (join_tab->table->maybe_null)
1298     rec_length+=sizeof(my_bool);
1299   if (blobs)
1300   {
1301     uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
1302 			     (join_tab->table->s->reclength- rec_length));
1303     rec_length+= max<uint>(4U, blob_length);
1304   }
1305   /**
1306     @todo why don't we count the rowids that we might need to store
1307     when using DuplicateElimination?
1308   */
1309   join_tab->used_fields=fields;
1310   join_tab->used_fieldlength=rec_length;
1311   join_tab->used_blobs=blobs;
1312   join_tab->used_null_fields= null_fields;
1313   join_tab->used_uneven_bit_fields= uneven_bit_fields;
1314 }
1315 
1316 
1317 /**
1318   Set up JOIN_TAB structs according to the picked join order in best_positions.
1319   This allocates execution structures so may be called only after we have the
1320   very final plan. It must be called after
1321   Optimize_table_order::fix_semijoin_strategies().
1322 
1323   @return False if success, True if error
1324 
1325   @details
1326     - create join->join_tab array and copy from existing JOIN_TABs in join order
1327     - create helper structs for materialized semi-join handling
1328     - finalize semi-join strategy choices
1329     - Number of intermediate tables "tmp_tables" is calculated.
1330     - "tables" and "primary_tables" are recalculated.
1331 
1332    Notice that intermediate tables will not have a POSITION reference; and they
1333    will not have a TABLE reference before the final stages of code generation.
1334 */
1335 
get_best_combination()1336 bool JOIN::get_best_combination()
1337 {
1338   DBUG_ENTER("JOIN::get_best_combination");
1339 
1340   // At this point "tables" and "primary"tables" represent the same:
1341   DBUG_ASSERT(tables == primary_tables);
1342 
1343   /*
1344     Allocate additional space for tmp tables.
1345     Number of plan nodes:
1346       # of regular input tables (including semi-joined ones) +
1347       # of semi-join nests for materialization +
1348       1? + // For GROUP BY
1349       1? + // For DISTINCT
1350       1? + // For aggregation functions aggregated in outer query
1351            // when used with distinct
1352       1? + // For ORDER BY
1353       1?   // buffer result
1354     Up to 2 tmp tables are actually used, but it's hard to tell exact number
1355     at this stage.
1356   */
1357   uint tmp_tables= (group_list ? 1 : 0) +
1358                    (select_distinct ?
1359                     (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
1360                    (order ? 1 : 0) +
1361        (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
1362   if (tmp_tables > 2)
1363     tmp_tables= 2;
1364 
1365   /*
1366     Rearrange queries with materialized semi-join nests so that the semi-join
1367     nest is replaced with a reference to a materialized temporary table and all
1368     materialized subquery tables are placed after the intermediate tables.
1369     After the following loop, "inner_target" is the position of the first
1370     subquery table (if any). "outer_target" is the position of first outer
1371     table, and will later be used to track the position of any materialized
1372     temporary tables.
1373   */
1374   const bool has_semijoin= !select_lex->sj_nests.is_empty();
1375   uint outer_target= 0;
1376   uint inner_target= primary_tables + tmp_tables;
1377   uint sjm_nests= 0;
1378 
1379   if (has_semijoin)
1380   {
1381     for (uint tableno= 0; tableno < primary_tables; )
1382     {
1383       if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
1384       {
1385         sjm_nests++;
1386         inner_target-= (best_positions[tableno].n_sj_tables - 1);
1387         tableno+= best_positions[tableno].n_sj_tables;
1388       }
1389       else
1390         tableno++;
1391     }
1392   }
1393   if (!(join_tab= new(thd->mem_root) JOIN_TAB[tables + sjm_nests + tmp_tables]))
1394     DBUG_RETURN(true);
1395 
1396   int sjm_index= tables;  // Number assigned to materialized temporary table
1397   int remaining_sjm_inner= 0;
1398   for (uint tableno= 0; tableno < tables; tableno++)
1399   {
1400     if (has_semijoin &&
1401         sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
1402     {
1403       DBUG_ASSERT(outer_target < inner_target);
1404 
1405       POSITION *const pos_table= best_positions + tableno;
1406       TABLE_LIST *const sj_nest= pos_table->table->emb_sj_nest;
1407 
1408       // Handle this many inner tables of materialized semi-join
1409       remaining_sjm_inner= pos_table->n_sj_tables;
1410 
1411       Semijoin_mat_exec *const sjm_exec=
1412         new (thd->mem_root)
1413         Semijoin_mat_exec(sj_nest,
1414                           (pos_table->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
1415                           remaining_sjm_inner, outer_target, inner_target);
1416       if (!sjm_exec)
1417         DBUG_RETURN(true);
1418 
1419       (join_tab + outer_target)->sj_mat_exec= sjm_exec;
1420 
1421       if (setup_materialized_table(join_tab + outer_target, sjm_index,
1422                                    pos_table, best_positions + sjm_index))
1423         DBUG_RETURN(true);
1424 
1425       map2table[sjm_exec->table->tablenr]= join_tab + outer_target;
1426 
1427       outer_target++;
1428       sjm_index++;
1429     }
1430     /*
1431       Locate join_tab target for the table we are considering.
1432       (remaining_sjm_inner becomes negative for non-SJM tables, this can be
1433        safely ignored).
1434     */
1435     const uint target=
1436       (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
1437     JOIN_TAB *const tab= join_tab + target;
1438 
1439     // Copy data from existing join_tab
1440     *tab= *best_positions[tableno].table;
1441 
1442     tab->position= best_positions + tableno;
1443 
1444     TABLE *const table= tab->table;
1445     table->reginfo.join_tab= tab;
1446     if (!*tab->on_expr_ref)
1447       table->reginfo.not_exists_optimize= false;     // Only with LEFT JOIN
1448     map2table[table->tablenr]= tab;
1449   }
1450 
1451   // Count the materialized semi-join tables as regular input tables
1452   tables+= sjm_nests + tmp_tables;
1453   // Set the number of non-materialized tables:
1454   primary_tables= outer_target;
1455 
1456   if (has_semijoin)
1457   {
1458     set_semijoin_info();
1459 
1460     // Update equalities and keyuses after having added SJ materialization
1461     if (update_equalities_for_sjm())
1462       DBUG_RETURN(true);
1463   }
1464   // sjm is no longer needed, trash it. To reuse it, reset its members!
1465   List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
1466   TABLE_LIST *sj_nest;
1467   while ((sj_nest= sj_list_it++))
1468     TRASH(static_cast<void*>(&sj_nest->nested_join->sjm),
1469           sizeof(sj_nest->nested_join->sjm));
1470 
1471   DBUG_RETURN(false);
1472 }
1473 
1474 
1475 /**
1476   Set access methods for the tables of a query plan.
1477 
1478   @return False if success, True if error
1479 
1480   @details
1481     We need to fill in data for the case where
1482      - There is no key selected (use JT_ALL)
1483      - Loose scan semi-join strategy is selected (use JT_ALL)
1484      - A ref key can be used (use JT_REF, JT_REF_OR_NULL, JT_EQ_REF or JT_FT)
1485 
1486   @note We cannot setup fields used for ref access before we have sorted
1487         the items within multiple equalities according to the final order of
1488         the tables involved in the join operation. Currently, this occurs in
1489         @see substitute_for_best_equal_field().
1490 */
set_access_methods()1491 bool JOIN::set_access_methods()
1492 {
1493   DBUG_ENTER("JOIN::set_access_methods");
1494 
1495   full_join= false;
1496 
1497   for (uint tableno= const_tables; tableno < tables; tableno++)
1498   {
1499     JOIN_TAB *const tab= join_tab + tableno;
1500 
1501     if (!tab->position)
1502       continue;
1503 
1504     DBUG_PRINT("info",("type: %d", tab->type));
1505 
1506     // Set preliminary join cache setting based on decision from greedy search
1507     tab->use_join_cache= tab->position->use_join_buffer ?
1508                            JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE;
1509 
1510     if (tab->type == JT_CONST || tab->type == JT_SYSTEM)
1511       continue;                      // Handled in make_join_statistics()
1512 
1513     Key_use *const keyuse= tab->position->key;
1514     if (tab->position->sj_strategy == SJ_OPT_LOOSE_SCAN)
1515     {
1516       DBUG_ASSERT(tab->keys.is_set(tab->position->loosescan_key));
1517       tab->type= JT_ALL; // @todo is this consistent for a LooseScan table ?
1518       tab->index= tab->position->loosescan_key;
1519      }
1520     else if (!keyuse)
1521     {
1522       tab->type= JT_ALL;
1523       if (tableno > const_tables)
1524        full_join= true;
1525     }
1526     else
1527     {
1528       if (create_ref_for_key(this, tab, keyuse, tab->prefix_tables()))
1529         DBUG_RETURN(true);
1530     }
1531    }
1532 
1533   DBUG_RETURN(false);
1534 }
1535 
1536 
1537 /**
1538   Set the first_sj_inner_tab and last_sj_inner_tab fields for all tables
1539   inside the semijoin nests of the query.
1540 */
set_semijoin_info()1541 void JOIN::set_semijoin_info()
1542 {
1543   if (select_lex->sj_nests.is_empty())
1544     return;
1545 
1546   for (uint tableno= const_tables; tableno < tables; )
1547   {
1548     JOIN_TAB *const tab= join_tab + tableno;
1549     const POSITION *const pos= tab->position;
1550 
1551     if (!pos)
1552     {
1553       tableno++;
1554       continue;
1555     }
1556     switch (pos->sj_strategy)
1557     {
1558     case SJ_OPT_NONE:
1559       tableno++;
1560       break;
1561     case SJ_OPT_MATERIALIZE_LOOKUP:
1562     case SJ_OPT_MATERIALIZE_SCAN:
1563     case SJ_OPT_LOOSE_SCAN:
1564     case SJ_OPT_DUPS_WEEDOUT:
1565     case SJ_OPT_FIRST_MATCH:
1566       /*
1567         Remember the first and last semijoin inner tables; this serves to tell
1568         a JOIN_TAB's semijoin strategy (like in setup_join_buffering()).
1569       */
1570       JOIN_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
1571       JOIN_TAB *last_sj_inner=
1572         (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) ?
1573         /* Range may end with non-inner table so cannot set last_sj_inner_tab */
1574         NULL : last_sj_tab;
1575       for (JOIN_TAB *tab_in_range= tab;
1576            tab_in_range <= last_sj_tab;
1577            tab_in_range++)
1578       {
1579         tab_in_range->first_sj_inner_tab= tab;
1580         tab_in_range->last_sj_inner_tab=  last_sj_inner;
1581       }
1582       tableno+= pos->n_sj_tables;
1583       break;
1584     }
1585   }
1586 }
1587 
1588 
1589 /**
1590   Setup a ref access for looking up rows via an index (a key).
1591 
1592   @param join          The join object being handled
1593   @param j             The join_tab which will have the ref access populated
1594   @param first_keyuse  First key part of (possibly multi-part) key
1595   @param used_tables   Bitmap of available tables
1596 
1597   @return False if success, True if error
1598 
1599   @details
1600     This function will set up a ref access using the best key found
1601     during access path analysis and cost analysis.
1602 
1603   @note We cannot setup fields used for ref access before we have sorted
1604         the items within multiple equalities according to the final order of
1605         the tables involved in the join operation. Currently, this occurs in
1606         @see substitute_for_best_equal_field().
1607         The exception is ref access for const tables, which are fixed
1608         before the greedy search planner is invoked.
1609 */
1610 
create_ref_for_key(JOIN * join,JOIN_TAB * j,Key_use * org_keyuse,table_map used_tables)1611 bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
1612                         table_map used_tables)
1613 {
1614   DBUG_ENTER("create_ref_for_key");
1615 
1616   Key_use *keyuse= org_keyuse;
1617   const uint key= keyuse->key;
1618   const bool ftkey= (keyuse->keypart == FT_KEYPART);
1619   THD  *const thd= join->thd;
1620   uint keyparts, length;
1621   TABLE *const table= j->table;
1622   KEY   *const keyinfo= table->key_info+key;
1623   Key_use *chosen_keyuses[MAX_REF_PARTS];
1624 
1625   DBUG_ASSERT(j->keys.is_set(org_keyuse->key));
1626 
1627   if (ftkey)
1628   {
1629     Item_func_match *ifm=(Item_func_match *)keyuse->val;
1630 
1631     length=0;
1632     keyparts=1;
1633     ifm->join_key=1;
1634   }
1635   else
1636   {
1637     keyparts=length=0;
1638     uint found_part_ref_or_null= 0;
1639     // Calculate length for the used key. Remember chosen Key_use-s.
1640     do
1641     {
1642       /*
1643         This Key_use is chosen if:
1644         - it involves a key part at the right place (if index is (a,b) we
1645         can have a search criterion on 'b' only if we also have a criterion
1646         on 'a'),
1647         - it references only tables earlier in the plan.
1648         Moreover, the execution layer is limited to maximum one ref_or_null
1649         keypart, as TABLE_REF::null_ref_key is only one byte.
1650       */
1651       if (!(~used_tables & keyuse->used_tables) &&
1652           keyparts == keyuse->keypart &&
1653           !(found_part_ref_or_null & keyuse->optimize))
1654       {
1655         DBUG_ASSERT(keyparts <= MAX_REF_PARTS);
1656         chosen_keyuses[keyparts]= keyuse;
1657         keyparts++;
1658         length+= keyinfo->key_part[keyuse->keypart].store_length;
1659         found_part_ref_or_null|= keyuse->optimize;
1660       }
1661       keyuse++;
1662     } while (keyuse->table == table && keyuse->key == key);
1663     DBUG_ASSERT(length > 0 && keyparts != 0);
1664   } /* not ftkey */
1665 
1666   DBUG_ASSERT(keyparts > 0);
1667 
1668   /* set up fieldref */
1669   j->ref.key_parts=keyparts;
1670   j->ref.key_length=length;
1671   j->ref.key=(int) key;
1672   if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
1673       !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
1674                                                    (keyparts)))) ||
1675       !(j->ref.items=    (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
1676       !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
1677   {
1678     DBUG_RETURN(TRUE);
1679   }
1680   j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length);
1681   j->ref.key_err=1;
1682   j->ref.has_record= FALSE;
1683   j->ref.null_rejecting= 0;
1684   j->ref.use_count= 0;
1685   j->ref.disable_cache= FALSE;
1686   keyuse=org_keyuse;
1687 
1688   uchar *key_buff= j->ref.key_buff;
1689   uchar *null_ref_key= NULL;
1690   bool keyuse_uses_no_tables= true;
1691   if (ftkey)
1692   {
1693     j->ref.items[0]=((Item_func*)(keyuse->val))->key_item();
1694     /* Predicates pushed down into subquery can't be used FT access */
1695     j->ref.cond_guards[0]= NULL;
1696     if (keyuse->used_tables)
1697       DBUG_RETURN(TRUE);                        // not supported yet. SerG
1698 
1699     j->type=JT_FT;
1700     memset(j->ref.key_copy, 0, sizeof(j->ref.key_copy[0]) * keyparts);
1701   }
1702   else
1703   {
1704     // Set up TABLE_REF based on chosen Key_use-s.
1705     for (uint part_no= 0 ; part_no < keyparts ; part_no++)
1706     {
1707       keyuse= chosen_keyuses[part_no];
1708       uint maybe_null= MY_TEST(keyinfo->key_part[part_no].null_bit);
1709 
1710       if (keyuse->val->type() == Item::FIELD_ITEM)
1711       {
1712         // Look up the most appropriate field to base the ref access on.
1713         keyuse->val= get_best_field(static_cast<Item_field *>(keyuse->val),
1714                                     join->cond_equal);
1715         keyuse->used_tables= keyuse->val->used_tables();
1716       }
1717       j->ref.items[part_no]=keyuse->val;        // Save for cond removal
1718       j->ref.cond_guards[part_no]= keyuse->cond_guard;
1719       if (keyuse->null_rejecting)
1720         j->ref.null_rejecting|= (key_part_map)1 << part_no;
1721       keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
1722 
1723       store_key* key= get_store_key(thd,
1724                                     keyuse,join->const_table_map,
1725                                     &keyinfo->key_part[part_no],
1726                                     key_buff, maybe_null);
1727       if (unlikely(!key || thd->is_fatal_error))
1728         DBUG_RETURN(TRUE);
1729 
1730       if (keyuse->used_tables || thd->lex->describe)
1731         /*
1732           Comparing against a non-constant or executing an EXPLAIN
1733           query (which refers to this info when printing the 'ref'
1734           column of the query plan)
1735         */
1736         j->ref.key_copy[part_no]= key;
1737       else
1738       {
1739         /*
1740           key is const, copy value now and possibly skip it while ::exec().
1741 
1742           Note:
1743             Result check of store_key::copy() is unnecessary,
1744             it could be an error returned by store_key::copy() method
1745             but stored value is not null and default value could be used
1746             in this case. Methods which used for storing the value
1747             should be responsible for proper null value setting
1748             in case of an error. Thus it's enough to check key->null_key
1749             value only.
1750         */
1751         (void) key->copy();
1752         /*
1753           It should be reevaluated in ::exec() if
1754           constant evaluated to NULL value which we might need to
1755           handle as a special case during JOIN::exec()
1756           (As in : 'Full scan on NULL key')
1757         */
1758         if (key->null_key)
1759           j->ref.key_copy[part_no]= key; // Reevaluate in JOIN::exec()
1760         else
1761           j->ref.key_copy[part_no]= NULL;
1762       }
1763       /*
1764 	Remember if we are going to use REF_OR_NULL
1765 	But only if field _really_ can be null i.e. we force JT_REF
1766 	instead of JT_REF_OR_NULL in case if field can't be null
1767       */
1768       if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
1769       {
1770         DBUG_ASSERT(null_ref_key == NULL); // or we would overwrite it below
1771         null_ref_key= key_buff;
1772       }
1773       key_buff+=keyinfo->key_part[part_no].store_length;
1774     }
1775   } /* not ftkey */
1776   if (j->type == JT_FT)
1777     DBUG_RETURN(false);
1778   if (j->type == JT_CONST)
1779     j->table->const_table= 1;
1780   else if (((actual_key_flags(keyinfo) &
1781              (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
1782 	   keyparts != actual_key_parts(keyinfo) || null_ref_key)
1783   {
1784     /* Must read with repeat */
1785     j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
1786     j->ref.null_ref_key= null_ref_key;
1787   }
1788   else if (keyuse_uses_no_tables &&
1789            !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))
1790   {
1791     /*
1792       This happen if we are using a constant expression in the ON part
1793       of an LEFT JOIN.
1794       SELECT * FROM a LEFT JOIN b ON b.key=30
1795       Here we should not mark the table as a 'const' as a field may
1796       have a 'normal' value or a NULL value.
1797     */
1798     j->type=JT_CONST;
1799   }
1800   else
1801     j->type=JT_EQ_REF;
1802   DBUG_RETURN(false);
1803 }
1804 
1805 
1806 
1807 static store_key *
get_store_key(THD * thd,Key_use * keyuse,table_map used_tables,KEY_PART_INFO * key_part,uchar * key_buff,uint maybe_null)1808 get_store_key(THD *thd, Key_use *keyuse, table_map used_tables,
1809 	      KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
1810 {
1811   if (!((~used_tables) & keyuse->used_tables))		// if const item
1812   {
1813     return new store_key_const_item(thd,
1814 				    key_part->field,
1815 				    key_buff + maybe_null,
1816 				    maybe_null ? key_buff : 0,
1817 				    key_part->length,
1818 				    keyuse->val);
1819   }
1820 
1821   Item_field *field_item= NULL;
1822   if (keyuse->val->type() == Item::FIELD_ITEM)
1823     field_item= static_cast<Item_field*>(keyuse->val->real_item());
1824   else if (keyuse->val->type() == Item::REF_ITEM)
1825   {
1826     Item_ref *item_ref= static_cast<Item_ref*>(keyuse->val);
1827     if (item_ref->ref_type() == Item_ref::OUTER_REF)
1828     {
1829       if ((*item_ref->ref)->type() == Item::FIELD_ITEM)
1830         field_item= static_cast<Item_field*>(item_ref->real_item());
1831       else if ((*(Item_ref**)(item_ref)->ref)->ref_type()
1832                == Item_ref::DIRECT_REF
1833                &&
1834                item_ref->real_item()->type() == Item::FIELD_ITEM)
1835         field_item= static_cast<Item_field*>(item_ref->real_item());
1836     }
1837   }
1838   if (field_item)
1839     return new store_key_field(thd,
1840                                key_part->field,
1841                                key_buff + maybe_null,
1842                                maybe_null ? key_buff : 0,
1843                                key_part->length,
1844                                field_item->field,
1845                                keyuse->val->full_name());
1846 
1847   return new store_key_item(thd,
1848 			    key_part->field,
1849 			    key_buff + maybe_null,
1850 			    maybe_null ? key_buff : 0,
1851 			    key_part->length,
1852 			    keyuse->val);
1853 }
1854 
1855 
1856 /**
1857   Extend e1 by AND'ing e2 to the condition e1 points to. The resulting
1858   condition is fixed. Requirement: the input Items must already have
1859   been fixed.
1860 
1861   @param[in,out]   e1 Pointer to condition that will be extended with e2
1862   @param           e2 Condition that will extend e1
1863 
1864   @retval true   if there was a memory allocation error, in which case
1865                  e1 remains unchanged
1866   @retval false  otherwise
1867 */
1868 
and_conditions(Item ** e1,Item * e2)1869 bool and_conditions(Item **e1, Item *e2)
1870 {
1871   DBUG_ASSERT(!(*e1) || (*e1)->fixed);
1872   DBUG_ASSERT(!e2 || e2->fixed);
1873   if (*e1)
1874   {
1875     if (!e2)
1876       return false;
1877     Item *res= new Item_cond_and(*e1, e2);
1878     if (unlikely(!res))
1879       return true;
1880 
1881     *e1= res;
1882     res->quick_fix_field();
1883     res->update_used_tables();
1884 
1885   }
1886   else
1887     *e1= e2;
1888   return false;
1889 }
1890 
1891 
1892 #define ICP_COND_USES_INDEX_ONLY 10
1893 
1894 /*
1895   Get a part of the condition that can be checked using only index fields
1896 
1897   SYNOPSIS
1898     make_cond_for_index()
1899       cond           The source condition
1900       table          The table that is partially available
1901       keyno          The index in the above table. Only fields covered by the index
1902                      are available
1903       other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
1904 
1905   DESCRIPTION
1906     Get a part of the condition that can be checked when for the given table
1907     we have values only of fields covered by some index. The condition may
1908     refer to other tables, it is assumed that we have values of all of their
1909     fields.
1910 
1911     Example:
1912       make_cond_for_index(
1913          "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
1914           t2, keyno(t2.key1))
1915       will return
1916         "cond(t1.field) AND cond(t2.key2)"
1917 
1918   RETURN
1919     Index condition, or NULL if no condition could be inferred.
1920 */
1921 
make_cond_for_index(Item * cond,TABLE * table,uint keyno,bool other_tbls_ok)1922 static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
1923                                  bool other_tbls_ok)
1924 {
1925   DBUG_ASSERT(cond != NULL);
1926 
1927   if (cond->type() == Item::COND_ITEM)
1928   {
1929     uint n_marked= 0;
1930     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
1931     {
1932       table_map used_tables= 0;
1933       Item_cond_and *new_cond=new Item_cond_and;
1934       if (!new_cond)
1935 	return NULL;
1936       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1937       Item *item;
1938       while ((item=li++))
1939       {
1940 	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
1941 	if (fix)
1942         {
1943 	  new_cond->argument_list()->push_back(fix);
1944           used_tables|= fix->used_tables();
1945         }
1946         n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
1947       }
1948       if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
1949         cond->marker= ICP_COND_USES_INDEX_ONLY;
1950       switch (new_cond->argument_list()->elements) {
1951       case 0:
1952 	return NULL;
1953       case 1:
1954         new_cond->set_used_tables(used_tables);
1955 	return new_cond->argument_list()->head();
1956       default:
1957 	new_cond->quick_fix_field();
1958         new_cond->set_used_tables(used_tables);
1959 	return new_cond;
1960       }
1961     }
1962     else /* It's OR */
1963     {
1964       Item_cond_or *new_cond=new Item_cond_or;
1965       if (!new_cond)
1966 	return NULL;
1967       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1968       Item *item;
1969       while ((item=li++))
1970       {
1971 	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
1972 	if (!fix)
1973 	  return NULL;
1974 	new_cond->argument_list()->push_back(fix);
1975         n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
1976       }
1977       if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
1978         cond->marker= ICP_COND_USES_INDEX_ONLY;
1979       new_cond->quick_fix_field();
1980       new_cond->set_used_tables(cond->used_tables());
1981       new_cond->top_level_item();
1982       return new_cond;
1983     }
1984   }
1985 
1986   if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
1987   {
1988     /*
1989       Reset marker since it might have the value
1990       ICP_COND_USES_INDEX_ONLY if this condition is part of the select
1991       condition for multiple tables.
1992     */
1993     cond->marker= 0;
1994     return NULL;
1995   }
1996   cond->marker= ICP_COND_USES_INDEX_ONLY;
1997   return cond;
1998 }
1999 
2000 
make_cond_remainder(Item * cond,bool exclude_index)2001 static Item *make_cond_remainder(Item *cond, bool exclude_index)
2002 {
2003   if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
2004     return 0; /* Already checked */
2005 
2006   if (cond->type() == Item::COND_ITEM)
2007   {
2008     table_map tbl_map= 0;
2009     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
2010     {
2011       /* Create new top level AND item */
2012       Item_cond_and *new_cond=new Item_cond_and;
2013       if (!new_cond)
2014 	return (Item*) 0;
2015       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2016       Item *item;
2017       while ((item=li++))
2018       {
2019 	Item *fix= make_cond_remainder(item, exclude_index);
2020 	if (fix)
2021         {
2022 	  new_cond->argument_list()->push_back(fix);
2023           tbl_map |= fix->used_tables();
2024         }
2025       }
2026       switch (new_cond->argument_list()->elements) {
2027       case 0:
2028 	return (Item*) 0;
2029       case 1:
2030 	return new_cond->argument_list()->head();
2031       default:
2032 	new_cond->quick_fix_field();
2033         new_cond->set_used_tables(tbl_map);
2034 	return new_cond;
2035       }
2036     }
2037     else /* It's OR */
2038     {
2039       Item_cond_or *new_cond=new Item_cond_or;
2040       if (!new_cond)
2041 	return (Item*) 0;
2042       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2043       Item *item;
2044       while ((item=li++))
2045       {
2046 	Item *fix= make_cond_remainder(item, FALSE);
2047 	if (!fix)
2048 	  return (Item*) 0;
2049 	new_cond->argument_list()->push_back(fix);
2050         tbl_map |= fix->used_tables();
2051       }
2052       new_cond->quick_fix_field();
2053       new_cond->set_used_tables(tbl_map);
2054       new_cond->top_level_item();
2055       return new_cond;
2056     }
2057   }
2058   return cond;
2059 }
2060 
2061 
2062 /**
2063   Try to extract and push the index condition down to table handler
2064 
2065   @param  tab            A join tab that has tab->table->file and its
2066                          condition in tab->m_condition
2067   @param  keyno          Index for which extract and push the condition
2068   @param  other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
2069   @param  trace_obj      trace object where information is to be added
2070 */
2071 
push_index_cond(JOIN_TAB * tab,uint keyno,bool other_tbls_ok,Opt_trace_object * trace_obj)2072 static void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
2073                             Opt_trace_object *trace_obj)
2074 {
2075   DBUG_ENTER("push_index_cond");
2076 
2077   /*
2078     We will only attempt to push down an index condition when the
2079     following criteria are true:
2080     0. The table has a select condition
2081     1. The storage engine supports ICP.
2082     2. The system variable for enabling ICP is ON.
2083     3. The query is not a multi-table update or delete statement. The reason
2084        for this requirement is that the same handler will be used
2085        both for doing the select/join and the update. The pushed index
2086        condition might then also be applied by the storage engine
2087        when doing the update part and result in either not finding
2088        the record to update or updating the wrong record.
2089     4. The JOIN_TAB is not part of a subquery that has guarded conditions
2090        that can be turned on or off during execution of a 'Full scan on NULL
2091        key'.
2092        @see Item_in_optimizer::val_int()
2093        @see subselect_single_select_engine::exec()
2094        @see TABLE_REF::cond_guards
2095        @see setup_join_buffering
2096     5. The join type is not CONST or SYSTEM. The reason for excluding
2097        these join types, is that these are optimized to only read the
2098        record once from the storage engine and later re-use it. In a
2099        join where a pushed index condition evaluates fields from
2100        tables earlier in the join sequence, the pushed condition would
2101        only be evaluated the first time the record value was needed.
2102     6. The index is not a clustered index. The performance improvement
2103        of pushing an index condition on a clustered key is much lower
2104        than on a non-clustered key. This restriction should be
2105        re-evaluated when WL#6061 is implemented.
2106   */
2107   if (tab->condition() &&
2108       tab->table->file->index_flags(keyno, 0, 1) &
2109       HA_DO_INDEX_COND_PUSHDOWN &&
2110       tab->join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_CONDITION_PUSHDOWN) &&
2111       tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
2112       tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
2113       !tab->has_guarded_conds() &&
2114       tab->type != JT_CONST && tab->type != JT_SYSTEM &&
2115       !(keyno == tab->table->s->primary_key &&
2116         tab->table->file->primary_key_is_clustered()))
2117   {
2118     DBUG_EXECUTE("where", print_where(tab->condition(), "full cond",
2119                  QT_ORDINARY););
2120     Item *idx_cond= make_cond_for_index(tab->condition(), tab->table,
2121                                         keyno, other_tbls_ok);
2122     DBUG_EXECUTE("where", print_where(idx_cond, "idx cond", QT_ORDINARY););
2123     if (idx_cond)
2124     {
2125       /*
2126         Check that the condition to push actually contains fields from
2127         the index. Without any fields from the index it is unlikely
2128         that it will filter out any records since the conditions on
2129         fields from other tables in most cases have already been
2130         evaluated.
2131       */
2132       idx_cond->update_used_tables();
2133       if ((idx_cond->used_tables() & tab->table->map) == 0)
2134       {
2135         DBUG_VOID_RETURN;
2136       }
2137 
2138       Item *idx_remainder_cond= 0;
2139       tab->pre_idx_push_cond= tab->condition();
2140 
2141       /*
2142         For BKA cache we store condition to special BKA cache field
2143         because evaluation of the condition requires additional operations
2144         before the evaluation. This condition is used in
2145         JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
2146       */
2147       if (tab->use_join_cache &&
2148           /*
2149             if cache is used then the value is TRUE only
2150             for BKA[_UNIQUE] cache (see setup_join_buffering() func).
2151             In this case other_tbls_ok is an equivalent of
2152             cache->is_key_access().
2153           */
2154           other_tbls_ok &&
2155           (idx_cond->used_tables() &
2156            ~(tab->table->map | tab->join->const_table_map)))
2157       {
2158         tab->cache_idx_cond= idx_cond;
2159         trace_obj->add("pushed_to_BKA", true);
2160       }
2161       else
2162       {
2163         idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
2164         tab->select->icp_cond= idx_cond;
2165         DBUG_EXECUTE("where",
2166                      print_where(tab->select->icp_cond, "icp cond",
2167                                  QT_ORDINARY););
2168       }
2169       /*
2170         Disable eq_ref's "lookup cache" if we've pushed down an index
2171         condition.
2172         TODO: This check happens to work on current ICP implementations, but
2173         there may exist a compliant implementation that will not work
2174         correctly with it. Sort this out when we stabilize the condition
2175         pushdown APIs.
2176       */
2177       if (idx_remainder_cond != idx_cond)
2178       {
2179         tab->ref.disable_cache= TRUE;
2180         trace_obj->add("pushed_index_condition", idx_cond);
2181       }
2182 
2183       Item *row_cond= make_cond_remainder(tab->condition(), TRUE);
2184       DBUG_EXECUTE("where", print_where(row_cond, "remainder cond",
2185                    QT_ORDINARY););
2186 
2187       if (row_cond)
2188       {
2189         if (!idx_remainder_cond)
2190           tab->set_condition(row_cond, __LINE__);
2191         else
2192         {
2193           and_conditions(&row_cond, idx_remainder_cond);
2194           tab->set_condition(row_cond, __LINE__);
2195         }
2196       }
2197       else
2198         tab->set_condition(idx_remainder_cond, __LINE__);
2199       trace_obj->add("table_condition_attached", tab->condition());
2200       if (tab->select)
2201       {
2202         DBUG_EXECUTE("where", print_where(tab->select->cond, "cond",
2203                      QT_ORDINARY););
2204         tab->select->cond= tab->condition();
2205       }
2206     }
2207   }
2208   DBUG_VOID_RETURN;
2209 }
2210 
2211 
2212 /*
2213   Deny usage of join buffer for the specified table
2214 
2215   SYNOPSIS
2216     set_join_cache_denial()
2217       tab    join table for which join buffer usage is to be denied
2218 
2219   DESCRIPTION
2220     The function denies usage of join buffer when joining the table 'tab'.
2221     The table is marked as not employing any join buffer. If a join cache
2222     object has been already allocated for the table this object is destroyed.
2223 
2224   RETURN
2225     none
2226 */
2227 
2228 static
set_join_cache_denial(JOIN_TAB * join_tab)2229 void set_join_cache_denial(JOIN_TAB *join_tab)
2230 {
2231   if (join_tab->op)
2232   {
2233     join_tab->op->free();
2234     join_tab->op= 0;
2235   }
2236   if (join_tab->use_join_cache)
2237   {
2238     join_tab->use_join_cache= JOIN_CACHE::ALG_NONE;
2239     /*
2240       It could be only sub_select(). It could not be sub_seject_sjm because we
2241       don't do join buffering for the first table in sjm nest.
2242     */
2243     join_tab[-1].next_select= sub_select;
2244   }
2245 }
2246 
2247 
2248 /*
2249   Revise usage of join buffer for the specified table and the whole nest
2250 
2251   SYNOPSIS
2252     revise_cache_usage()
2253       tab    join table for which join buffer usage is to be revised
2254 
2255   DESCRIPTION
2256     The function revise the decision to use a join buffer for the table 'tab'.
2257     If this table happened to be among the inner tables of a nested outer join/
2258     semi-join the functions denies usage of join buffers for all of them
2259 
2260   RETURN
2261     none
2262 */
2263 
2264 static
revise_cache_usage(JOIN_TAB * join_tab)2265 void revise_cache_usage(JOIN_TAB *join_tab)
2266 {
2267   JOIN_TAB *tab;
2268   JOIN_TAB *first_inner;
2269 
2270   set_join_cache_denial(join_tab);
2271 
2272   if (join_tab->first_inner)
2273   {
2274     JOIN_TAB *end_tab= join_tab;
2275     for (first_inner= join_tab->first_inner;
2276          first_inner;
2277          first_inner= first_inner->first_upper)
2278     {
2279       for (tab= end_tab-1; tab >= first_inner; tab--)
2280         set_join_cache_denial(tab);
2281       end_tab= first_inner;
2282     }
2283   }
2284   else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
2285   {
2286     first_inner= join_tab->first_sj_inner_tab;
2287     for (tab= join_tab-1; tab >= first_inner; tab--)
2288     {
2289       if (tab->first_sj_inner_tab == first_inner)
2290         set_join_cache_denial(tab);
2291     }
2292   }
2293 }
2294 
2295 
2296 /**
2297   Set up join buffering for a specified table, if possible.
2298 
2299   @param tab             joined table to check join buffer usage for
2300   @param join            join for which the check is performed
2301   @param options         options of the join
2302   @param no_jbuf_after   don't use join buffering after table with this number
2303   @param icp_other_tables_ok[out] TRUE if condition pushdown supports
2304                                   other tables presence
2305 
2306   @return false if successful, true if error.
2307           Currently, allocation errors for join cache objects are ignored,
2308           and regular execution is chosen silently.
2309 
2310   @details
2311     The function finds out whether the table 'tab' can be joined using a join
2312     buffer. This check is performed after the best execution plan for 'join'
2313     has been chosen. If the function decides that a join buffer can be employed
2314     then it selects the most appropriate join cache object that contains this
2315     join buffer.
2316     If it has already been decided to not use join buffering for this table,
2317     no action is taken.
2318 
2319     Often it is already decided that join buffering will be used earlier in
2320     the optimization process, and this will also ensure that the most correct
2321     cost for the operation is calculated, and hence the probability of
2322     choosing an optimal join plan is higher. However, some join buffering
2323     decisions cannot currently be taken before this stage, hence we need this
2324     function to decide the most accurate join buffering strategy.
2325 
2326     @todo Long-term it is the goal that join buffering strategy is decided
2327     when the plan is selected.
2328 
2329     The result of the check and the type of the the join buffer to be used
2330     depend on:
2331       - the access method to access rows of the joined table
2332       - whether the join table is an inner table of an outer join or semi-join
2333       - the optimizer_switch settings for join buffering
2334       - the join 'options'.
2335     In any case join buffer is not used if the number of the joined table is
2336     greater than 'no_jbuf_after'.
2337 
2338     If block_nested_loop is turned on, and if all other criteria for using
2339     join buffering is fulfilled (see below), then join buffer is used
2340     for any join operation (inner join, outer join, semi-join) with 'JT_ALL'
2341     access method.  In that case, a JOIN_CACHE_BNL object is always employed.
2342 
2343     If an index is used to access rows of the joined table and batched_key_access
2344     is on, then a JOIN_CACHE_BKA object is employed. (Unless debug flag,
2345     test_bka unique, is set, then a JOIN_CACHE_BKA_UNIQUE object is employed
2346     instead.)
2347 
2348     If the function decides that a join buffer can be used to join the table
2349     'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm
2350     and assigns the selected join cache object to the field 'cache' of the
2351     previous join table.  After creating a join cache object, it will be
2352     initialized. Failure to do so, will cause the decision to use join
2353     buffering to be reverted.
2354 
2355   @note
2356     For a nested outer join/semi-join, currently, we either use join buffers for
2357     all inner tables or for none of them.
2358 
2359   @todo
2360     Support BKA inside SJ-Materialization nests. When doing this, we'll need
2361     to only store sj-inner tables in the join buffer.
2362 #if 0
2363         JOIN_TAB *first_tab= join->join_tab+join->const_tables;
2364         uint n_tables= i-join->const_tables;
2365         / *
2366           We normally put all preceding tables into the join buffer, except
2367           for the constant tables.
2368           If we're inside a semi-join materialization nest, e.g.
2369 
2370              outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
2371                                                        ^-- we're here
2372 
2373           then we need to put into the join buffer only the tables from
2374           within the nest.
2375         * /
2376         if (i >= first_sjm_table && i < last_sjm_table)
2377         {
2378           n_tables= i - first_sjm_table; // will be >0 if we got here
2379           first_tab= join->join_tab + first_sjm_table;
2380         }
2381 #endif
2382 
2383 */
2384 
setup_join_buffering(JOIN_TAB * tab,JOIN * join,ulonglong options,uint no_jbuf_after,bool * icp_other_tables_ok)2385 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join,
2386                                  ulonglong options, uint no_jbuf_after,
2387                                  bool *icp_other_tables_ok)
2388 {
2389   uint flags;
2390   Cost_estimate cost;
2391   ha_rows rows;
2392   uint bufsz= 4096;
2393   JOIN_CACHE *prev_cache;
2394   const bool bnl_on= join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL);
2395   const bool bka_on= join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BKA);
2396   const uint tableno= tab - join->join_tab;
2397   const uint tab_sj_strategy= tab->get_sj_strategy();
2398   bool use_bka_unique= false;
2399   DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
2400   *icp_other_tables_ok= TRUE;
2401 
2402   if (!(bnl_on || bka_on) || tableno == join->const_tables)
2403   {
2404     DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2405     return false;
2406   }
2407   if (options & SELECT_NO_JOIN_CACHE)
2408     goto no_join_cache;
2409   /*
2410     psergey-todo: why the below when execution code seems to handle the
2411     "range checked for each record" case?
2412   */
2413   if (tab->use_quick == QS_DYNAMIC_RANGE)
2414     goto no_join_cache;
2415 
2416   /* No join buffering if prevented by no_jbuf_after */
2417   if (tableno > no_jbuf_after)
2418     goto no_join_cache;
2419 
2420   /*
2421     An inner table of an outer join nest must not use join buffering if
2422     the first inner table of that outer join nest does not use join buffering.
2423     This condition is not handled by earlier optimizer stages.
2424   */
2425   if (tab->first_inner != NULL &&
2426       tab->first_inner != tab &&
2427       !tab->first_inner->use_join_cache)
2428     goto no_join_cache;
2429   /*
2430     The first inner table of an outer join nest must not use join buffering
2431     if the tables in the embedding outer join nest do not use join buffering.
2432     This condition is not handled by earlier optimizer stages.
2433   */
2434   if (tab->first_upper != NULL &&
2435       !tab->first_upper->use_join_cache)
2436     goto no_join_cache;
2437 
2438   switch (tab_sj_strategy)
2439   {
2440   case SJ_OPT_FIRST_MATCH:
2441     /*
2442       Use join cache with FirstMatch semi-join strategy only when semi-join
2443       contains only one table.
2444     */
2445     if (!tab->is_single_inner_of_semi_join())
2446     {
2447       DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2448       goto no_join_cache;
2449     }
2450     break;
2451 
2452   case SJ_OPT_LOOSE_SCAN:
2453     /* No join buffering if this semijoin nest is handled by loosescan */
2454     DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2455     goto no_join_cache;
2456 
2457   case SJ_OPT_MATERIALIZE_LOOKUP:
2458   case SJ_OPT_MATERIALIZE_SCAN:
2459     /*
2460       The Materialize strategies reuse the join_tab belonging to the
2461       first table that was materialized. Neither table can use join buffering:
2462       - The first table in a join never uses join buffering.
2463       - The join_tab used for looking up a row in the materialized table, or
2464         scanning the rows of a materialized table, cannot use join buffering.
2465       We allow join buffering for the remaining tables of the materialized
2466       semi-join nest.
2467     */
2468     if (tab->first_sj_inner_tab == tab)
2469     {
2470       DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2471       goto no_join_cache;
2472     }
2473     break;
2474 
2475   case SJ_OPT_DUPS_WEEDOUT:
2476     // This strategy allows the same join buffering as a regular join would.
2477   case SJ_OPT_NONE:
2478     break;
2479   }
2480 
2481   /*
2482     Link with the previous join cache, but make sure that we do not link
2483     join caches of two different operations when the previous operation was
2484     MaterializeLookup or MaterializeScan, ie if:
2485      1. the previous join_tab has join buffering enabled, and
2486      2. the previous join_tab belongs to a materialized semi-join nest, and
2487      3. this join_tab represents a regular table, or is part of a different
2488         semi-join interval than the previous join_tab.
2489   */
2490   prev_cache= (JOIN_CACHE*)(tab-1)->op;
2491   if (prev_cache != NULL &&                                       // 1
2492       sj_is_materialize_strategy((tab-1)->get_sj_strategy()) &&   // 2
2493       tab->first_sj_inner_tab != (tab-1)->first_sj_inner_tab)     // 3
2494     prev_cache= NULL;
2495 
2496   /*
2497     The following code prevents use of join buffering when there is an
2498     outer join operation and first match semi-join strategy is used, because:
2499 
2500     Outer join needs a "match flag" to track that a row should be
2501     NULL-complemented, such flag being attached to first inner table's cache
2502     (tracks whether the cached row from outer table got a match, in which case
2503     no NULL-complemented row is needed).
2504 
2505     FirstMatch also needs a "match flag", such flag is attached to sj inner
2506     table's cache (tracks whether the cached row from outer table already got
2507     a first match in the sj-inner table, in which case we don't need to join
2508     this cached row again)
2509      - but a row in a cache has only one "match flag"
2510      - so if "sj inner table"=="first inner", there is a problem.
2511   */
2512   if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
2513       tab->is_inner_table_of_outer_join())
2514     goto no_join_cache;
2515 
2516   switch (tab->type) {
2517   case JT_ALL:
2518     if (!bnl_on)
2519     {
2520       DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2521       goto no_join_cache;
2522     }
2523 
2524     if ((options & SELECT_DESCRIBE) ||
2525         ((tab->op= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
2526          !tab->op->init()))
2527     {
2528       *icp_other_tables_ok= FALSE;
2529       DBUG_ASSERT(might_do_join_buffering(join_buffer_alg(join->thd), tab));
2530       tab->use_join_cache= JOIN_CACHE::ALG_BNL;
2531       return false;
2532     }
2533     goto no_join_cache;
2534   case JT_SYSTEM:
2535   case JT_CONST:
2536   case JT_REF:
2537   case JT_EQ_REF:
2538     if (!bka_on)
2539     {
2540       DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
2541       goto no_join_cache;
2542     }
2543 
2544     /*
2545       Disable BKA for materializable derived tables/views as they aren't
2546       instantiated yet.
2547     */
2548     if (tab->table->pos_in_table_list->uses_materialization())
2549       goto no_join_cache;
2550 
2551     /*
2552       Can't use BKA for subquery if dealing with a subquery that can
2553       turn a ref access into a "full scan on NULL key" table scan.
2554 
2555       @see Item_in_optimizer::val_int()
2556       @see subselect_single_select_engine::exec()
2557       @see TABLE_REF::cond_guards
2558       @see push_index_cond()
2559 
2560       @todo: This choice to not use BKA should be done before making
2561       cost estimates, e.g. in set_join_buffer_properties(). That
2562       happens before cond guards are set up, so instead of doing the
2563       check below, BKA should be disabled if
2564        - We are in an IN subquery, and
2565        - The IN predicate is not a top_level_item, and
2566        - The left_expr of the IN predicate may contain NULL values
2567          (left_expr->maybe_null)
2568     */
2569     if (tab->has_guarded_conds())
2570       goto no_join_cache;
2571 
2572     flags= HA_MRR_NO_NULL_ENDPOINTS;
2573     if (tab->table->covering_keys.is_set(tab->ref.key))
2574       flags|= HA_MRR_INDEX_ONLY;
2575     rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
2576                                                   &bufsz, &flags, &cost);
2577     /*
2578       Cannot use BKA/BKA_UNIQUE if
2579       1. MRR scan cannot be performed, or
2580       2. MRR default implementation is used
2581       Cannot use BKA if
2582       3. HA_MRR_NO_ASSOCIATION flag is set
2583     */
2584     if ((rows == HA_POS_ERROR) ||                               // 1
2585         (flags & HA_MRR_USE_DEFAULT_IMPL) ||                    // 2
2586         ((flags & HA_MRR_NO_ASSOCIATION) && !use_bka_unique))   // 3
2587       goto no_join_cache;
2588 
2589     if (!(options & SELECT_DESCRIBE))
2590     {
2591       if (use_bka_unique)
2592         tab->op= new JOIN_CACHE_BKA_UNIQUE(join, tab, flags, prev_cache);
2593       else
2594         tab->op= new JOIN_CACHE_BKA(join, tab, flags, prev_cache);
2595 
2596       if (!tab->op || tab->op->init())
2597         goto no_join_cache;
2598     }
2599 
2600     DBUG_ASSERT(might_do_join_buffering(join_buffer_alg(join->thd), tab));
2601     if (use_bka_unique)
2602       tab->use_join_cache= JOIN_CACHE::ALG_BKA_UNIQUE;
2603     else
2604       tab->use_join_cache= JOIN_CACHE::ALG_BKA;
2605 
2606     return false;
2607   default : ;
2608   }
2609 
2610 no_join_cache:
2611   if (bnl_on || bka_on)
2612     revise_cache_usage(tab);
2613   tab->use_join_cache= JOIN_CACHE::ALG_NONE;
2614   return false;
2615 }
2616 
2617 
2618 /**
2619   Setup the materialized table for a semi-join nest
2620 
2621   @param tab       join_tab for the materialized semi-join table
2622   @param tableno   table number of materialized table
2623   @param inner_pos information about the first inner table of the subquery
2624   @param sjm_pos   information about the materialized semi-join table,
2625                    to be filled with data.
2626 
2627   @details
2628     Setup execution structures for one semi-join materialization nest:
2629     - Create the materialization temporary table, including TABLE_LIST object.
2630     - Create a list of Item_field objects per column in the temporary table.
2631     - Create a keyuse array describing index lookups into the table
2632       (for MaterializeLookup)
2633 
2634   @return False if OK, True if error
2635 */
2636 
setup_materialized_table(JOIN_TAB * tab,uint tableno,const POSITION * inner_pos,POSITION * sjm_pos)2637 bool JOIN::setup_materialized_table(JOIN_TAB *tab, uint tableno,
2638                                     const POSITION *inner_pos,
2639                                     POSITION *sjm_pos)
2640 {
2641   DBUG_ENTER("JOIN::setup_materialized_table");
2642   const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
2643   Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
2644   Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec;
2645   const uint field_count= emb_sj_nest->nested_join->sj_inner_exprs.elements;
2646 
2647   DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
2648               inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
2649 
2650   /*
2651     Set up the table to write to, do as select_union::create_result_table does
2652   */
2653   sjm_exec->table_param.init();
2654   sjm_exec->table_param.field_count= field_count;
2655   sjm_exec->table_param.bit_fields_as_long= true;
2656 
2657   char buffer[NAME_LEN];
2658   const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
2659                                 emb_sj_nest->nested_join->query_block_id);
2660   char *name= (char *)alloc_root(thd->mem_root, len + 1);
2661   if (name == NULL)
2662     DBUG_RETURN(true); /* purecov: inspected */
2663 
2664   memcpy(name, buffer, len);
2665   name[len] = '\0';
2666   TABLE *table;
2667   if (!(table= create_tmp_table(thd, &sjm_exec->table_param,
2668                                 emb_sj_nest->nested_join->sj_inner_exprs,
2669                                 NULL,
2670                                 true /* distinct */,
2671                                 true /* save_sum_fields */,
2672                                 thd->variables.option_bits |
2673                                 TMP_TABLE_ALL_COLUMNS,
2674                                 HA_POS_ERROR /* rows_limit */,
2675                                 name)))
2676     DBUG_RETURN(true); /* purecov: inspected */
2677   sjm_exec->table= table;
2678   table->tablenr= tableno;
2679   table->map= (table_map)1 << tableno;
2680   table->file->extra(HA_EXTRA_WRITE_CACHE);
2681   table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
2682   table->reginfo.join_tab= tab;
2683   sj_tmp_tables.push_back(table);
2684   sjm_exec_list.push_back(sjm_exec);
2685 
2686   if (!(sjm_opt->mat_fields=
2687     (Item_field **) alloc_root(thd->mem_root,
2688                                field_count * sizeof(Item_field **))))
2689     DBUG_RETURN(true);
2690 
2691   for (uint fieldno= 0; fieldno < field_count; fieldno++)
2692   {
2693     if (!(sjm_opt->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
2694       DBUG_RETURN(true);
2695   }
2696 
2697   TABLE_LIST *tl;
2698   if (!(tl= (TABLE_LIST *) alloc_root(thd->mem_root, sizeof(TABLE_LIST))))
2699     DBUG_RETURN(true);
2700   // TODO: May have to setup outer-join info for this TABLE_LIST !!!
2701 
2702   tl->init_one_table("", 0, name, strlen(name), name, TL_IGNORE);
2703 
2704   tl->table= table;
2705 
2706   tab->table= table;
2707   tab->position= sjm_pos;
2708   tab->join= this;
2709 
2710   tab->worst_seeks= 1.0;
2711   tab->records= (ha_rows)emb_sj_nest->nested_join->sjm.expected_rowcount;
2712   tab->found_records= tab->records;
2713   tab->read_time= (ha_rows)emb_sj_nest->nested_join->sjm.scan_cost.total_cost();
2714 
2715   tab->on_expr_ref= tl->join_cond_ref();
2716 
2717   tab->materialize_table= join_materialize_semijoin;
2718 
2719   table->pos_in_table_list= tl;
2720   table->keys_in_use_for_query.set_all();
2721   sjm_pos->table= tab;
2722   sjm_pos->sj_strategy= SJ_OPT_NONE;
2723 
2724   sjm_pos->use_join_buffer= false;
2725 
2726   /*
2727     Key_use objects are required so that create_ref_for_key() can set up
2728     a proper ref access for this table.
2729   */
2730   Key_use_array *keyuse=
2731    create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
2732                            emb_sj_nest->nested_join->sj_outer_exprs);
2733   if (!keyuse)
2734     DBUG_RETURN(true);
2735 
2736   double fanout= (tab == join_tab + tab->join->const_tables) ?
2737                  1.0 : (tab-1)->position->prefix_record_count;
2738   if (!sjm_exec->is_scan)
2739   {
2740     sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
2741     tab->keyuse= keyuse->begin();
2742     tab->keys.set_bit(0);          // There is one index - use it always
2743     tab->index= 0;
2744     sjm_pos->set_prefix_costs(1.0, fanout);
2745     sjm_pos->records_read= 1.0;
2746     sjm_pos->read_time= 1.0;
2747   }
2748   else
2749   {
2750     sjm_pos->key= NULL; // No index use for MaterializeScan
2751     sjm_pos->set_prefix_costs(tab->read_time, tab->records * fanout);
2752     sjm_pos->records_read= tab->records;
2753     sjm_pos->read_time= tab->read_time;
2754   }
2755 
2756   DBUG_RETURN(false);
2757 }
2758 
2759 
2760 /**
2761   Plan refinement stage: do various setup things for the executor
2762 
2763   @param join          Join being processed
2764   @param options       Join's options (checking for SELECT_DESCRIBE,
2765                        SELECT_NO_JOIN_CACHE)
2766   @param no_jbuf_after Don't use join buffering after table with this number.
2767 
2768   @return false if successful, true if error (Out of memory)
2769 
2770   @details
2771     Plan refinement stage: do various set ups for the executioner
2772       - setup join buffering use
2773       - push index conditions
2774       - increment relevant counters
2775       - etc
2776 */
2777 
2778 bool
make_join_readinfo(JOIN * join,ulonglong options,uint no_jbuf_after)2779 make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
2780 {
2781   const bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE));
2782 
2783   DBUG_ENTER("make_join_readinfo");
2784 
2785   Opt_trace_context * const trace= &join->thd->opt_trace;
2786   Opt_trace_object wrapper(trace);
2787   Opt_trace_array trace_refine_plan(trace, "refine_plan");
2788 
2789   if (setup_semijoin_dups_elimination(join, options, no_jbuf_after))
2790     DBUG_RETURN(TRUE); /* purecov: inspected */
2791 
2792   for (uint i= join->const_tables; i < join->tables; i++)
2793   {
2794     JOIN_TAB *const tab= join->join_tab+i;
2795     TABLE    *const table= tab->table;
2796     if (!tab->position)
2797       continue;
2798 
2799     bool icp_other_tables_ok;
2800     tab->read_record.table= table;
2801     tab->next_select=sub_select;		/* normal select */
2802     tab->cache_idx_cond= 0;
2803     table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
2804     tab->read_first_record= NULL; // Access methods not set yet
2805     tab->read_record.read_record= NULL;
2806     tab->read_record.unlock_row= rr_unlock_row;
2807 
2808     Opt_trace_object trace_refine_table(trace);
2809     trace_refine_table.add_utf8_table(table);
2810 
2811     if (tab->do_loosescan())
2812     {
2813       if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
2814                                                          loosescan_key_len)))
2815         DBUG_RETURN(TRUE); /* purecov: inspected */
2816     }
2817     switch (tab->type) {
2818     case JT_EQ_REF:
2819     case JT_REF_OR_NULL:
2820     case JT_REF:
2821       if (tab->select)
2822         tab->select->set_quick(NULL);
2823       delete tab->quick;
2824       tab->quick=0;
2825       /* fall through */
2826     case JT_SYSTEM:
2827     case JT_CONST:
2828       /* Only happens with outer joins */
2829       if (setup_join_buffering(tab, join, options, no_jbuf_after,
2830                                &icp_other_tables_ok))
2831         DBUG_RETURN(true);
2832       if (tab->use_join_cache != JOIN_CACHE::ALG_NONE)
2833         tab[-1].next_select= sub_select_op;
2834 
2835       if (table->covering_keys.is_set(tab->ref.key) &&
2836           !table->no_keyread)
2837         table->set_keyread(TRUE);
2838       else
2839         push_index_cond(tab, tab->ref.key, icp_other_tables_ok,
2840                         &trace_refine_table);
2841       break;
2842     case JT_ALL:
2843       if (setup_join_buffering(tab, join, options, no_jbuf_after,
2844                                &icp_other_tables_ok))
2845         DBUG_RETURN(true);
2846       if (tab->use_join_cache != JOIN_CACHE::ALG_NONE)
2847         tab[-1].next_select=sub_select_op;
2848 
2849       /* These init changes read_record */
2850       if (tab->use_quick == QS_DYNAMIC_RANGE)
2851       {
2852 	join->thd->set_status_no_good_index_used();
2853 	tab->read_first_record= join_init_quick_read_record;
2854 	if (statistics)
2855 	  join->thd->inc_status_select_range_check();
2856         trace_refine_table.add_alnum("access_type", "dynamic_range");
2857       }
2858       else
2859       {
2860 	tab->read_first_record= join_init_read_record;
2861 	if (i == join->const_tables)
2862 	{
2863 	  if (tab->select && tab->select->quick)
2864 	  {
2865 	    if (statistics)
2866 	      join->thd->inc_status_select_range();
2867 	  }
2868 	  else
2869 	  {
2870 	    join->thd->set_status_no_index_used();
2871 	    if (statistics)
2872             {
2873               join->thd->inc_status_select_scan();
2874               join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
2875             }
2876 	  }
2877 	}
2878 	else
2879 	{
2880 	  if (tab->select && tab->select->quick)
2881 	  {
2882 	    if (statistics)
2883 	      join->thd->inc_status_select_full_range_join();
2884 	  }
2885 	  else
2886 	  {
2887 	    join->thd->set_status_no_index_used();
2888 	    if (statistics)
2889             {
2890               join->thd->inc_status_select_full_join();
2891               join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
2892             }
2893 	  }
2894 	}
2895 	if (!table->no_keyread)
2896 	{
2897           if (tab->select && tab->select->quick &&
2898               tab->select->quick->index != MAX_KEY && //not index_merge
2899               table->covering_keys.is_set(tab->select->quick->index))
2900             table->set_keyread(TRUE);
2901           else if (!table->covering_keys.is_clear_all() &&
2902                    !(tab->select && tab->select->quick))
2903 	  {					// Only read index tree
2904 	    /*
2905             It has turned out that the below change, while speeding things
2906             up for disk-bound loads, slows them down for cases when the data
2907             is in disk cache (see BUG#35850):
2908 	    //  See bug #26447: "Using the clustered index for a table scan
2909 	    //  is always faster than using a secondary index".
2910             if (table->s->primary_key != MAX_KEY &&
2911                 table->file->primary_key_is_clustered())
2912               tab->index= table->s->primary_key;
2913             else
2914               tab->index=find_shortest_key(table, & table->covering_keys);
2915 	    */
2916             if (!tab->do_loosescan())
2917               tab->index=find_shortest_key(table, & table->covering_keys);
2918 	    tab->read_first_record= join_read_first;
2919             tab->type=JT_INDEX_SCAN;      // Read with index_first / index_next
2920 	  }
2921           else if (!(tab->select && tab->select->quick))
2922           {
2923             DBUG_ASSERT(table->covering_keys.is_clear_all());
2924             if (!tab->do_loosescan())
2925             {
2926               key_map clustering_keys;
2927               for (uint i= 0; i < table->s->keys; i++)
2928               {
2929                 if (tab->keys.is_set(i)
2930                     && table->file->index_flags(i, 0, 0) & HA_CLUSTERED_INDEX)
2931                   clustering_keys.set_bit(i);
2932               }
2933               uint index= find_shortest_key(table, &clustering_keys);
2934               if (index != MAX_KEY)
2935               {
2936                 tab->index= index;
2937                 tab->read_first_record= join_read_first;
2938                 tab->type= JT_INDEX_SCAN;
2939               }
2940             }
2941           }
2942         }
2943         if (tab->select && tab->select->quick &&
2944             tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
2945           push_index_cond(tab, tab->select->quick->index, icp_other_tables_ok,
2946                           &trace_refine_table);
2947         trace_refine_table.add_alnum("access_type",
2948                                      tab->type == JT_INDEX_SCAN ?
2949                                      "index_scan" :
2950                                      (tab->select && tab->select->quick) ?
2951                                      "range" : "table_scan");
2952       }
2953       break;
2954     case JT_FT:
2955       break;
2956     default:
2957       DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
2958       break;					/* purecov: deadcode */
2959     case JT_UNKNOWN:
2960       abort();					/* purecov: deadcode */
2961     }
2962     // Materialize derived tables prior to accessing them.
2963     if (tab->table->pos_in_table_list->uses_materialization())
2964       tab->materialize_table= join_materialize_derived;
2965   }
2966 
2967   for (uint i= join->const_tables; i < join->primary_tables; i++)
2968   {
2969     if (join->join_tab[i].use_join_cache != JOIN_CACHE::ALG_NONE)
2970     {
2971       /*
2972         A join buffer is used for this table. We here inform the optimizer
2973         that it should not rely on rows of the first non-const table being in
2974         order thanks to an index scan; indeed join buffering of the present
2975         table subsequently changes the order of rows.
2976       */
2977       if (join->order != NULL)
2978         join->simple_order= false;
2979       if (join->group_list != NULL)
2980         join->simple_group= false;
2981       break;
2982     }
2983   }
2984 
2985   DBUG_RETURN(FALSE);
2986 }
2987 
2988 
2989 /**
2990   Give error if we some tables are done with a full join.
2991 
2992   This is used by multi_table_update and multi_table_delete when running
2993   in safe mode.
2994 
2995   @param join		Join condition
2996 
2997   @retval
2998     0	ok
2999   @retval
3000     1	Error (full join used)
3001 */
3002 
error_if_full_join(JOIN * join)3003 bool error_if_full_join(JOIN *join)
3004 {
3005   for (uint i= 0; i < join->primary_tables; i++)
3006   {
3007     JOIN_TAB *const tab= join->join_tab + i;
3008 
3009     if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
3010     {
3011       /* This error should not be ignored. */
3012       join->select_lex->no_error= FALSE;
3013       my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
3014                  ER(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
3015       return true;
3016     }
3017   }
3018   return false;
3019 }
3020 
3021 
3022 /**
3023   Cleanup table of join operation.
3024 
3025   @note
3026     Notice that this is not a complete cleanup. In some situations, the
3027     object may be reused after a cleanup operation, hence we cannot set
3028     the table pointer to NULL in this function.
3029 */
3030 
cleanup()3031 void JOIN_TAB::cleanup()
3032 {
3033   delete select;
3034   select= 0;
3035   delete quick;
3036   quick= 0;
3037   limit= 0;
3038 
3039   // Free select that was created for filesort outside of create_sort_index
3040   if (filesort && filesort->select && !filesort->own_select)
3041     delete filesort->select;
3042   delete filesort;
3043   filesort= NULL;
3044   /* Skip non-existing derived tables/views result tables */
3045   if (table &&
3046       (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created()))
3047   {
3048     table->set_keyread(FALSE);
3049     table->file->ha_index_or_rnd_end();
3050 
3051     free_io_cache(table);
3052     filesort_free_buffers(table, true);
3053     /*
3054       We need to reset this for next select
3055       (Tested in part_of_refkey)
3056     */
3057     table->reginfo.join_tab= NULL;
3058     if (table->pos_in_table_list)
3059     {
3060       table->pos_in_table_list->derived_keys_ready= false;
3061       table->pos_in_table_list->derived_key_list.empty();
3062     }
3063   }
3064   end_read_record(&read_record);
3065 }
3066 
sjm_query_block_id() const3067 uint JOIN_TAB::sjm_query_block_id() const
3068 {
3069   return sj_is_materialize_strategy(get_sj_strategy()) ?
3070     first_sj_inner_tab->emb_sj_nest->nested_join->query_block_id : 0;
3071 }
3072 
3073 
3074 /**
3075   Extend join_tab->m_condition and join_tab->select->cond by AND'ing
3076   add_cond to them
3077 
3078   @param add_cond   The condition to AND with the existing conditions
3079   @param line       Code line this method was called from
3080 
3081   @retval true   if there was a memory allocation error
3082   @retval false  otherwise
3083 
3084 */
and_with_jt_and_sel_condition(Item * add_cond,uint line)3085 bool JOIN_TAB::and_with_jt_and_sel_condition(Item *add_cond, uint line)
3086 {
3087   if (and_with_condition(add_cond, line))
3088     return true;
3089 
3090   if (select)
3091   {
3092     DBUG_PRINT("info",
3093                ("select::cond extended. Change %p -> %p "
3094                 "at line %u tab %p select %p",
3095                 select->cond, m_condition, line, this, select));
3096     select->cond= m_condition;
3097   }
3098   return false;
3099 }
3100 
3101 /**
3102   Extend join_tab->cond by AND'ing add_cond to it
3103 
3104   @param add_cond    The condition to AND with the existing cond
3105                      for this JOIN_TAB
3106   @param line        Code line this method was called from
3107 
3108   @retval true   if there was a memory allocation error
3109   @retval false  otherwise
3110 */
and_with_condition(Item * add_cond,uint line)3111 bool JOIN_TAB::and_with_condition(Item *add_cond, uint line)
3112 {
3113   Item *old_cond MY_ATTRIBUTE((unused))= m_condition;
3114   if (and_conditions(&m_condition, add_cond))
3115     return true;
3116   DBUG_PRINT("info", ("JOIN_TAB::m_condition extended. Change %p -> %p "
3117                       "at line %u tab %p",
3118                       old_cond, m_condition, line, this));
3119   return false;
3120 }
3121 
3122 
3123 /**
3124   Check if JOIN_TAB condition was moved to Filesort condition.
3125   If yes then return condition belonging to Filesort, otherwise
3126   return condition belonging to JOIN_TAB.
3127 */
3128 
unified_condition() const3129 Item *JOIN_TAB::unified_condition() const
3130 {
3131   return filesort && filesort->select ? filesort->select->cond : condition();
3132 }
3133 
3134 
3135 /**
3136   Partially cleanup JOIN after it has executed: close index or rnd read
3137   (table cursors), free quick selects.
3138 
3139     This function is called in the end of execution of a JOIN, before the used
3140     tables are unlocked and closed.
3141 
3142     For a join that is resolved using a temporary table, the first sweep is
3143     performed against actual tables and an intermediate result is inserted
3144     into the temprorary table.
3145     The last sweep is performed against the temporary table. Therefore,
3146     the base tables and associated buffers used to fill the temporary table
3147     are no longer needed, and this function is called to free them.
3148 
3149     For a join that is performed without a temporary table, this function
3150     is called after all rows are sent, but before EOF packet is sent.
3151 
3152     For a simple SELECT with no subqueries this function performs a full
3153     cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
3154     tables.
3155 
3156     If a JOIN is executed for a subquery or if it has a subquery, we can't
3157     do the full cleanup and need to do a partial cleanup only.
3158     - If a JOIN is not the top level join, we must not unlock the tables
3159     because the outer select may not have been evaluated yet, and we
3160     can't unlock only selected tables of a query.
3161     - Additionally, if this JOIN corresponds to a correlated subquery, we
3162     should not free quick selects and join buffers because they will be
3163     needed for the next execution of the correlated subquery.
3164     - However, if this is a JOIN for a [sub]select, which is not
3165     a correlated subquery itself, but has subqueries, we can free it
3166     fully and also free JOINs of all its subqueries. The exception
3167     is a subquery in SELECT list, e.g: @n
3168     SELECT a, (select max(b) from t1) group by c @n
3169     This subquery will not be evaluated at first sweep and its value will
3170     not be inserted into the temporary table. Instead, it's evaluated
3171     when selecting from the temporary table. Therefore, it can't be freed
3172     here even though it's not correlated.
3173 
3174   @todo
3175     Unlock tables even if the join isn't top level select in the tree
3176 */
3177 
join_free()3178 void JOIN::join_free()
3179 {
3180   SELECT_LEX_UNIT *tmp_unit;
3181   SELECT_LEX *sl;
3182   /*
3183     Optimization: if not EXPLAIN and we are done with the JOIN,
3184     free all tables.
3185   */
3186   bool full= (!select_lex->uncacheable && !thd->lex->describe);
3187   bool can_unlock= full;
3188   DBUG_ENTER("JOIN::join_free");
3189 
3190   cleanup(full);
3191 
3192   for (tmp_unit= select_lex->first_inner_unit();
3193        tmp_unit;
3194        tmp_unit= tmp_unit->next_unit())
3195     for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
3196     {
3197       Item_subselect *subselect= sl->master_unit()->item;
3198       bool full_local= full && (!subselect || subselect->is_evaluated());
3199       /*
3200         If this join is evaluated, we can fully clean it up and clean up all
3201         its underlying joins even if they are correlated -- they will not be
3202         used any more anyway.
3203         If this join is not yet evaluated, we still must clean it up to
3204         close its table cursors -- it may never get evaluated, as in case of
3205         ... HAVING FALSE OR a IN (SELECT ...))
3206         but all table cursors must be closed before the unlock.
3207       */
3208       sl->cleanup_all_joins(full_local);
3209       /* Can't unlock if at least one JOIN is still needed */
3210       can_unlock= can_unlock && full_local;
3211     }
3212 
3213   /*
3214     We are not using tables anymore
3215     Unlock all tables. We may be in an INSERT .... SELECT statement.
3216   */
3217   if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
3218       !(select_options & SELECT_NO_UNLOCK) &&
3219       !select_lex->subquery_in_having &&
3220       (select_lex == (thd->lex->unit.fake_select_lex ?
3221                       thd->lex->unit.fake_select_lex : &thd->lex->select_lex)))
3222   {
3223     /*
3224       TODO: unlock tables even if the join isn't top level select in the
3225       tree.
3226     */
3227     mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
3228     lock= 0;
3229   }
3230 
3231   DBUG_VOID_RETURN;
3232 }
3233 
3234 
3235 /**
3236   Free resources of given join.
3237 
3238   @param fill   true if we should free all resources, call with full==1
3239                 should be last, before it this function can be called with
3240                 full==0
3241 
3242   @note
3243     With subquery this function definitely will be called several times,
3244     but even for simple query it can be called several times.
3245 */
3246 
cleanup(bool full)3247 void JOIN::cleanup(bool full)
3248 {
3249   DBUG_ENTER("JOIN::cleanup");
3250 
3251   DBUG_ASSERT(const_tables <= primary_tables &&
3252               primary_tables <= tables);
3253 
3254   if (join_tab)
3255   {
3256     JOIN_TAB *tab,*end;
3257 
3258     if (full)
3259     {
3260       for (tab= join_tab, end= tab + tables; tab < end; tab++)
3261 	tab->cleanup();
3262     }
3263     else
3264     {
3265       for (tab= join_tab, end= tab + tables; tab < end; tab++)
3266       {
3267         if (!tab->table)
3268           continue;
3269 	if (tab->table->is_created())
3270         {
3271           tab->table->file->ha_index_or_rnd_end();
3272           if (tab->op &&
3273               tab->op->type() == QEP_operation::OT_TMP_TABLE)
3274           {
3275             int tmp= 0;
3276             if ((tmp= tab->table->file->extra(HA_EXTRA_NO_CACHE)))
3277               tab->table->file->print_error(tmp, MYF(0));
3278           }
3279         }
3280         free_io_cache(tab->table);
3281         filesort_free_buffers(tab->table, full);
3282       }
3283     }
3284   }
3285   /*
3286     We are not using tables anymore
3287     Unlock all tables. We may be in an INSERT .... SELECT statement.
3288   */
3289   if (full)
3290   {
3291     // Run Cached_item DTORs!
3292     group_fields.delete_elements();
3293 
3294     /*
3295       We can't call delete_elements() on copy_funcs as this will cause
3296       problems in free_elements() as some of the elements are then deleted.
3297     */
3298     tmp_table_param.copy_funcs.empty();
3299     tmp_table_param.cleanup();
3300   }
3301   /* Restore ref array to original state */
3302   if (current_ref_ptrs != items0)
3303   {
3304     set_items_ref_array(items0);
3305     set_group_rpa= false;
3306   }
3307   DBUG_VOID_RETURN;
3308 }
3309 
3310 
3311 /**
3312   Filter out ORDER items those are equal to constants in WHERE
3313 
3314   This function is a limited version of remove_const() for use
3315   with non-JOIN statements (i.e. single-table UPDATE and DELETE).
3316 
3317 
3318   @param order            Linked list of ORDER BY arguments
3319   @param cond             WHERE expression
3320 
3321   @return pointer to new filtered ORDER list or NULL if whole list eliminated
3322 
3323   @note
3324     This function overwrites input order list.
3325 */
3326 
simple_remove_const(ORDER * order,Item * where)3327 ORDER *simple_remove_const(ORDER *order, Item *where)
3328 {
3329   if (!order || !where)
3330     return order;
3331 
3332   ORDER *first= NULL, *prev= NULL;
3333   for (; order; order= order->next)
3334   {
3335     DBUG_ASSERT(!order->item[0]->with_sum_func); // should never happen
3336     if (!const_expression_in_where(where, order->item[0]))
3337     {
3338       if (!first)
3339         first= order;
3340       if (prev)
3341         prev->next= order;
3342       prev= order;
3343     }
3344   }
3345   if (prev)
3346     prev->next= NULL;
3347   return first;
3348 }
3349 
3350 
3351 
3352 
3353 /*
3354   Check if equality can be used in removing components of GROUP BY/DISTINCT
3355 
3356   SYNOPSIS
3357     test_if_equality_guarantees_uniqueness()
3358       l          the left comparison argument (a field if any)
3359       r          the right comparison argument (a const of any)
3360 
3361   DESCRIPTION
3362     Checks if an equality predicate can be used to take away
3363     DISTINCT/GROUP BY because it is known to be true for exactly one
3364     distinct value (e.g. <expr> == <const>).
3365     Arguments must be of the same type because e.g.
3366     <string_field> = <int_const> may match more than 1 distinct value from
3367     the column.
3368     We must take into consideration and the optimization done for various
3369     string constants when compared to dates etc (see Item_int_with_ref) as
3370     well as the collation of the arguments.
3371 
3372   RETURN VALUE
3373     TRUE    can be used
3374     FALSE   cannot be used
3375 */
3376 static bool
test_if_equality_guarantees_uniqueness(Item * l,Item * r)3377 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
3378 {
3379   return r->const_item() &&
3380     /* elements must be compared as dates */
3381      (Arg_comparator::can_compare_as_dates(l, r, 0) ||
3382       /* or of the same result type */
3383       (r->result_type() == l->result_type() &&
3384        /* and must have the same collation if compared as strings */
3385        (l->result_type() != STRING_RESULT ||
3386         l->collation.collation == r->collation.collation)));
3387 }
3388 
3389 
3390 /*
3391   Return TRUE if i1 and i2 (if any) are equal items,
3392   or if i1 is a wrapper item around the f2 field.
3393 */
3394 
equal(Item * i1,Item * i2,Field * f2)3395 static bool equal(Item *i1, Item *i2, Field *f2)
3396 {
3397   DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));
3398 
3399   if (i2 != NULL)
3400     return i1->eq(i2, 1);
3401   else if (i1->type() == Item::FIELD_ITEM)
3402     return f2->eq(((Item_field *) i1)->field);
3403   else
3404     return FALSE;
3405 }
3406 
3407 
3408 /**
3409   Test if a field or an item is equal to a constant value in WHERE
3410 
3411   @param        cond            WHERE clause expression
3412   @param        comp_item       Item to find in WHERE expression
3413                                 (if comp_field != NULL)
3414   @param        comp_field      Field to find in WHERE expression
3415                                 (if comp_item != NULL)
3416   @param[out]   const_item      intermediate arg, set to Item pointer to NULL
3417 
3418   @return TRUE if the field is a constant value in WHERE
3419 
3420   @note
3421     comp_item and comp_field parameters are mutually exclusive.
3422 */
3423 bool
const_expression_in_where(Item * cond,Item * comp_item,Field * comp_field,Item ** const_item)3424 const_expression_in_where(Item *cond, Item *comp_item, Field *comp_field,
3425                           Item **const_item)
3426 {
3427   DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));
3428 
3429   Item *intermediate= NULL;
3430   if (const_item == NULL)
3431     const_item= &intermediate;
3432 
3433   if (cond->type() == Item::COND_ITEM)
3434   {
3435     bool and_level= (((Item_cond*) cond)->functype()
3436 		     == Item_func::COND_AND_FUNC);
3437     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
3438     Item *item;
3439     while ((item=li++))
3440     {
3441       bool res=const_expression_in_where(item, comp_item, comp_field,
3442                                          const_item);
3443       if (res)					// Is a const value
3444       {
3445 	if (and_level)
3446 	  return 1;
3447       }
3448       else if (!and_level)
3449 	return 0;
3450     }
3451     return and_level ? 0 : 1;
3452   }
3453   else if (cond->eq_cmp_result() != Item::COND_OK)
3454   {						// boolean compare function
3455     Item_func* func= (Item_func*) cond;
3456     if (func->functype() != Item_func::EQUAL_FUNC &&
3457 	func->functype() != Item_func::EQ_FUNC)
3458       return 0;
3459     Item *left_item=	((Item_func*) cond)->arguments()[0];
3460     Item *right_item= ((Item_func*) cond)->arguments()[1];
3461     if (equal(left_item, comp_item, comp_field))
3462     {
3463       if (test_if_equality_guarantees_uniqueness (left_item, right_item))
3464       {
3465 	if (*const_item)
3466 	  return right_item->eq(*const_item, 1);
3467 	*const_item=right_item;
3468 	return 1;
3469       }
3470     }
3471     else if (equal(right_item, comp_item, comp_field))
3472     {
3473       if (test_if_equality_guarantees_uniqueness (right_item, left_item))
3474       {
3475 	if (*const_item)
3476 	  return left_item->eq(*const_item, 1);
3477 	*const_item=left_item;
3478 	return 1;
3479       }
3480     }
3481   }
3482   return 0;
3483 }
3484 
3485 /**
3486   Test if this is a prefix index.
3487 
3488   @param   table     table
3489   @param   idx       index to check
3490 
3491   @return TRUE if this is a prefix index
3492 */
is_prefix_index(TABLE * table,uint idx)3493 bool is_prefix_index(TABLE* table, uint idx)
3494 {
3495   if (!table || !table->key_info)
3496   {
3497     return false;
3498   }
3499   KEY* key_info = table->key_info;
3500   uint key_parts = key_info[idx].user_defined_key_parts;
3501   KEY_PART_INFO* key_part = key_info[idx].key_part;
3502 
3503   for (uint i = 0; i < key_parts; i++, key_part++)
3504   {
3505     if (key_part->field &&
3506       (key_part->length !=
3507         table->field[key_part->fieldnr - 1]->key_length() &&
3508         !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
3509     {
3510       return true;
3511     }
3512   }
3513   return false;
3514 }
3515 
3516 /**
3517   Test if one can use the key to resolve ORDER BY.
3518 
3519   @param order                 Sort order
3520   @param table                 Table to sort
3521   @param idx                   Index to check
3522   @param used_key_parts [out]  NULL by default, otherwise return value for
3523                                used key parts.
3524 
3525 
3526   @note
3527     used_key_parts is set to correct key parts used if return value != 0
3528     (On other cases, used_key_part may be changed)
3529     Note that the value may actually be greater than the number of index
3530     key parts. This can happen for storage engines that have the primary
3531     key parts as a suffix for every secondary key.
3532 
3533   @retval
3534     1   key is ok.
3535   @retval
3536     0   Key can't be used
3537   @retval
3538     -1   Reverse key can be used
3539 */
3540 
test_if_order_by_key(ORDER * order,TABLE * table,uint idx,uint * used_key_parts)3541 int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
3542                          uint *used_key_parts)
3543 {
3544   KEY_PART_INFO *key_part,*key_part_end;
3545   key_part=table->key_info[idx].key_part;
3546   key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
3547   key_part_map const_key_parts=table->const_key_parts[idx];
3548   int reverse=0;
3549   uint key_parts;
3550   my_bool on_pk_suffix= FALSE;
3551   DBUG_ENTER("test_if_order_by_key");
3552 
3553   for (; order ; order=order->next, const_key_parts>>=1)
3554   {
3555     /*
3556       Since only fields can be indexed, ORDER BY <something> that is
3557       not a field cannot be resolved by using an index.
3558     */
3559     Item *real_itm= (*order->item)->real_item();
3560     if (real_itm->type() != Item::FIELD_ITEM)
3561       DBUG_RETURN(0);
3562 
3563     Field *field= static_cast<Item_field*>(real_itm)->field;
3564     int flag;
3565 
3566     /*
3567       Skip key parts that are constants in the WHERE clause.
3568       These are already skipped in the ORDER BY by const_expression_in_where()
3569     */
3570     for (; const_key_parts & 1 ; const_key_parts>>= 1)
3571       key_part++;
3572 
3573 #ifdef WITH_PARTITION_STORAGE_ENGINE
3574     /* Avoid usage of prefix index for sorting a partition table */
3575     if (table->part_info && key_part != table->key_info[idx].key_part &&
3576       key_part != key_part_end && is_prefix_index(table, idx))
3577      DBUG_RETURN(0);
3578 #endif
3579 
3580     if (key_part == key_part_end)
3581     {
3582       /*
3583         We are at the end of the key. Check if the engine has the primary
3584         key as a suffix to the secondary keys. If it has continue to check
3585         the primary key as a suffix.
3586       */
3587       if (!on_pk_suffix &&
3588           (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
3589           table->s->primary_key != MAX_KEY &&
3590           table->s->primary_key != idx)
3591       {
3592         on_pk_suffix= TRUE;
3593         key_part= table->key_info[table->s->primary_key].key_part;
3594         key_part_end=key_part +
3595           table->key_info[table->s->primary_key].user_defined_key_parts;
3596         const_key_parts=table->const_key_parts[table->s->primary_key];
3597 
3598         for (; const_key_parts & 1 ; const_key_parts>>= 1)
3599           key_part++;
3600         /*
3601          The primary and secondary key parts were all const (i.e. there's
3602          one row).  The sorting doesn't matter.
3603         */
3604         if (key_part == key_part_end && reverse == 0)
3605         {
3606           key_parts= 0;
3607           reverse= 1;
3608           goto ok;
3609         }
3610       }
3611       else
3612         DBUG_RETURN(0);
3613     }
3614 
3615     if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
3616       DBUG_RETURN(0);
3617 
3618     const ORDER::enum_order keypart_order=
3619       (key_part->key_part_flag & HA_REVERSE_SORT) ?
3620       ORDER::ORDER_DESC : ORDER::ORDER_ASC;
3621     /* set flag to 1 if we can use read-next on key, else to -1 */
3622     flag= (order->direction == keypart_order) ? 1 : -1;
3623     if (reverse && flag != reverse)
3624       DBUG_RETURN(0);
3625     reverse=flag;				// Remember if reverse
3626     key_part++;
3627   }
3628   if (on_pk_suffix)
3629   {
3630     uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
3631     uint used_key_parts_pk=
3632       (uint) (key_part - table->key_info[table->s->primary_key].key_part);
3633     key_parts= used_key_parts_pk + used_key_parts_secondary;
3634 
3635     if (reverse == -1 &&
3636         (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
3637            HA_READ_PREV) ||
3638          !(table->file->index_flags(table->s->primary_key,
3639                                     used_key_parts_pk - 1, 1) & HA_READ_PREV)))
3640       reverse= 0;                               // Index can't be used
3641   }
3642   else
3643   {
3644     key_parts= (uint) (key_part - table->key_info[idx].key_part);
3645     if (reverse == -1 &&
3646         !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
3647       reverse= 0;                               // Index can't be used
3648   }
3649 ok:
3650   if (used_key_parts != NULL)
3651     *used_key_parts= key_parts;
3652   DBUG_RETURN(reverse);
3653 }
3654 
3655 
3656 /**
3657   Find shortest key suitable for full table scan.
3658 
3659   @param table                 Table to scan
3660   @param usable_keys           Allowed keys
3661 
3662   @note
3663      As far as
3664      1) clustered primary key entry data set is a set of all record
3665         fields (key fields and not key fields) and
3666      2) secondary index entry data is a union of its key fields and
3667         primary key fields (at least InnoDB and its derivatives don't
3668         duplicate primary key fields there, even if the primary and
3669         the secondary keys have a common subset of key fields),
3670      then secondary index entry data is always a subset of primary key entry.
3671      Unfortunately, key_info[nr].key_length doesn't show the length
3672      of key/pointer pair but a sum of key field lengths only, thus
3673      we can't estimate index IO volume comparing only this key_length
3674      value of secondary keys and clustered PK.
3675      So, try secondary keys first, and choose PK only if there are no
3676      usable secondary covering keys or found best secondary key include
3677      all table fields (i.e. same as PK):
3678 
3679   @return
3680     MAX_KEY     no suitable key found
3681     key index   otherwise
3682 */
3683 
find_shortest_key(TABLE * table,const key_map * usable_keys)3684 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
3685 {
3686   uint best= MAX_KEY;
3687   uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
3688                              table->s->primary_key != MAX_KEY &&
3689                              usable_keys->is_set(table->s->primary_key)) ?
3690                             table->s->primary_key : MAX_KEY;
3691   if (!usable_keys->is_clear_all())
3692   {
3693     uint min_length= (uint) ~0;
3694     for (uint nr=0; nr < table->s->keys ; nr++)
3695     {
3696       if (nr == usable_clustered_pk)
3697         continue;
3698       if (usable_keys->is_set(nr))
3699       {
3700         if (table->key_info[nr].key_length < min_length)
3701         {
3702           min_length=table->key_info[nr].key_length;
3703           best=nr;
3704         }
3705       }
3706     }
3707   }
3708   if (usable_clustered_pk != MAX_KEY)
3709   {
3710     /*
3711      If the primary key is clustered and found shorter key covers all table
3712      fields and is not clustering then primary key scan normally would be
3713      faster because amount of data to scan is the same but PK is clustered.
3714      It's safe to compare key parts with table fields since duplicate key
3715      parts aren't allowed.
3716      */
3717     if (best == MAX_KEY ||
3718         ((table->key_info[best].user_defined_key_parts >= table->s->fields)
3719          && !(table->file->index_flags(best, 0, 0) & HA_CLUSTERED_INDEX)))
3720       best= usable_clustered_pk;
3721   }
3722   return best;
3723 }
3724 
3725 /**
3726   Test if a second key is the subkey of the first one.
3727 
3728   @param key_part              First key parts
3729   @param ref_key_part          Second key parts
3730   @param ref_key_part_end      Last+1 part of the second key
3731 
3732   @note
3733     Second key MUST be shorter than the first one.
3734 
3735   @retval
3736     1	is a subkey
3737   @retval
3738     0	no sub key
3739 */
3740 
3741 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)3742 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
3743 	  KEY_PART_INFO *ref_key_part_end)
3744 {
3745   for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
3746     if (!key_part->field->eq(ref_key_part->field))
3747       return 0;
3748   return 1;
3749 }
3750 
3751 /**
3752   Test if REF_OR_NULL optimization will be used if the specified
3753   ref_key is used for REF-access to 'tab'
3754 
3755   @retval
3756     true	JT_REF_OR_NULL will be used
3757   @retval
3758     false	no JT_REF_OR_NULL access
3759 */
3760 bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)3761 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
3762 {
3763   if (tab->keyuse)
3764   {
3765     const Key_use *keyuse= tab->keyuse;
3766     while (keyuse->key != ref_key && keyuse->table == tab->table)
3767       keyuse++;
3768 
3769     const table_map const_tables= tab->join->const_table_map;
3770     while (keyuse->key == ref_key && keyuse->table == tab->table)
3771     {
3772       if (!(keyuse->used_tables & ~const_tables))
3773       {
3774         if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
3775           return true;
3776       }
3777       keyuse++;
3778     }
3779   }
3780   return false;
3781 }
3782 
3783 /**
3784   Test if we can use one of the 'usable_keys' instead of 'ref' key
3785   for sorting.
3786 
3787   @param ref			Number of key, used for WHERE clause
3788   @param usable_keys		Keys for testing
3789 
3790   @return
3791     - MAX_KEY			If we can't use other key
3792     - the number of found key	Otherwise
3793 */
3794 
3795 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)3796 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
3797 	       const key_map *usable_keys)
3798 {
3799   uint nr;
3800   uint min_length= (uint) ~0;
3801   uint best= MAX_KEY;
3802   TABLE *table= tab->table;
3803   KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
3804   KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
3805 
3806   for (nr= 0 ; nr < table->s->keys ; nr++)
3807   {
3808     if (usable_keys->is_set(nr) &&
3809 	table->key_info[nr].key_length < min_length &&
3810 	table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
3811 	is_subkey(table->key_info[nr].key_part, ref_key_part,
3812 		  ref_key_part_end) &&
3813         !is_ref_or_null_optimized(tab, nr) &&
3814 	test_if_order_by_key(order, table, nr))
3815     {
3816       min_length= table->key_info[nr].key_length;
3817       best= nr;
3818     }
3819   }
3820   return best;
3821 }
3822 
3823 
3824 /**
3825   It is not obvious to see that test_if_skip_sort_order() never changes the
3826   plan if no_changes is true. So we double-check: creating an instance of this
3827   class saves some important access-path-related information of the current
3828   table; when the instance is destroyed, the latest access-path information is
3829   compared with saved data.
3830 */
3831 class Plan_change_watchdog
3832 {
3833 #ifndef DBUG_OFF
3834 public:
3835   /**
3836     @param tab_arg     table whose access path is being determined
3837     @param no_changes  whether a change to the access path is allowed
3838   */
Plan_change_watchdog(const JOIN_TAB * tab_arg,const bool no_changes_arg)3839   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
3840   {
3841     // Only to keep gcc 4.1.2-44 silent about uninitialized variables
3842     quick= NULL;
3843     quick_index= 0;
3844     if (no_changes_arg)
3845     {
3846       tab= tab_arg;
3847       type= tab->type;
3848       if ((select= tab->select))
3849         if ((quick= tab->select->quick))
3850           quick_index= quick->index;
3851       use_quick= tab->use_quick;
3852       ref_key= tab->ref.key;
3853       ref_key_parts= tab->ref.key_parts;
3854       index= tab->index;
3855     }
3856     else
3857     {
3858       tab= NULL;
3859       // Only to keep gcc 4.1.2-44 silent about uninitialized variables
3860       type= JT_UNKNOWN;
3861       select= NULL;
3862       ref_key= ref_key_parts= index= 0;
3863       use_quick= QS_NONE;
3864     }
3865   }
~Plan_change_watchdog()3866   ~Plan_change_watchdog()
3867   {
3868     if (tab == NULL)
3869       return;
3870     // changes are not allowed, we verify:
3871     DBUG_ASSERT(tab->type == type);
3872     DBUG_ASSERT(tab->select == select);
3873     if (select != NULL)
3874     {
3875       DBUG_ASSERT(tab->select->quick == quick);
3876       if (quick != NULL)
3877         DBUG_ASSERT(tab->select->quick->index == quick_index);
3878     }
3879     DBUG_ASSERT(tab->use_quick == use_quick);
3880     DBUG_ASSERT(tab->ref.key == ref_key);
3881     DBUG_ASSERT(tab->ref.key_parts == ref_key_parts);
3882     DBUG_ASSERT(tab->index == index);
3883   }
3884 private:
3885   const JOIN_TAB *tab;            ///< table, or NULL if changes are allowed
3886   enum join_type type;            ///< copy of tab->type
3887   // "Range / index merge" info
3888   const SQL_SELECT *select;       ///< copy of tab->select
3889   const QUICK_SELECT_I *quick;    ///< copy of tab->select->quick
3890   uint quick_index;               ///< copy of tab->select->quick->index
3891   enum quick_type use_quick;      ///< copy of tab->use_quick
3892   // "ref access" info
3893   int ref_key;                    ///< copy of tab->ref.key
3894   uint ref_key_parts;/// copy of tab->ref.key_parts
3895   // Other index-related info
3896   uint index;                     ///< copy of tab->index
3897 #else // in non-debug build, empty class
3898 public:
3899   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
3900 #endif
3901 };
3902 
3903 /**
3904   Test if we can skip the ORDER BY by using an index.
3905 
3906   SYNOPSIS
3907     test_if_skip_sort_order()
3908       tab
3909       order
3910       select_limit
3911       no_changes
3912       map
3913 
3914   If we can use an index, the JOIN_TAB / tab->select struct
3915   is changed to use the index.
3916 
3917   The index must cover all fields in <order>, or it will not be considered.
3918 
3919   @param tab           NULL or JOIN_TAB of the accessed table
3920   @param order         Linked list of ORDER BY arguments
3921   @param select_limit  LIMIT value, or HA_POS_ERROR if no limit
3922   @param no_changes    No changes will be made to the query plan.
3923   @param map           key_map of applicable indexes.
3924   @param clause_type   "ORDER BY" etc for printing in optimizer trace
3925 
3926   @todo
3927     - sergeyp: Results of all index merge selects actually are ordered
3928     by clustered PK values.
3929 
3930   @retval
3931     0    We have to use filesort to do the sorting
3932   @retval
3933     1    We can use an index.
3934 */
3935 
3936 bool
test_if_skip_sort_order(JOIN_TAB * tab,ORDER * order,ha_rows select_limit,const bool no_changes,const key_map * map,const char * clause_type)3937 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
3938                         const bool no_changes, const key_map *map,
3939                         const char *clause_type)
3940 {
3941   int ref_key;
3942   uint ref_key_parts;
3943   int order_direction= 0;
3944   uint used_key_parts;
3945   TABLE *table=tab->table;
3946   SQL_SELECT *select=tab->select;
3947   QUICK_SELECT_I *save_quick= select ? select->quick : NULL;
3948   int best_key= -1;
3949   Item *orig_cond;
3950   bool orig_cond_saved= false, set_up_ref_access_to_key= false;
3951   bool can_skip_sorting= false;                  // used as return value
3952   int changed_key= -1;
3953   DBUG_ENTER("test_if_skip_sort_order");
3954   LINT_INIT(ref_key_parts);
3955   LINT_INIT(orig_cond);
3956 
3957   /* Check that we are always called with first non-const table */
3958   DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);
3959 
3960   Plan_change_watchdog watchdog(tab, no_changes);
3961 
3962   /* Sorting a single row can always be skipped */
3963   if (tab->type == JT_EQ_REF ||
3964       tab->type == JT_CONST  ||
3965       tab->type == JT_SYSTEM)
3966   {
3967     DBUG_RETURN(1);
3968   }
3969 
3970   /* Check that we are always called with first non-const table */
3971   DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);
3972 
3973   /*
3974     Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
3975     been taken into account.
3976   */
3977   key_map usable_keys= *map;
3978 
3979   for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
3980   {
3981     Item *item= (*tmp_order->item)->real_item();
3982     if (item->type() != Item::FIELD_ITEM)
3983     {
3984       usable_keys.clear_all();
3985       DBUG_RETURN(0);
3986     }
3987     usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
3988     if (usable_keys.is_clear_all())
3989       DBUG_RETURN(0);					// No usable keys
3990   }
3991 
3992   ref_key= -1;
3993   /* Test if constant range in WHERE */
3994   if (tab->ref.key >= 0 && tab->ref.key_parts)
3995   {
3996     if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
3997       DBUG_RETURN(0);
3998     ref_key=	   tab->ref.key;
3999     ref_key_parts= tab->ref.key_parts;
4000   }
4001   else if (select && select->quick)		// Range found by opt_range
4002   {
4003     int quick_type= select->quick->get_type();
4004     /*
4005       assume results are not ordered when index merge is used
4006       TODO: sergeyp: Results of all index merge selects actually are ordered
4007       by clustered PK values.
4008     */
4009 
4010     if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
4011         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
4012         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
4013       DBUG_RETURN(0);
4014     ref_key=	   select->quick->index;
4015     ref_key_parts= select->quick->used_key_parts;
4016   }
4017 
4018   /*
4019     If part of the select condition has been pushed we use the
4020     select condition as it was before pushing. The original
4021     select condition is saved so that it can be restored when
4022     exiting this function (if we have not changed index).
4023   */
4024   if (tab->pre_idx_push_cond)
4025   {
4026     orig_cond=
4027       tab->set_jt_and_sel_condition(tab->pre_idx_push_cond, __LINE__);
4028     orig_cond_saved= true;
4029   }
4030 
4031   Opt_trace_context * const trace= &tab->join->thd->opt_trace;
4032   Opt_trace_object trace_wrapper(trace);
4033   Opt_trace_object
4034     trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
4035   trace_skip_sort_order.add_alnum("clause", clause_type);
4036 
4037   if (ref_key >= 0)
4038   {
4039     /*
4040       We come here when there is a {ref or or ordered range access} key.
4041     */
4042     if (!usable_keys.is_set(ref_key))
4043     {
4044       /*
4045         We come here when ref_key is not among usable_keys, try to find a
4046         usable prefix key of that key.
4047       */
4048       uint new_ref_key;
4049       /*
4050 	If using index only read, only consider other possible index only
4051 	keys
4052       */
4053       if (table->covering_keys.is_set(ref_key))
4054 	usable_keys.intersect(table->covering_keys);
4055 
4056       if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
4057 				       &usable_keys)) < MAX_KEY)
4058       {
4059 	/* Found key that can be used to retrieve data in sorted order */
4060 	if (tab->ref.key >= 0)
4061         {
4062           /*
4063             We'll use ref access method on key new_ref_key. The actual change
4064             is done further down in this function where we update the plan.
4065           */
4066           set_up_ref_access_to_key= true;
4067         }
4068 	else if (!no_changes)
4069 	{
4070           /*
4071             The range optimizer constructed QUICK_RANGE for ref_key, and
4072             we want to use instead new_ref_key as the index. We can't
4073             just change the index of the quick select, because this may
4074             result in an incosistent QUICK_SELECT object. Below we
4075             create a new QUICK_SELECT from scratch so that all its
4076             parameres are set correctly by the range optimizer.
4077 
4078             Note that the range optimizer is NOT called if
4079             no_changes==true. This reason is that the range optimizer
4080             cannot find a QUICK that can return ordered result unless
4081             index access (ref or index scan) is also able to do so
4082             (which test_if_order_by_key () will tell).
4083             Admittedly, range access may be much more efficient than
4084             e.g. index scan, but the only thing that matters when
4085             no_change==true is the answer to the question: "Is it
4086             possible to avoid sorting if an index is used to access
4087             this table?". The answer does not depend on the outcome of
4088             the range optimizer.
4089           */
4090           key_map new_ref_key_map;  // Force the creation of quick select
4091           new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
4092 
4093           Opt_trace_object
4094             trace_recest(trace, "rows_estimation");
4095           trace_recest.add_utf8_table(tab->table).
4096           add_utf8("index", table->key_info[new_ref_key].name);
4097           select->quick= 0;
4098           if (select->test_quick_select(tab->join->thd,
4099                                         new_ref_key_map,
4100                                         0,       // empty table_map
4101                                         (tab->join->select_options &
4102                                          OPTION_FOUND_ROWS) ?
4103                                         HA_POS_ERROR :
4104                                         tab->join->unit->select_limit_cnt,
4105                                         false,   // don't force quick range
4106                                         order->direction) <= 0)
4107           {
4108             can_skip_sorting= false;
4109             goto fix_ICP;
4110           }
4111 	}
4112         ref_key= new_ref_key;
4113         changed_key= new_ref_key;
4114       }
4115     }
4116     /* Check if we get the rows in requested sorted order by using the key */
4117     if (usable_keys.is_set(ref_key) &&
4118         (order_direction= test_if_order_by_key(order,table,ref_key,
4119 					       &used_key_parts)))
4120       goto check_reverse_order;
4121   }
4122   {
4123     /*
4124       There was no {ref or or ordered range access} key, or it was not
4125       satisfying, neither was any prefix of it. Do a cost-based search on all
4126       keys:
4127     */
4128     uint best_key_parts= 0;
4129     uint saved_best_key_parts= 0;
4130     int best_key_direction= 0;
4131     JOIN *join= tab->join;
4132     ha_rows table_records= table->file->stats.records;
4133 
4134     test_if_cheaper_ordering(tab, order, table, usable_keys,
4135                              ref_key, select_limit,
4136                              &best_key, &best_key_direction,
4137                              &select_limit, &best_key_parts,
4138                              &saved_best_key_parts);
4139 
4140     if (best_key < 0)
4141     {
4142       // No usable key has been found
4143       can_skip_sorting= false;
4144       goto fix_ICP;
4145     }
4146 
4147     /*
4148       Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
4149       clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
4150       clause is order by)?
4151     */
4152     const bool is_group_by= join && join->group && order == join->group_list;
4153     const bool is_force_index= table->force_index ||
4154       (is_group_by ? table->force_index_group : table->force_index_order);
4155 
4156     /*
4157       filesort() and join cache are usually faster than reading in
4158       index order and not using join cache. Don't use index scan
4159       unless:
4160        - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
4161          the user knows what's best)
4162        - the chosen index is clustered primary key (table scan is not cheaper)
4163     */
4164     if (!is_force_index &&
4165         (select_limit >= table_records) &&
4166         (tab->type == JT_ALL &&
4167          tab->join->primary_tables > tab->join->const_tables + 1) &&
4168          ((unsigned) best_key != table->s->primary_key ||
4169           !table->file->primary_key_is_clustered()) &&
4170         !(best_key >= 0
4171           && (table->file->index_flags(best_key, 0, 0) & HA_CLUSTERED_INDEX)))
4172     {
4173       can_skip_sorting= false;
4174       goto fix_ICP;
4175     }
4176 
4177     if (select &&
4178         table->quick_keys.is_set(best_key) &&
4179         !tab->quick_order_tested.is_set(best_key) &&
4180         best_key != ref_key)
4181     {
4182       tab->quick_order_tested.set_bit(best_key);
4183       Opt_trace_object
4184         trace_recest(trace, "rows_estimation");
4185       trace_recest.add_utf8_table(tab->table).
4186         add_utf8("index", table->key_info[best_key].name);
4187 
4188       key_map map;           // Force the creation of quick select
4189       map.set_bit(best_key); // only best_key.
4190       select->quick= 0;
4191       select->test_quick_select(join->thd,
4192                                 map,
4193                                 0,        // empty table_map
4194                                 join->select_options & OPTION_FOUND_ROWS ?
4195                                 HA_POS_ERROR :
4196                                 join->unit->select_limit_cnt,
4197                                 true,     // force quick range
4198                                 order->direction);
4199     }
4200     order_direction= best_key_direction;
4201     /*
4202       saved_best_key_parts is actual number of used keyparts found by the
4203       test_if_order_by_key function. It could differ from keyinfo->key_parts,
4204       thus we have to restore it in case of desc order as it affects
4205       QUICK_SELECT_DESC behaviour.
4206     */
4207     used_key_parts= (order_direction == -1) ?
4208       saved_best_key_parts :  best_key_parts;
4209     changed_key= best_key;
4210     // We will use index scan or range scan:
4211     set_up_ref_access_to_key= false;
4212   }
4213 
4214 check_reverse_order:
4215   DBUG_ASSERT(order_direction != 0);
4216 
4217   if (order_direction == -1)		// If ORDER BY ... DESC
4218   {
4219     if (select && select->quick)
4220     {
4221       /*
4222 	Don't reverse the sort order, if it's already done.
4223         (In some cases test_if_order_by_key() can be called multiple times
4224       */
4225       if (select->quick->reverse_sorted())
4226       {
4227         can_skip_sorting= true;
4228         goto fix_ICP;
4229       }
4230 
4231       if (select->quick->reverse_sort_possible())
4232         can_skip_sorting= true;
4233       else
4234       {
4235         can_skip_sorting= false;
4236         goto fix_ICP;
4237       }
4238 
4239       /*
4240         test_quick_select() should not create a quick that cannot do
4241         reverse ordering
4242       */
4243       DBUG_ASSERT((select->quick == save_quick) || can_skip_sorting);
4244     }
4245     else
4246     {
4247       // Other index access (ref or scan) poses no problem
4248       can_skip_sorting= true;
4249     }
4250   }
4251   else
4252   {
4253     // ORDER BY ASC poses no problem
4254     can_skip_sorting= true;
4255   }
4256 
4257   DBUG_ASSERT(can_skip_sorting);
4258 
4259   /*
4260     Update query plan with access pattern for doing
4261     ordered access according to what we have decided
4262     above.
4263   */
4264   if (!no_changes) // We are allowed to update QEP
4265   {
4266     if (set_up_ref_access_to_key)
4267     {
4268       /*
4269         We'll use ref access method on key changed_key. In general case
4270         the index search tuple for changed_ref_key will be different (e.g.
4271         when one index is defined as (part1, part2, ...) and another as
4272         (part1, part2(N), ...) and the WHERE clause contains
4273         "part1 = const1 AND part2=const2".
4274         So we build tab->ref from scratch here.
4275       */
4276       Key_use *keyuse= tab->keyuse;
4277       while (keyuse->key != (uint)changed_key && keyuse->table == tab->table)
4278         keyuse++;
4279 
4280       if (create_ref_for_key(tab->join, tab, keyuse, tab->prefix_tables()))
4281       {
4282         can_skip_sorting= false;
4283         goto fix_ICP;
4284       }
4285 
4286       DBUG_ASSERT(tab->type != JT_REF_OR_NULL && tab->type != JT_FT);
4287     }
4288     else if (best_key >= 0)
4289     {
4290       bool quick_created=
4291         (select && select->quick && select->quick!=save_quick);
4292 
4293       /*
4294         If ref_key used index tree reading only ('Using index' in EXPLAIN),
4295         and best_key doesn't, then revert the decision.
4296       */
4297       if(!table->covering_keys.is_set(best_key))
4298         table->set_keyread(false);
4299       if (!quick_created)
4300       {
4301         if (select)                  // Throw any existing quick select
4302           select->quick= 0;          // Cleanup either reset to save_quick,
4303                                      // or 'delete save_quick'
4304         tab->index= best_key;
4305         tab->read_first_record= order_direction > 0 ?
4306                                 join_read_first:join_read_last;
4307         tab->type=JT_INDEX_SCAN;       // Read with index_first(), index_next()
4308 
4309         table->file->ha_index_or_rnd_end();
4310         if (tab->join->select_options & SELECT_DESCRIBE)
4311         {
4312           /*
4313             @todo this neutralizes add_ref_to_table_cond(); as a result
4314             EXPLAIN shows no "using where" though real SELECT has one.
4315           */
4316           tab->ref.key= -1;
4317           tab->ref.key_parts= 0;
4318           if (select_limit < table->file->stats.records)
4319             tab->limit= select_limit;
4320         }
4321       }
4322       else if (tab->type != JT_ALL)
4323       {
4324         /*
4325           We're about to use a quick access to the table.
4326           We need to change the access method so as the quick access
4327           method is actually used.
4328         */
4329         DBUG_ASSERT(tab->select->quick);
4330         DBUG_ASSERT(tab->select->quick->index==(uint)best_key);
4331         tab->type=JT_ALL;
4332         tab->use_quick=QS_RANGE;
4333         tab->ref.key= -1;
4334         tab->ref.key_parts=0;		// Don't use ref key.
4335         tab->read_first_record= join_init_read_record;
4336         if (tab->is_using_loose_index_scan())
4337           tab->join->tmp_table_param.precomputed_group_by= TRUE;
4338         /*
4339           TODO: update the number of records in tab->position
4340         */
4341       }
4342     } // best_key >= 0
4343 
4344     if (order_direction == -1)		// If ORDER BY ... DESC
4345     {
4346       if (select && select->quick)
4347       {
4348         /* ORDER BY range_key DESC */
4349         QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts);
4350         if (!tmp)
4351         {
4352           tab->limit= 0;
4353           can_skip_sorting= false;      // Reverse sort failed -> filesort
4354           goto fix_ICP;
4355         }
4356         if (select->quick == save_quick)
4357           save_quick= 0;                // Because set_quick(tmp) frees it
4358         select->set_quick(tmp);
4359       }
4360       else if (tab->type != JT_INDEX_SCAN && tab->type != JT_REF_OR_NULL &&
4361                tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts)
4362       {
4363         /*
4364           SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
4365 
4366           Use a traversal function that starts by reading the last row
4367           with key part (A) and then traverse the index backwards.
4368         */
4369         tab->read_first_record= join_read_last_key;
4370         tab->read_record.read_record= join_read_prev_same;
4371         tab->read_record.unlock_row= rr_unlock_row;
4372 
4373         /*
4374           The current implementation of join_read_prev_same() does not
4375           work well in combination with ICP and can lead to increased
4376           execution time. Setting changed_key to the current key
4377           (based on that we change the access order for the key) will
4378           ensure that a pushed index condition will be cancelled.
4379         */
4380         changed_key= tab->ref.key;
4381       }
4382     }
4383     else if (select && select->quick)
4384       select->quick->need_sorted_output();
4385   } // QEP has been modified
4386 
4387 fix_ICP:
4388   /*
4389     Cleanup:
4390     We may have both a 'select->quick' and 'save_quick' (original)
4391     at this point. Delete the one that we won't use.
4392   */
4393   if (can_skip_sorting && !no_changes)
4394   {
4395     // Keep current (ordered) select->quick
4396     if (select && save_quick != select->quick)
4397       delete save_quick;
4398   }
4399   else
4400   {
4401     // Restore original save_quick
4402     if (select && select->quick != save_quick)
4403       select->set_quick(save_quick);
4404   }
4405 
4406   Opt_trace_object
4407     trace_change_index(trace, "index_order_summary");
4408   trace_change_index.add_utf8_table(tab->table)
4409     .add("index_provides_order", can_skip_sorting)
4410     .add_alnum("order_direction", order_direction == 1 ? "asc" :
4411                ((order_direction == -1) ? "desc" :
4412                 "undefined"));
4413 
4414   if (changed_key >= 0)
4415   {
4416     bool cancelled_ICP= false;
4417     // switching to another index, makes pushed index condition obsolete
4418     if (!no_changes && table->file->pushed_idx_cond)
4419     {
4420       table->file->cancel_pushed_idx_cond();
4421       // and thus tab's m_condition must be how it was before ICP
4422       orig_cond_saved= false;
4423       cancelled_ICP= true;
4424     }
4425     if (unlikely(trace->is_started()))
4426     {
4427       if (cancelled_ICP)
4428         trace_change_index.add("disabled_pushed_condition_on_old_index", true);
4429       trace_change_index.add_utf8("index", table->key_info[changed_key].name);
4430       trace_change_index.add("plan_changed", !no_changes);
4431       if (!no_changes)
4432       {
4433         const char *new_type= tab->type == JT_INDEX_SCAN ? "index_scan" :
4434           (tab->select && tab->select->quick) ?
4435           "range" : join_type_str[tab->type];
4436         trace_change_index.add_alnum("access_type", new_type);
4437       }
4438     }
4439   }
4440   else if (unlikely(trace->is_started()))
4441   {
4442     trace_change_index.add_utf8("index",
4443                                 ref_key >= 0 ?
4444                                 table->key_info[ref_key].name : "unknown");
4445     trace_change_index.add("plan_changed", false);
4446   }
4447   if (orig_cond_saved)
4448   {
4449     // ICP set up prior to the call, is still valid:
4450     tab->set_jt_and_sel_condition(orig_cond, __LINE__);
4451   }
4452   DBUG_RETURN(can_skip_sorting);
4453 }
4454 
4455 
4456 
4457 /**
4458   Update join with count of the different type of fields.
4459 */
4460 
4461 void
count_field_types(SELECT_LEX * select_lex,TMP_TABLE_PARAM * param,List<Item> & fields,bool reset_with_sum_func)4462 count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
4463                   List<Item> &fields, bool reset_with_sum_func)
4464 {
4465   List_iterator<Item> li(fields);
4466   Item *field;
4467 
4468   param->field_count= 0;
4469   param->sum_func_count= 0;
4470   param->func_count= 0;
4471   param->hidden_field_count= 0;
4472   param->outer_sum_func_count= 0;
4473   param->quick_group=1;
4474   while ((field=li++))
4475   {
4476     Item::Type real_type= field->real_item()->type();
4477     if (real_type == Item::FIELD_ITEM)
4478       param->field_count++;
4479     else if (real_type == Item::SUM_FUNC_ITEM)
4480     {
4481       if (! field->const_item())
4482       {
4483 	Item_sum *sum_item=(Item_sum*) field->real_item();
4484         if (!sum_item->depended_from() ||
4485             sum_item->depended_from() == select_lex)
4486         {
4487           if (!sum_item->quick_group)
4488             param->quick_group=0;			// UDF SUM function
4489           param->sum_func_count++;
4490 
4491           for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
4492           {
4493             if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
4494               param->field_count++;
4495             else
4496               param->func_count++;
4497           }
4498         }
4499         param->func_count++;
4500       }
4501     }
4502     else
4503     {
4504       param->func_count++;
4505       if (reset_with_sum_func)
4506 	field->with_sum_func=0;
4507       if (field->with_sum_func)
4508         param->outer_sum_func_count++;
4509     }
4510   }
4511 }
4512 
4513 
4514 /**
4515   Return 1 if second is a subpart of first argument.
4516 
4517   If first parts has different direction, change it to second part
4518   (group is sorted like order)
4519 */
4520 
4521 bool
test_if_subpart(ORDER * a,ORDER * b)4522 test_if_subpart(ORDER *a,ORDER *b)
4523 {
4524   for (; a && b; a=a->next,b=b->next)
4525   {
4526     if ((*a->item)->eq(*b->item,1))
4527       a->direction= b->direction;
4528     else
4529       return 0;
4530   }
4531   return MY_TEST(!b);
4532 }
4533 
4534 /**
4535   calc how big buffer we need for comparing group entries.
4536 */
4537 
4538 void
calc_group_buffer(JOIN * join,ORDER * group)4539 calc_group_buffer(JOIN *join,ORDER *group)
4540 {
4541   uint key_length=0, parts=0, null_parts=0;
4542 
4543   if (group)
4544     join->group= 1;
4545   for (; group ; group=group->next)
4546   {
4547     Item *group_item= *group->item;
4548     Field *field= group_item->get_tmp_table_field();
4549     if (field)
4550     {
4551       enum_field_types type;
4552       if ((type= field->type()) == MYSQL_TYPE_BLOB)
4553 	key_length+=MAX_BLOB_WIDTH;		// Can't be used as a key
4554       else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
4555         key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
4556       else if (type == MYSQL_TYPE_BIT)
4557       {
4558         /* Bit is usually stored as a longlong key for group fields */
4559         key_length+= 8;                         // Big enough
4560       }
4561       else
4562 	key_length+= field->pack_length();
4563     }
4564     else
4565     {
4566       switch (group_item->result_type()) {
4567       case REAL_RESULT:
4568         key_length+= sizeof(double);
4569         break;
4570       case INT_RESULT:
4571         key_length+= sizeof(longlong);
4572         break;
4573       case DECIMAL_RESULT:
4574         key_length+= my_decimal_get_binary_size(group_item->max_length -
4575                                                 (group_item->decimals ? 1 : 0),
4576                                                 group_item->decimals);
4577         break;
4578       case STRING_RESULT:
4579       {
4580         /*
4581           As items represented as DATE/TIME fields in the group buffer
4582           have STRING_RESULT result type, we increase the length
4583           by 8 as maximum pack length of such fields.
4584         */
4585         if (group_item->is_temporal())
4586         {
4587           key_length+= 8;
4588         }
4589         else if (group_item->field_type() == MYSQL_TYPE_BLOB)
4590           key_length+= MAX_BLOB_WIDTH;		// Can't be used as a key
4591         else
4592         {
4593           /*
4594             Group strings are taken as varstrings and require an length field.
4595             A field is not yet created by create_tmp_field()
4596             and the sizes should match up.
4597           */
4598           key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
4599         }
4600         break;
4601       }
4602       default:
4603         /* This case should never be choosen */
4604         DBUG_ASSERT(0);
4605         my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
4606       }
4607     }
4608     parts++;
4609     if (group_item->maybe_null)
4610       null_parts++;
4611   }
4612   join->tmp_table_param.group_length=key_length+null_parts;
4613   join->tmp_table_param.group_parts=parts;
4614   join->tmp_table_param.group_null_parts=null_parts;
4615 }
4616 
4617 
4618 
4619 /**
4620   Make an array of pointers to sum_functions to speed up
4621   sum_func calculation.
4622 
4623   @retval
4624     0	ok
4625   @retval
4626     1	Error
4627 */
4628 
alloc_func_list()4629 bool JOIN::alloc_func_list()
4630 {
4631   uint func_count, group_parts;
4632   DBUG_ENTER("alloc_func_list");
4633 
4634   func_count= tmp_table_param.sum_func_count;
4635   /*
4636     If we are using rollup, we need a copy of the summary functions for
4637     each level
4638   */
4639   if (rollup.state != ROLLUP::STATE_NONE)
4640     func_count*= (send_group_parts+1);
4641 
4642   group_parts= send_group_parts;
4643   /*
4644     If distinct, reserve memory for possible
4645     disctinct->group_by optimization
4646   */
4647   if (select_distinct)
4648   {
4649     group_parts+= fields_list.elements;
4650     /*
4651       If the ORDER clause is specified then it's possible that
4652       it also will be optimized, so reserve space for it too
4653     */
4654     if (order)
4655     {
4656       ORDER *ord;
4657       for (ord= order; ord; ord= ord->next)
4658         group_parts++;
4659     }
4660   }
4661 
4662   /* This must use calloc() as rollup_make_fields depends on this */
4663   sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) +
4664 				           sizeof(Item_sum***) * (group_parts+1));
4665   sum_funcs_end= (Item_sum***) (sum_funcs + func_count + 1);
4666   DBUG_RETURN(sum_funcs == 0);
4667 }
4668 
4669 
4670 /**
4671   Initialize 'sum_funcs' array with all Item_sum objects.
4672 
4673   @param field_list        All items
4674   @param send_result_set_metadata       Items in select list
4675   @param before_group_by   Set to 1 if this is called before GROUP BY handling
4676   @param recompute         Set to TRUE if sum_funcs must be recomputed
4677 
4678   @retval
4679     0  ok
4680   @retval
4681     1  error
4682 */
4683 
make_sum_func_list(List<Item> & field_list,List<Item> & send_result_set_metadata,bool before_group_by,bool recompute)4684 bool JOIN::make_sum_func_list(List<Item> &field_list,
4685                               List<Item> &send_result_set_metadata,
4686 			      bool before_group_by, bool recompute)
4687 {
4688   List_iterator_fast<Item> it(field_list);
4689   Item_sum **func;
4690   Item *item;
4691   DBUG_ENTER("make_sum_func_list");
4692 
4693   if (*sum_funcs && !recompute)
4694     DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
4695 
4696   func= sum_funcs;
4697   while ((item=it++))
4698   {
4699     if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
4700         (!((Item_sum*) item)->depended_from() ||
4701          ((Item_sum *)item)->depended_from() == select_lex))
4702       *func++= (Item_sum*) item;
4703   }
4704   if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
4705   {
4706     rollup.state= ROLLUP::STATE_READY;
4707     if (rollup_make_fields(field_list, send_result_set_metadata, &func))
4708       DBUG_RETURN(TRUE);			// Should never happen
4709   }
4710   else if (rollup.state == ROLLUP::STATE_NONE)
4711   {
4712     for (uint i=0 ; i <= send_group_parts ;i++)
4713       sum_funcs_end[i]= func;
4714   }
4715   else if (rollup.state == ROLLUP::STATE_READY)
4716     DBUG_RETURN(FALSE);                         // Don't put end marker
4717   *func=0;					// End marker
4718   DBUG_RETURN(FALSE);
4719 }
4720 
4721 
4722 /**
4723   Free joins of subselect of this select.
4724 
4725   @param thd      THD pointer
4726   @param select   pointer to st_select_lex which subselects joins we will free
4727 */
4728 
free_underlaid_joins(THD * thd,SELECT_LEX * select)4729 void free_underlaid_joins(THD *thd, SELECT_LEX *select)
4730 {
4731   for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
4732        unit;
4733        unit= unit->next_unit())
4734     unit->cleanup();
4735 }
4736 
4737 /****************************************************************************
4738   ROLLUP handling
4739 ****************************************************************************/
4740 
4741 /**
4742    Wrap all constant Items in GROUP BY list.
4743 
4744    For ROLLUP queries each constant item referenced in GROUP BY list
4745    is wrapped up into an Item_func object yielding the same value
4746    as the constant item. The objects of the wrapper class are never
4747    considered as constant items and besides they inherit all
4748    properties of the Item_result_field class.
4749    This wrapping allows us to ensure writing constant items
4750    into temporary tables whenever the result of the ROLLUP
4751    operation has to be written into a temporary table, e.g. when
4752    ROLLUP is used together with DISTINCT in the SELECT list.
4753    Usually when creating temporary tables for a intermidiate
4754    result we do not include fields for constant expressions.
4755 
4756    @retval
4757      0  if ok
4758    @retval
4759      1  on error
4760 */
4761 
rollup_process_const_fields()4762 bool JOIN::rollup_process_const_fields()
4763 {
4764   ORDER *group_tmp;
4765   Item *item;
4766   List_iterator<Item> it(all_fields);
4767 
4768   for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
4769   {
4770     if (!(*group_tmp->item)->const_item())
4771       continue;
4772     while ((item= it++))
4773     {
4774       if (*group_tmp->item == item)
4775       {
4776         Item* new_item= new Item_func_rollup_const(item);
4777         if (!new_item)
4778           return 1;
4779         new_item->fix_fields(thd, (Item **) 0);
4780         thd->change_item_tree(it.ref(), new_item);
4781         for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
4782         {
4783           if (*tmp->item == item)
4784             thd->change_item_tree(tmp->item, new_item);
4785         }
4786         break;
4787       }
4788     }
4789     it.rewind();
4790   }
4791   return 0;
4792 }
4793 
4794 
4795 /**
4796   Fill up rollup structures with pointers to fields to use.
4797 
4798   Creates copies of item_sum items for each sum level.
4799 
4800   @param fields_arg		List of all fields (hidden and real ones)
4801   @param sel_fields		Pointer to selected fields
4802   @param func			Store here a pointer to all fields
4803 
4804   @retval
4805     0	if ok;
4806     In this case func is pointing to next not used element.
4807   @retval
4808     1    on error
4809 */
4810 
rollup_make_fields(List<Item> & fields_arg,List<Item> & sel_fields,Item_sum *** func)4811 bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
4812 			      Item_sum ***func)
4813 {
4814   List_iterator_fast<Item> it(fields_arg);
4815   Item *first_field= sel_fields.head();
4816   uint level;
4817 
4818   /*
4819     Create field lists for the different levels
4820 
4821     The idea here is to have a separate field list for each rollup level to
4822     avoid all runtime checks of which columns should be NULL.
4823 
4824     The list is stored in reverse order to get sum function in such an order
4825     in func that it makes it easy to reset them with init_sum_functions()
4826 
4827     Assuming:  SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
4828 
4829     rollup.fields[0] will contain list where a,b,c is NULL
4830     rollup.fields[1] will contain list where b,c is NULL
4831     ...
4832     rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
4833     ...
4834     sum_funcs_end[0] points to all sum functions
4835     sum_funcs_end[1] points to all sum functions, except grand totals
4836     ...
4837   */
4838 
4839   for (level=0 ; level < send_group_parts ; level++)
4840   {
4841     uint i;
4842     uint pos= send_group_parts - level -1;
4843     bool real_fields= 0;
4844     Item *item;
4845     List_iterator<Item> new_it(rollup.fields[pos]);
4846     Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
4847     ORDER *start_group;
4848 
4849     /* Point to first hidden field */
4850     uint ref_array_ix= fields_arg.elements-1;
4851 
4852     /* Remember where the sum functions ends for the previous level */
4853     sum_funcs_end[pos+1]= *func;
4854 
4855     /* Find the start of the group for this level */
4856     for (i= 0, start_group= group_list ;
4857 	 i++ < pos ;
4858 	 start_group= start_group->next)
4859       ;
4860 
4861     it.rewind();
4862     while ((item= it++))
4863     {
4864       if (item == first_field)
4865       {
4866 	real_fields= 1;				// End of hidden fields
4867         ref_array_ix= 0;
4868       }
4869 
4870       if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
4871           (!((Item_sum*) item)->depended_from() ||
4872            ((Item_sum *)item)->depended_from() == select_lex))
4873 
4874       {
4875 	/*
4876 	  This is a top level summary function that must be replaced with
4877 	  a sum function that is reset for this level.
4878 
4879 	  NOTE: This code creates an object which is not that nice in a
4880 	  sub select.  Fortunately it's not common to have rollup in
4881 	  sub selects.
4882 	*/
4883 	item= item->copy_or_same(thd);
4884 	((Item_sum*) item)->make_unique();
4885 	*(*func)= (Item_sum*) item;
4886 	(*func)++;
4887       }
4888       else
4889       {
4890 	/* Check if this is something that is part of this group by */
4891 	ORDER *group_tmp;
4892 	for (group_tmp= start_group, i= pos ;
4893              group_tmp ; group_tmp= group_tmp->next, i++)
4894 	{
4895           if (*group_tmp->item == item)
4896 	  {
4897 	    /*
4898 	      This is an element that is used by the GROUP BY and should be
4899 	      set to NULL in this level
4900 	    */
4901             Item_null_result *null_item=
4902               new (thd->mem_root) Item_null_result(item->field_type(),
4903                                                    item->result_type());
4904             if (!null_item)
4905               return 1;
4906 	    item->maybe_null= 1;		// Value will be null sometimes
4907             null_item->result_field= item->get_tmp_table_field();
4908             item= null_item;
4909 	    break;
4910 	  }
4911 	}
4912       }
4913       ref_array_start[ref_array_ix]= item;
4914       if (real_fields)
4915       {
4916 	(void) new_it++;			// Point to next item
4917 	new_it.replace(item);			// Replace previous
4918 	ref_array_ix++;
4919       }
4920       else
4921 	ref_array_ix--;
4922     }
4923   }
4924   sum_funcs_end[0]= *func;			// Point to last function
4925   return 0;
4926 }
4927 
4928 
4929 /**
4930   clear results if there are not rows found for group
4931   (end_send_group/end_write_group)
4932 */
4933 
clear()4934 void JOIN::clear()
4935 {
4936   /*
4937     must clear only the non-const tables, as const tables
4938     are not re-calculated.
4939   */
4940   for (uint tableno= const_tables; tableno < primary_tables; tableno++)
4941     mark_as_null_row(join_tab[tableno].table);  // All fields are NULL
4942 
4943   copy_fields(&tmp_table_param);
4944 
4945   if (sum_funcs)
4946   {
4947     Item_sum *func, **func_ptr= sum_funcs;
4948     while ((func= *(func_ptr++)))
4949       func->clear();
4950   }
4951 }
4952 
4953 
4954 /**
4955   change select_result object of JOIN.
4956 
4957   @param res		new select_result object
4958 
4959   @retval
4960     FALSE   OK
4961   @retval
4962     TRUE    error
4963 */
4964 
change_result(select_result * res)4965 bool JOIN::change_result(select_result *res)
4966 {
4967   DBUG_ENTER("JOIN::change_result");
4968   result= res;
4969   if (result->prepare(fields_list, select_lex->master_unit()) ||
4970       result->prepare2())
4971   {
4972     DBUG_RETURN(TRUE);
4973   }
4974   DBUG_RETURN(FALSE);
4975 }
4976 
4977 /**
4978   Add having condition as a where clause condition of the given temp table.
4979 
4980   @param    curr_tmp_table  Table number to which having conds are added.
4981 
4982   @returns  false if success, true if error.
4983 */
4984 
add_having_as_tmp_table_cond(uint curr_tmp_table)4985 bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table)
4986 {
4987   having->update_used_tables();
4988   JOIN_TAB *curr_table= &join_tab[curr_tmp_table];
4989   table_map used_tables= curr_table->table->map | OUTER_REF_TABLE_BIT;
4990 
4991   /* If tmp table is not used then consider conditions of const table also */
4992   if (!need_tmp)
4993     used_tables|= const_table_map;
4994 
4995   DBUG_ENTER("JOIN::add_having_as_table_conds");
4996 
4997   Item* sort_table_cond= make_cond_for_table(having, used_tables,
4998                                            (table_map) 0, false);
4999   if (sort_table_cond)
5000   {
5001     if (!curr_table->select)
5002       if (!(curr_table->select= new SQL_SELECT))
5003         DBUG_RETURN(true);
5004     if (!curr_table->select->cond)
5005       curr_table->select->cond= sort_table_cond;
5006     else
5007     {
5008       if (!(curr_table->select->cond=
5009               new Item_cond_and(curr_table->select->cond,
5010                                 sort_table_cond)))
5011         DBUG_RETURN(true);
5012       curr_table->select->cond->fix_fields(thd, 0);
5013     }
5014     curr_table->set_condition(curr_table->select->cond, __LINE__);
5015     curr_table->condition()->top_level_item();
5016     DBUG_EXECUTE("where",print_where(curr_table->select->cond,
5017                                      "select and having",
5018                                      QT_ORDINARY););
5019 
5020     having= make_cond_for_table(having, ~ (table_map) 0,
5021                                 ~used_tables, false);
5022     DBUG_EXECUTE("where",
5023                  print_where(having, "having after sort", QT_ORDINARY););
5024   }
5025 
5026   DBUG_RETURN(false);
5027 }
5028 
5029 /**
5030   Init tmp tables usage info.
5031 
5032   @details
5033   This function finalizes execution plan by taking following actions:
5034     .) tmp tables are created, but not instantiated (this is done during
5035        execution). JOIN_TABs dedicated to tmp tables are filled appropriately.
5036        see JOIN::create_intermediate_table.
5037     .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
5038        each required stage of execution. These fields lists are set for
5039        tmp tables' tabs and for the tab of last table in the join.
5040     .) fill info for sorting/grouping/dups removal is prepared and saved to
5041        appropriate tabs. Here is an example:
5042         SELECT * from t1,t2 WHERE ... GROUP BY t1.f1 ORDER BY t2.f2, t1.f2
5043         and lets assume that the table order in the plan is t1,t2.
5044        In this case optimizer will sort for group only the first table as the
5045        second one isn't mentioned in GROUP BY. The result will be materialized
5046        in tmp table.  As filesort can't sort join optimizer will sort tmp table
5047        also. The first sorting (for group) is called simple as is doesn't
5048        require tmp table.  The Filesort object for it is created here - in
5049        JOIN::create_intermediate_table.  Filesort for the second case is
5050        created here, in JOIN::make_tmp_tables_info.
5051   @returns
5052   false - Ok
5053   true  - Error
5054 */
5055 
make_tmp_tables_info()5056 bool JOIN::make_tmp_tables_info()
5057 {
5058   List<Item> *curr_all_fields= &all_fields;
5059   List<Item> *curr_fields_list= &fields_list;
5060   bool materialize_join= false;
5061   uint curr_tmp_table= const_tables;
5062   TABLE *exec_tmp_table= NULL;
5063   DBUG_ENTER("JOIN::make_tmp_tables_info");
5064   having_for_explain= having;
5065 
5066   const bool has_group_by= this->group;
5067   /*
5068     Setup last table to provide fields and all_fields lists to the next
5069     node in the plan.
5070   */
5071   if (join_tab)
5072   {
5073     join_tab[primary_tables - 1].fields= &fields_list;
5074     join_tab[primary_tables - 1].all_fields= &all_fields;
5075   }
5076   /*
5077     The loose index scan access method guarantees that all grouping or
5078     duplicate row elimination (for distinct) is already performed
5079     during data retrieval, and that all MIN/MAX functions are already
5080     computed for each group. Thus all MIN/MAX functions should be
5081     treated as regular functions, and there is no need to perform
5082     grouping in the main execution loop.
5083     Notice that currently loose index scan is applicable only for
5084     single table queries, thus it is sufficient to test only the first
5085     join_tab element of the plan for its access method.
5086   */
5087   if (join_tab && join_tab->is_using_loose_index_scan())
5088     tmp_table_param.precomputed_group_by=
5089       !join_tab->is_using_agg_loose_index_scan();
5090 
5091   /* Create a tmp table if distinct or if the sort is too complicated */
5092   if (need_tmp)
5093   {
5094     curr_tmp_table= primary_tables;
5095     tmp_tables++;
5096     if (plan_is_const())
5097       first_select= sub_select_op;
5098 
5099     /*
5100       Create temporary table on first execution of this join.
5101       (Will be reused if this is a subquery that is executed several times.)
5102     */
5103     init_items_ref_array();
5104 
5105     ORDER_with_src tmp_group;
5106     if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP))
5107       tmp_group= group_list;
5108 
5109     tmp_table_param.hidden_field_count=
5110       all_fields.elements - fields_list.elements;
5111 
5112     if (create_intermediate_table(&join_tab[curr_tmp_table],
5113                                   &all_fields, tmp_group,
5114                                   group_list && simple_group))
5115       DBUG_RETURN(true);
5116     exec_tmp_table= join_tab[curr_tmp_table].table;
5117 
5118     if (exec_tmp_table->distinct)
5119       optimize_distinct();
5120 
5121     /*
5122       If there is no sorting or grouping, 'use_order'
5123       index result should not have been requested.
5124       Exception: LooseScan strategy for semijoin requires
5125       sorted access even if final result is not to be sorted.
5126     */
5127     DBUG_ASSERT(
5128       !(ordered_index_usage == ordered_index_void &&
5129         !plan_is_const() &&
5130         join_tab[const_tables].position->sj_strategy != SJ_OPT_LOOSE_SCAN &&
5131         join_tab[const_tables].use_order()));
5132 
5133     /* Change sum_fields reference to calculated fields in tmp_table */
5134     DBUG_ASSERT(items1.is_null());
5135     items1= ref_ptr_array_slice(2);
5136     if (sort_and_group || join_tab[curr_tmp_table].table->group ||
5137         tmp_table_param.precomputed_group_by)
5138     {
5139       if (change_to_use_tmp_fields(thd, items1,
5140                                    tmp_fields_list1, tmp_all_fields1,
5141                                    fields_list.elements, all_fields))
5142         DBUG_RETURN(true);
5143     }
5144     else
5145     {
5146       if (change_refs_to_tmp_fields(thd, items1,
5147                                     tmp_fields_list1, tmp_all_fields1,
5148                                     fields_list.elements, all_fields))
5149         DBUG_RETURN(true);
5150     }
5151     curr_all_fields= &tmp_all_fields1;
5152     curr_fields_list= &tmp_fields_list1;
5153     // Need to set them now for correct group_fields setup, reset at the end.
5154     set_items_ref_array(items1);
5155     join_tab[curr_tmp_table].ref_array= &items1;
5156     join_tab[curr_tmp_table].all_fields= &tmp_all_fields1;
5157     join_tab[curr_tmp_table].fields= &tmp_fields_list1;
5158     setup_tmptable_write_func(&join_tab[curr_tmp_table]);
5159 
5160     /*
5161       If having is not handled here, it will be checked before the row is sent
5162       to the client.
5163     */
5164     if (having &&
5165         (sort_and_group || (exec_tmp_table->distinct && !group_list)))
5166     {
5167       /*
5168         If there is no select distinct then move the having to table conds of
5169         tmp table.
5170         NOTE : We cannot apply having after distinct. If columns of having are
5171                not part of select distinct, then distinct may remove rows
5172                which can satisfy having.
5173       */
5174       if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
5175 	DBUG_RETURN(true);
5176 
5177       /*
5178         Having condition which we are not able to add as tmp table conds are
5179         kept as before. And, this will be applied before storing the rows in
5180         tmp table.
5181       */
5182       join_tab[curr_tmp_table].having= having;
5183       having= NULL; // Already done
5184     }
5185 
5186     tmp_table_param.func_count= 0;
5187     tmp_table_param.field_count+= tmp_table_param.func_count;
5188     if (sort_and_group || join_tab[curr_tmp_table].table->group)
5189     {
5190       tmp_table_param.field_count+= tmp_table_param.sum_func_count;
5191       tmp_table_param.sum_func_count= 0;
5192     }
5193 
5194     if (exec_tmp_table->group)
5195     {						// Already grouped
5196       if (!order && !no_order && !skip_sort_order)
5197         order= group_list;  /* order by group */
5198       group_list= NULL;
5199     }
5200     /*
5201       If we have different sort & group then we must sort the data by group
5202       and copy it to another tmp table
5203       This code is also used if we are using distinct something
5204       we haven't been able to store in the temporary table yet
5205       like SEC_TO_TIME(SUM(...)).
5206     */
5207 
5208     if ((group_list &&
5209          (!test_if_subpart(group_list, order) || select_distinct)) ||
5210         (select_distinct && tmp_table_param.using_outer_summary_function))
5211     {					/* Must copy to another table */
5212       DBUG_PRINT("info",("Creating group table"));
5213 
5214       calc_group_buffer(this, group_list);
5215       count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
5216                         select_distinct && !group_list);
5217       tmp_table_param.hidden_field_count=
5218         tmp_all_fields1.elements - tmp_fields_list1.elements;
5219       sort_and_group= false;
5220       if (!exec_tmp_table->group && !exec_tmp_table->distinct)
5221       {
5222         // 1st tmp table were materializing join result
5223         materialize_join= true;
5224         explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
5225       }
5226       curr_tmp_table++;
5227       tmp_tables++;
5228 
5229       /* group data to new table */
5230       /*
5231         If the access method is loose index scan then all MIN/MAX
5232         functions are precomputed, and should be treated as regular
5233         functions. See extended comment above.
5234       */
5235       if (join_tab->is_using_loose_index_scan())
5236         tmp_table_param.precomputed_group_by= TRUE;
5237 
5238       tmp_table_param.hidden_field_count=
5239         curr_all_fields->elements - curr_fields_list->elements;
5240       ORDER_with_src dummy= NULL; //TODO can use table->group here also
5241 
5242       if (create_intermediate_table(&join_tab[curr_tmp_table],
5243                                     curr_all_fields, dummy, true))
5244 	DBUG_RETURN(true);
5245 
5246       if (group_list)
5247       {
5248         explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
5249         if (!plan_is_const())        // No need to sort a single row
5250         {
5251           JOIN_TAB *sort_tab= &join_tab[curr_tmp_table - 1];
5252           if (add_sorting_to_table(sort_tab, &group_list))
5253             DBUG_RETURN(true);
5254         }
5255 
5256         if (make_group_fields(this, this))
5257           DBUG_RETURN(true);
5258       }
5259 
5260       /*
5261         If there is no sorting or grouping, 'use_order'
5262         index result should not have been requested.
5263       */
5264       DBUG_ASSERT(!(ordered_index_usage == ordered_index_void &&
5265                     !plan_is_const() &&
5266                     join_tab[const_tables].use_order()));
5267 
5268       // Setup sum funcs only when necessary, otherwise we might break info
5269       // for the first table
5270       if (group_list || tmp_table_param.sum_func_count)
5271       {
5272         if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
5273           DBUG_RETURN(true);
5274         if (prepare_sum_aggregators(sum_funcs,
5275                                     !join_tab->is_using_agg_loose_index_scan()))
5276           DBUG_RETURN(true);
5277         group_list= NULL;
5278         if (setup_sum_funcs(thd, sum_funcs))
5279           DBUG_RETURN(true);
5280       }
5281       // No sum funcs anymore
5282       DBUG_ASSERT(items2.is_null());
5283 
5284       items2= ref_ptr_array_slice(3);
5285       if (change_to_use_tmp_fields(thd, items2,
5286                                    tmp_fields_list2, tmp_all_fields2,
5287                                    fields_list.elements, tmp_all_fields1))
5288         DBUG_RETURN(true);
5289 
5290       curr_fields_list= &tmp_fields_list2;
5291       curr_all_fields= &tmp_all_fields2;
5292       set_items_ref_array(items2);
5293       join_tab[curr_tmp_table].ref_array= &items2;
5294       join_tab[curr_tmp_table].all_fields= &tmp_all_fields2;
5295       join_tab[curr_tmp_table].fields= &tmp_fields_list2;
5296       setup_tmptable_write_func(&join_tab[curr_tmp_table]);
5297 
5298       tmp_table_param.field_count+= tmp_table_param.sum_func_count;
5299       tmp_table_param.sum_func_count= 0;
5300     }
5301     if (join_tab[curr_tmp_table].table->distinct)
5302       select_distinct= false;               /* Each row is unique */
5303 
5304     if (select_distinct && !group_list)
5305     {
5306       if (having)
5307       {
5308         join_tab[curr_tmp_table].having= having;
5309         having->update_used_tables();
5310       }
5311       join_tab[curr_tmp_table].distinct= true;
5312       explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
5313       having= NULL;
5314       select_distinct= false;
5315     }
5316     /* Clean tmp_table_param for the next tmp table. */
5317     tmp_table_param.field_count= tmp_table_param.sum_func_count=
5318       tmp_table_param.func_count= 0;
5319 
5320     tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
5321     first_record= sort_and_group=0;
5322 
5323     if (!group_optimized_away)
5324     {
5325       group= false;
5326     }
5327     else
5328     {
5329       /*
5330         If grouping has been optimized away, a temporary table is
5331         normally not needed unless we're explicitly requested to create
5332         one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
5333 
5334         In this case (grouping was optimized away), temp_table was
5335         created without a grouping expression and JOIN::exec() will not
5336         perform the necessary grouping (by the use of end_send_group()
5337         or end_write_group()) if JOIN::group is set to false.
5338       */
5339       // the temporary table was explicitly requested
5340       DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
5341       // the temporary table does not have a grouping expression
5342       DBUG_ASSERT(!join_tab[curr_tmp_table].table->group);
5343     }
5344     calc_group_buffer(this, group_list);
5345     count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
5346   }
5347 
5348   if (group || implicit_grouping || tmp_table_param.sum_func_count)
5349   {
5350     if (make_group_fields(this, this))
5351       DBUG_RETURN(true);
5352 
5353     DBUG_ASSERT(items3.is_null());
5354 
5355     if (items0.is_null())
5356       init_items_ref_array();
5357     items3= ref_ptr_array_slice(4);
5358     setup_copy_fields(thd, &tmp_table_param,
5359                       items3, tmp_fields_list3, tmp_all_fields3,
5360                       curr_fields_list->elements, *curr_all_fields);
5361 
5362     curr_fields_list= &tmp_fields_list3;
5363     curr_all_fields= &tmp_all_fields3;
5364     set_items_ref_array(items3);
5365     if (join_tab)
5366     {
5367       // Set grouped fields on the last table
5368       join_tab[primary_tables + tmp_tables - 1].ref_array= &items3;
5369       join_tab[primary_tables + tmp_tables - 1].all_fields= &tmp_all_fields3;
5370       join_tab[primary_tables + tmp_tables - 1].fields= &tmp_fields_list3;
5371     }
5372     if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
5373       DBUG_RETURN(true);
5374     if (prepare_sum_aggregators(sum_funcs,
5375                                 !join_tab ||
5376                                 !join_tab-> is_using_agg_loose_index_scan()))
5377       DBUG_RETURN(true);
5378     if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
5379       DBUG_RETURN(true);
5380   }
5381   if (group_list || order)
5382   {
5383     DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
5384     THD_STAGE_INFO(thd, stage_sorting_result);
5385     /* If we have already done the group, add HAVING to sorted table */
5386     if (having && !group_list && !sort_and_group)
5387     {
5388       if (add_having_as_tmp_table_cond(curr_tmp_table))
5389         DBUG_RETURN(true);
5390     }
5391 
5392     if (group)
5393       m_select_limit= HA_POS_ERROR;
5394     else if (!need_tmp)
5395     {
5396       /*
5397         We can abort sorting after thd->select_limit rows if there are no
5398         filter conditions for any tables after the sorted one.
5399         Filter conditions come in several forms:
5400          1. as a condition item attached to the join_tab, or
5401          2. as a keyuse attached to the join_tab (ref access).
5402       */
5403       for (uint i= const_tables + 1; i < primary_tables; i++)
5404       {
5405         JOIN_TAB *const tab= join_tab + i;
5406         if (tab->condition() ||                                // 1
5407             (tab->keyuse && !tab->first_inner))                // 2
5408         {
5409           /* We have to sort all rows */
5410           m_select_limit= HA_POS_ERROR;
5411           break;
5412         }
5413       }
5414     }
5415     /*
5416       Here we add sorting stage for ORDER BY/GROUP BY clause, if the
5417       optimiser chose FILESORT to be faster than INDEX SCAN or there is
5418       no suitable index present.
5419       OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
5420     */
5421     DBUG_PRINT("info",("Sorting for order by/group by"));
5422     ORDER_with_src order_arg= group_list ?  group_list : order;
5423     if (join_tab &&
5424         ordered_index_usage !=
5425         (group_list ? ordered_index_group_by : ordered_index_order_by) &&
5426         join_tab[curr_tmp_table].type != JT_CONST &&
5427         join_tab[curr_tmp_table].type != JT_EQ_REF) // Don't sort 1 row
5428     {
5429       // Sort either first non-const table or the last tmp table
5430       JOIN_TAB *sort_tab= &join_tab[curr_tmp_table];
5431       if (need_tmp && !materialize_join && !exec_tmp_table->group)
5432         explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);
5433 
5434       if (add_sorting_to_table(sort_tab, &order_arg))
5435         DBUG_RETURN(true);
5436       /*
5437         filesort_limit:	 Return only this many rows from filesort().
5438         We can use select_limit_cnt only if we have no group_by and 1 table.
5439         This allows us to use Bounded_queue for queries like:
5440           "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
5441         m_select_limit == HA_POS_ERROR (we need a full table scan)
5442         unit->select_limit_cnt == 1 (we only need one row in the result set)
5443       */
5444       sort_tab->filesort->limit=
5445         (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
5446          m_select_limit : unit->select_limit_cnt;
5447     }
5448     if (!plan_is_const() &&
5449         !join_tab[const_tables].table->sort.io_cache)
5450     {
5451       /*
5452         If no IO cache exists for the first table then we are using an
5453         INDEX SCAN and no filesort. Thus we should not remove the sorted
5454         attribute on the INDEX SCAN.
5455       */
5456       skip_sort_order= true;
5457     }
5458   }
5459   fields= curr_fields_list;
5460   // Reset before execution
5461   set_items_ref_array(items0);
5462   if (join_tab)
5463     join_tab[primary_tables + tmp_tables - 1].next_select=
5464       setup_end_select_func(this, NULL);
5465   group= has_group_by;
5466 
5467   DBUG_RETURN(false);
5468 }
5469 
5470 
5471 /**
5472   @brief Add Filesort object to the given table to sort if with filesort
5473 
5474   @param tab   the JOIN_TAB object to attach created Filesort object to
5475   @param order List of expressions to sort the table by
5476 
5477   @note This function moves tab->select, if any, to filesort->select
5478 
5479   @return false on success, true on OOM
5480 */
5481 
5482 bool
add_sorting_to_table(JOIN_TAB * tab,ORDER_with_src * order)5483 JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER_with_src *order)
5484 {
5485   explain_flags.set(order->src, ESP_USING_FILESORT);
5486   tab->filesort= new (thd->mem_root) Filesort(*order, HA_POS_ERROR, tab->select);
5487   if (!tab->filesort)
5488     return true;
5489   /*
5490     Select was moved to filesort->select to force join_init_read_record to use
5491     sorted result instead of reading table through select.
5492   */
5493   if (tab->select)
5494   {
5495     tab->select= NULL;
5496     tab->set_condition(NULL, __LINE__);
5497   }
5498   tab->read_first_record= join_init_read_record;
5499   return false;
5500 }
5501 
5502 /**
5503   Find a cheaper access key than a given @a key
5504 
5505   @param          tab                 NULL or JOIN_TAB of the accessed table
5506   @param          order               Linked list of ORDER BY arguments
5507   @param          table               Table if tab == NULL or tab->table
5508   @param          usable_keys         Key map to find a cheaper key in
5509   @param          ref_key
5510                 * 0 <= key < MAX_KEY   - key number (hint) to start the search
5511                 * -1                   - no key number provided
5512   @param          select_limit        LIMIT value, or HA_POS_ERROR if no limit
5513   @param [out]    new_key             Key number if success, otherwise undefined
5514   @param [out]    new_key_direction   Return -1 (reverse) or +1 if success,
5515                                       otherwise undefined
5516   @param [out]    new_select_limit    Return adjusted LIMIT
5517   @param [out]    new_used_key_parts  NULL by default, otherwise return number
5518                                       of new_key prefix columns if success
5519                                       or undefined if the function fails
5520   @param [out]  saved_best_key_parts  NULL by default, otherwise preserve the
5521                                       value for further use in QUICK_SELECT_DESC
5522 
5523   @note
5524     This function takes into account table->quick_condition_rows statistic
5525     (that is calculated by the make_join_statistics function).
5526     However, single table procedures such as mysql_update() and mysql_delete()
5527     never call make_join_statistics, so they have to update it manually
5528     (@see get_index_for_order()).
5529 */
5530 
5531 static bool
test_if_cheaper_ordering(const JOIN_TAB * tab,ORDER * order,TABLE * table,key_map usable_keys,int ref_key,ha_rows select_limit,int * new_key,int * new_key_direction,ha_rows * new_select_limit,uint * new_used_key_parts,uint * saved_best_key_parts)5532 test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
5533                          key_map usable_keys,  int ref_key,
5534                          ha_rows select_limit,
5535                          int *new_key, int *new_key_direction,
5536                          ha_rows *new_select_limit, uint *new_used_key_parts,
5537                          uint *saved_best_key_parts)
5538 {
5539   DBUG_ENTER("test_if_cheaper_ordering");
5540   /*
5541     Check whether there is an index compatible with the given order
5542     usage of which is cheaper than usage of the ref_key index (ref_key>=0)
5543     or a table scan.
5544     It may be the case if ORDER/GROUP BY is used with LIMIT.
5545   */
5546   ha_rows best_select_limit= HA_POS_ERROR;
5547   JOIN *join= tab ? tab->join : NULL;
5548   uint nr;
5549   key_map keys;
5550   uint best_key_parts= 0;
5551   int best_key_direction= 0;
5552   ha_rows best_records= 0;
5553   double read_time;
5554   int best_key= -1;
5555   bool is_best_covering= FALSE;
5556   double fanout= 1;
5557   ha_rows table_records= table->file->stats.records;
5558   bool group= join && join->group && order == join->group_list;
5559   ha_rows refkey_rows_estimate= table->quick_condition_rows;
5560   const bool has_limit= (select_limit != HA_POS_ERROR);
5561 
5562   /*
5563     If not used with LIMIT, only use keys if the whole query can be
5564     resolved with a key;  This is because filesort() is usually faster than
5565     retrieving all rows through an index.
5566   */
5567   if (select_limit >= table_records)
5568   {
5569     keys= *table->file->keys_to_use_for_scanning();
5570     keys.merge(table->covering_keys);
5571 
5572     /*
5573       We are adding here also the index specified in FORCE INDEX clause,
5574       if any.
5575       This is to allow users to use index in ORDER BY.
5576     */
5577     if (table->force_index)
5578       keys.merge(group ? table->keys_in_use_for_group_by :
5579                          table->keys_in_use_for_order_by);
5580     keys.intersect(usable_keys);
5581   }
5582   else
5583     keys= usable_keys;
5584 
5585   if (join)
5586   {
5587     read_time= tab->position->read_time;
5588     for (const JOIN_TAB *jt= tab + 1;
5589          jt < join->join_tab + join->primary_tables; jt++)
5590       fanout*= jt->position->records_read; // fanout is always >= 1
5591   }
5592   else
5593     read_time= table->file->scan_time();
5594 
5595   /*
5596     Calculate the selectivity of the ref_key for REF_ACCESS. For
5597     RANGE_ACCESS we use table->quick_condition_rows.
5598   */
5599   if (ref_key >= 0 && tab->type == JT_REF)
5600   {
5601     if (table->quick_keys.is_set(ref_key))
5602       refkey_rows_estimate= table->quick_rows[ref_key];
5603     else
5604     {
5605       const KEY *ref_keyinfo= table->key_info + ref_key;
5606       refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
5607     }
5608     set_if_bigger(refkey_rows_estimate, 1);
5609   }
5610   for (nr=0; nr < table->s->keys ; nr++)
5611   {
5612     int direction;
5613     uint used_key_parts;
5614 
5615     if (keys.is_set(nr) &&
5616         (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
5617     {
5618       /*
5619         At this point we are sure that ref_key is a non-ordering
5620         key (where "ordering key" is a key that will return rows
5621         in the order required by ORDER BY).
5622       */
5623       DBUG_ASSERT (ref_key != (int) nr);
5624 
5625       bool is_covering= table->covering_keys.is_set(nr) ||
5626                         (nr == table->s->primary_key &&
5627                         table->file->primary_key_is_clustered()) ||
5628                         (table->file->index_flags(nr, 0, 0)
5629                          & HA_CLUSTERED_INDEX);
5630 
5631       /*
5632         Don't use an index scan with ORDER BY without limit.
5633         For GROUP BY without limit always use index scan
5634         if there is a suitable index.
5635         Why we hold to this asymmetry hardly can be explained
5636         rationally. It's easy to demonstrate that using
5637         temporary table + filesort could be cheaper for grouping
5638         queries too.
5639       */
5640       if (is_covering ||
5641           select_limit != HA_POS_ERROR ||
5642           (ref_key < 0 && (group || table->force_index)))
5643       {
5644         double rec_per_key;
5645         double index_scan_time;
5646         KEY *keyinfo= table->key_info+nr;
5647         if (select_limit == HA_POS_ERROR)
5648           select_limit= table_records;
5649         if (group)
5650         {
5651           /*
5652             Used_key_parts can be larger than keyinfo->key_parts
5653             when using a secondary index clustered with a primary
5654             key (e.g. as in Innodb).
5655             See Bug #28591 for details.
5656           */
5657           rec_per_key= used_key_parts &&
5658                        used_key_parts <= actual_key_parts(keyinfo) ?
5659                        keyinfo->rec_per_key[used_key_parts-1] : 1;
5660           set_if_bigger(rec_per_key, 1);
5661           /*
5662             With a grouping query each group containing on average
5663             rec_per_key records produces only one row that will
5664             be included into the result set.
5665           */
5666           if (select_limit > table_records/rec_per_key)
5667             select_limit= table_records;
5668           else
5669             select_limit= (ha_rows) (select_limit*rec_per_key);
5670         }
5671         /*
5672           If tab=tk is not the last joined table tn then to get first
5673           L records from the result set we can expect to retrieve
5674           only L/fanout(tk,tn) where fanout(tk,tn) says how many
5675           rows in the record set on average will match each row tk.
5676           Usually our estimates for fanouts are too pessimistic.
5677           So the estimate for L/fanout(tk,tn) will be too optimistic
5678           and as result we'll choose an index scan when using ref/range
5679           access + filesort will be cheaper.
5680         */
5681         select_limit= (ha_rows) (select_limit < fanout ?
5682                                  1 : select_limit/fanout);
5683         /*
5684           We assume that each of the tested indexes is not correlated
5685           with ref_key. Thus, to select first N records we have to scan
5686           N/selectivity(ref_key) index entries.
5687           selectivity(ref_key) = #scanned_records/#table_records =
5688           refkey_rows_estimate/table_records.
5689           In any case we can't select more than #table_records.
5690           N/(refkey_rows_estimate/table_records) > table_records
5691           <=> N > refkey_rows_estimate.
5692          */
5693         if (select_limit > refkey_rows_estimate)
5694           select_limit= table_records;
5695         else
5696           select_limit= (ha_rows) (select_limit *
5697                                    (double) table_records /
5698                                     refkey_rows_estimate);
5699         rec_per_key= keyinfo->rec_per_key[keyinfo->user_defined_key_parts - 1];
5700         set_if_bigger(rec_per_key, 1);
5701         /*
5702           Here we take into account the fact that rows are
5703           accessed in sequences rec_per_key records in each.
5704           Rows in such a sequence are supposed to be ordered
5705           by rowid/primary key. When reading the data
5706           in a sequence we'll touch not more pages than the
5707           table file contains.
5708           TODO. Use the formula for a disk sweep sequential access
5709           to calculate the cost of accessing data rows for one
5710           index entry.
5711         */
5712         index_scan_time= select_limit/rec_per_key *
5713                          min(rec_per_key, table->file->scan_time());
5714         if ((ref_key < 0 && is_covering) ||
5715             (ref_key < 0 && (group || table->force_index)) ||
5716             index_scan_time < read_time)
5717         {
5718           ha_rows quick_records= table_records;
5719           ha_rows refkey_select_limit= (ref_key >= 0 &&
5720                                         table->covering_keys.is_set(ref_key)) ?
5721                                         refkey_rows_estimate :
5722                                         HA_POS_ERROR;
5723           if ((is_best_covering && !is_covering) ||
5724               (is_covering && refkey_select_limit < select_limit))
5725             continue;
5726           if (table->quick_keys.is_set(nr))
5727             quick_records= table->quick_rows[nr];
5728           if (best_key < 0 ||
5729               (select_limit <= min(quick_records,best_records) ?
5730                keyinfo->user_defined_key_parts < best_key_parts :
5731                quick_records < best_records) ||
5732                ((quick_records == best_records) &&
5733                 !is_best_covering && is_covering))
5734           {
5735             best_key= nr;
5736             best_key_parts= keyinfo->user_defined_key_parts;
5737             if (saved_best_key_parts)
5738               *saved_best_key_parts= used_key_parts;
5739             best_records= quick_records;
5740             is_best_covering= is_covering;
5741             best_key_direction= direction;
5742             best_select_limit= select_limit;
5743           }
5744         }
5745       }
5746     }
5747   }
5748 
5749   if (best_key < 0 || best_key == ref_key)
5750     DBUG_RETURN(FALSE);
5751 
5752   *new_key= best_key;
5753   *new_key_direction= best_key_direction;
5754   *new_select_limit= has_limit ? best_select_limit : table_records;
5755   if (new_used_key_parts != NULL)
5756     *new_used_key_parts= best_key_parts;
5757 
5758   DBUG_RETURN(TRUE);
5759 }
5760 
5761 
5762 /**
5763   Find a key to apply single table UPDATE/DELETE by a given ORDER
5764 
5765   @param       order           Linked list of ORDER BY arguments
5766   @param       table           Table to find a key
5767   @param       select          Pointer to access/update select->quick (if any)
5768   @param       limit           LIMIT clause parameter
5769   @param [out] need_sort       TRUE if filesort needed
5770   @param [out] reverse
5771     TRUE if the key is reversed again given ORDER (undefined if key == MAX_KEY)
5772 
5773   @return
5774     - MAX_KEY if no key found                        (need_sort == TRUE)
5775     - MAX_KEY if quick select result order is OK     (need_sort == FALSE)
5776     - key number (either index scan or quick select) (need_sort == FALSE)
5777 
5778   @note
5779     Side effects:
5780     - may deallocate or deallocate and replace select->quick;
5781     - may set table->quick_condition_rows and table->quick_rows[...]
5782       to table->file->stats.records.
5783 */
5784 
get_index_for_order(ORDER * order,TABLE * table,SQL_SELECT * select,ha_rows limit,bool * need_sort,bool * reverse)5785 uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
5786                          ha_rows limit, bool *need_sort, bool *reverse)
5787 {
5788   if (select && select->quick && select->quick->unique_key_range())
5789   { // Single row select (always "ordered"): Ok to use with key field UPDATE
5790     *need_sort= FALSE;
5791     /*
5792       Returning of MAX_KEY here prevents updating of used_key_is_modified
5793       in mysql_update(). Use quick select "as is".
5794     */
5795     return MAX_KEY;
5796   }
5797 
5798   if (!order)
5799   {
5800     *need_sort= FALSE;
5801     if (select && select->quick)
5802       return select->quick->index; // index or MAX_KEY, use quick select as is
5803     else
5804       return table->file->key_used_on_scan; // MAX_KEY or index for some engines
5805   }
5806 
5807   if (!is_simple_order(order)) // just to cut further expensive checks
5808   {
5809     *need_sort= TRUE;
5810     return MAX_KEY;
5811   }
5812 
5813   if (select && select->quick)
5814   {
5815     if (select->quick->index == MAX_KEY)
5816     {
5817       *need_sort= TRUE;
5818       return MAX_KEY;
5819     }
5820 
5821     uint used_key_parts;
5822     switch (test_if_order_by_key(order, table, select->quick->index,
5823                                  &used_key_parts)) {
5824     case 1: // desired order
5825       *need_sort= FALSE;
5826       return select->quick->index;
5827     case 0: // unacceptable order
5828       *need_sort= TRUE;
5829       return MAX_KEY;
5830     case -1: // desired order, but opposite direction
5831       {
5832         QUICK_SELECT_I *reverse_quick;
5833         if ((reverse_quick=
5834                select->quick->make_reverse(used_key_parts)))
5835         {
5836           select->set_quick(reverse_quick);
5837           *need_sort= FALSE;
5838           return select->quick->index;
5839         }
5840         else
5841         {
5842           *need_sort= TRUE;
5843           return MAX_KEY;
5844         }
5845       }
5846     }
5847     DBUG_ASSERT(0);
5848   }
5849   else if (limit != HA_POS_ERROR)
5850   { // check if some index scan & LIMIT is more efficient than filesort
5851 
5852     /*
5853       Update quick_condition_rows since single table UPDATE/DELETE procedures
5854       don't call make_join_statistics() and leave this variable uninitialized.
5855     */
5856     table->quick_condition_rows= table->file->stats.records;
5857 
5858     int key, direction;
5859     if (test_if_cheaper_ordering(NULL, order, table,
5860                                  table->keys_in_use_for_order_by, -1,
5861                                  limit,
5862                                  &key, &direction, &limit) &&
5863         !is_key_used(table, key, table->write_set))
5864     {
5865       *need_sort= FALSE;
5866       *reverse= (direction < 0);
5867       return key;
5868     }
5869   }
5870   *need_sort= TRUE;
5871   return MAX_KEY;
5872 }
5873 
5874 
5875 /**
5876   Returns number of key parts depending on
5877   OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
5878 
5879   @param  key_info  pointer to KEY structure
5880 
5881   @return number of key parts.
5882 */
5883 
actual_key_parts(KEY * key_info)5884 uint actual_key_parts(KEY *key_info)
5885 {
5886   return key_info->table->in_use->
5887     optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
5888     key_info->actual_key_parts : key_info->user_defined_key_parts;
5889 }
5890 
5891 
5892 /**
5893   Returns key flags depending on
5894   OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
5895 
5896   @param  key_info  pointer to KEY structure
5897 
5898   @return key flags.
5899 */
5900 
actual_key_flags(KEY * key_info)5901 uint actual_key_flags(KEY *key_info)
5902 {
5903   return key_info->table->in_use->
5904     optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
5905     key_info->actual_flags : key_info->flags;
5906 }
5907 
5908 /**
5909   @} (end of group Query_Optimizer)
5910 */
5911